[open-coarrays] 01/80: Upstream 1.6.0

Alastair McKinstry mckinstry at moszumanska.debian.org
Wed Oct 25 13:45:33 UTC 2017


This is an automated email from the git hooks/post-receive script.

mckinstry pushed a commit to branch debian/master
in repository open-coarrays.

commit 82c624cc8784d80c563c5646f3479b01f81c42b4
Author: Alastair McKinstry <mckinstry at debian.org>
Date:   Mon May 30 19:41:07 2016 +0100

    Upstream 1.6.0
---
 AUTHORS.md                                         |   22 +
 CAF_ABI.md                                         |  571 +++++
 CMakeLists.txt                                     |  212 ++
 CONTRIBUTING.md                                    |  114 +
 GETTING_STARTED.md                                 |  132 +
 INSTALL.md                                         |  249 ++
 LICENSE                                            |   35 +
 README.md                                          |  136 ++
 STATUS.md                                          |  205 ++
 cmake/pkg/OpenCoarraysConfig.cmake.in              |    1 +
 cmake/uninstall.cmake.in                           |   23 +
 doc/dependency_tree/gitkeep.sh                     |   73 +
 doc/dependency_tree/opencoarrays-tree.txt          |   12 +
 .../opencoarrays/cmake-3.4.0/.gitkeep              |    0
 .../flex-2.6.0/bison-3.0.4/m4-1.4.17/.gitkeep      |    1 +
 .../mpich-3.1.4/gcc-6.1.0/gmp/.gitkeep             |    1 +
 .../mpich-3.1.4/gcc-6.1.0/mpc/.gitkeep             |    1 +
 .../mpich-3.1.4/gcc-6.1.0/mpfr/.gitkeep            |    1 +
 doc/robodoc.css                                    |  309 +++
 doc/robodoc.rc                                     |   67 +
 doc/sample-compiler-output.c                       |  108 +
 doc/sample-fortran-source.f90                      |   37 +
 install.sh                                         |  312 +++
 install.sh-usage                                   |   19 +
 prerequisites/acceptable_compiler.f90              |   39 +
 prerequisites/build-functions/build_and_install.sh |   51 +
 .../build-functions/download_if_necessary.sh       |   87 +
 prerequisites/build-functions/ftp-url.sh           |   37 +
 .../set_SUDO_if_needed_to_write_to_directory.sh    |   34 +
 prerequisites/build-functions/set_compilers.sh     |   42 +
 .../set_or_print_default_version.sh                |   62 +
 .../build-functions/set_or_print_downloader.sh     |   57 +
 .../set_or_print_installation_path.sh              |   19 +
 prerequisites/build-functions/set_or_print_url.sh  |   85 +
 .../build-functions/unpack_if_necessary.sh         |   18 +
 prerequisites/build.sh                             |  147 ++
 prerequisites/build.sh-usage                       |   23 +
 prerequisites/check_version.sh                     |  152 ++
 .../install-binary-functions/build_parse_table.sh  |   13 +
 .../move_binaries_to_install_path.sh               |   44 +
 .../set_or_print_csv_binary_names.sh               |   31 +
 .../set_or_print_default_version.sh                |   47 +
 .../set_or_print_downloader.sh                     |   39 +
 .../install-binary-functions/set_or_print_url.sh   |   52 +
 prerequisites/install-binary.sh                    |  142 ++
 prerequisites/install-binary.sh-usage              |   14 +
 .../install-functions/build_opencoarrays.sh        |   26 +
 prerequisites/install-functions/find_or_install.sh |  644 +++++
 prerequisites/install-functions/print_header.sh    |   34 +
 prerequisites/install-functions/report_results.sh  |  139 ++
 prerequisites/install-ofp.sh                       |  179 ++
 prerequisites/install-ofp.sh-usage                 |   11 +
 prerequisites/print_true.f90                       |    2 +
 prerequisites/stack.sh                             |  254 ++
 prerequisites/use-case/bootstrap.sh                |   41 +
 prerequisites/use-case/define_functions.sh         |  105 +
 prerequisites/use-case/parse_command_line.sh       |  127 +
 prerequisites/use-case/set_common_switches.sh      |   51 +
 .../use-case/set_environment_and_color.sh          |   44 +
 prerequisites/use-case/set_magic_variables.sh      |   46 +
 src/CMakeLists.txt                                 |    4 +
 src/Makefile                                       |   63 +
 src/armci/Makefile                                 |   21 +
 src/armci/armci.c                                  | 1006 ++++++++
 src/common/Makefile                                |    9 +
 src/common/caf_auxiliary.c                         |   60 +
 src/extensions/caf-foot                            |  117 +
 src/extensions/caf-head                            |   32 +
 src/extensions/cafrun-foot                         |   47 +
 src/extensions/cafrun-head                         |   35 +
 src/extensions/opencoarrays.F90                    |  744 ++++++
 src/gasnet/CMakeLists.txt                          |    1 +
 src/gasnet/Makefile                                |   44 +
 src/gasnet/gasnet.c                                | 1201 ++++++++++
 src/libcaf-gfortran-descriptor.h                   |  144 ++
 src/libcaf.h                                       |  154 ++
 src/make.inc                                       |   30 +
 src/make.inc.Cray-XE                               |   35 +
 src/mpi/CMakeLists.txt                             |   79 +
 src/mpi/Makefile                                   |   21 +
 src/mpi/mpi_caf.c                                  | 2528 ++++++++++++++++++++
 src/single/CMakeLists.txt                          |    5 +
 src/single/Makefile                                |   19 +
 src/single/single.c                                |  727 ++++++
 src/tests/CMakeLists.txt                           |   17 +
 src/tests/UH_CAF_perf_validation_suite_v1.0.1.txt  |    7 +
 src/tests/integration/CMakeLists.txt               |    7 +
 .../integration/coarrayHelloWorld/CMakeLists.txt   |    2 +
 src/tests/integration/coarrayHelloWorld/Makefile   |   16 +
 .../coarrayHelloWorld/hello_multiverse.F90         |   60 +
 .../integration/dist_transpose/CMakeLists.txt      |    9 +
 .../integration/dist_transpose/Makefile_NS_GASNET  |   32 +
 .../coarray_distributed_transpose.F90              |  309 +++
 src/tests/integration/dist_transpose/walltime.o    |  Bin 0 -> 980 bytes
 src/tests/integration/dist_transpose/walltime.x64  |   20 +
 src/tests/integration/pde_solvers/CMakeLists.txt   |    3 +
 src/tests/integration/pde_solvers/README.md        |   50 +
 .../pde_solvers/coarrayBurgers/CMakeLists.txt      |   30 +
 .../pde_solvers/coarrayBurgers/Makefile            |   99 +
 .../pde_solvers/coarrayBurgers/Makefile.inst       |  110 +
 .../pde_solvers/coarrayBurgers/global_field.F90    |  199 ++
 .../include-files/cray_capabilities.txt            |    3 +
 .../include-files/gfortran4.8_capabilities.txt     |    3 +
 .../include-files/gfortran_capabilities.txt        |    1 +
 .../include-files/ibm_capabilities.txt             |    2 +
 .../include-files/intel_capabilities.txt           |    3 +
 .../include-files/nag_capabilities.txt             |    3 +
 .../include-files/portlandgroup_capabilities.txt   |    3 +
 .../include-files/tau_cray_capabilities.txt        |    5 +
 .../include-files/tau_intel_capabilities.txt       |    5 +
 .../library/ForTrilinos_assertion_utility.F90      |  174 ++
 .../coarrayBurgers/library/ForTrilinos_error.F90   |   86 +
 .../coarrayBurgers/library/co_object_interface.F90 |   47 +
 .../coarrayBurgers/library/object_interface.F90    |   51 +
 .../pde_solvers/coarrayBurgers/local_field.F90     |   76 +
 .../pde_solvers/coarrayBurgers/main.F90            |   86 +
 .../integration/pde_solvers/coarrayBurgers/run.sh  |   24 +
 .../coarrayBurgers/scripts/coarrayBurgers_cce.pbs  |   15 +
 .../scripts/coarrayBurgers_cce_multiple.pbs        |   26 +
 .../scripts/coarrayBurgers_cce_tau.pbs             |   24 +
 .../pde_solvers/coarrayBurgers/scripts/run.sh      |   19 +
 .../coarrayBurgers/scripts/run.sh.with_modules     |   12 +
 .../coarrayBurgers/scripts/run_troubleshooting.sh  |   13 +
 .../coarrayHeatSimplified/CMakeLists.txt           |    6 +
 .../pde_solvers/coarrayHeatSimplified/Makefile     |   36 +
 .../coarrayHeatSimplified/global_field.f90         |  136 ++
 .../coarrayHeatSimplified/local_field.f90          |   64 +
 .../pde_solvers/coarrayHeatSimplified/main.f90     |   45 +
 .../include-files/cray_capabilities.txt            |    0
 .../include-files/gfortran_capabilities.txt        |    1 +
 .../pde_solvers/include-files/ibm_capabilities.txt |    2 +
 .../include-files/intel_capabilities.txt           |    3 +
 .../pde_solvers/include-files/nag_capabilities.txt |    1 +
 .../include-files/portlandgroup_capabilities.txt   |    1 +
 .../include-files/tau_cray_capabilities.txt        |    5 +
 .../include-files/tau_intel_capabilities.txt       |    5 +
 .../integration/pde_solvers/library/CMakeLists.txt |    7 +
 .../library/ForTrilinos_assertion_utility.F90      |  169 ++
 .../pde_solvers/library/ForTrilinos_error.F90      |   86 +
 .../pde_solvers/library/co_object_interface.F90    |   73 +
 .../pde_solvers/library/object_interface.F90       |   77 +
 .../pde_solvers/library/parse_command_line.f90     |  104 +
 .../pde_solvers/navier-stokes/CMakeLists.txt       |   19 +
 .../integration/pde_solvers/navier-stokes/Makefile |   39 +
 .../pde_solvers/navier-stokes/Makefile_NS_GASNET   |   38 +
 .../pde_solvers/navier-stokes/coarray-shear.f90    | 1018 ++++++++
 .../navier-stokes/coarray-shear_coll.F90           | 1046 ++++++++
 .../navier-stokes/coarray-shear_coll_lock.f90      | 1036 ++++++++
 .../pde_solvers/navier-stokes/libfft_avx.a         |  Bin 0 -> 50746 bytes
 .../pde_solvers/navier-stokes/libfft_sse.a         |  Bin 0 -> 46474 bytes
 .../pde_solvers/navier-stokes/mpi-shear.f90        | 1034 ++++++++
 .../pde_solvers/navier-stokes/walltime.o           |  Bin 0 -> 980 bytes
 src/tests/performance/BurgersMPI/CMakeLists.txt    |   33 +
 src/tests/performance/BurgersMPI/Makefile          |  100 +
 src/tests/performance/BurgersMPI/input_file.F90    |   33 +
 .../performance/BurgersMPI/kind_parameters.F90     |   35 +
 src/tests/performance/BurgersMPI/main.F90          |  105 +
 src/tests/performance/BurgersMPI/mpi_module.F90    |  118 +
 src/tests/performance/BurgersMPI/mpi_share.F90     |   31 +
 .../performance/BurgersMPI/periodic_2nd_order.F90  |  341 +++
 src/tests/performance/BurgersMPI/shared.F90        |   36 +
 src/tests/performance/CMakeLists.txt               |    3 +
 .../performance/mpi_dist_transpose/CMakeLists.txt  |   16 +
 .../mpi_distributed_transpose.F90                  |  381 +++
 .../performance/mpi_dist_transpose/walltime.o      |  Bin 0 -> 980 bytes
 .../performance/mpi_dist_transpose/walltime.x64    |   20 +
 src/tests/performance/psnap/CMakeLists.txt         |    2 +
 src/tests/performance/psnap/cafpsnap.f90           |  707 ++++++
 src/tests/performance/psnap/timemeasure.c          |   56 +
 src/tests/unit/CMakeLists.txt                      |    9 +
 src/tests/unit/collectives/CMakeLists.txt          |   14 +
 src/tests/unit/collectives/co_broadcast.F90        |   92 +
 src/tests/unit/collectives/co_max.F90              |   71 +
 src/tests/unit/collectives/co_min.F90              |   71 +
 src/tests/unit/collectives/co_reduce.F90           |  130 +
 src/tests/unit/collectives/co_sum.F90              |   86 +
 src/tests/unit/extensions/CMakeLists.txt           |   44 +
 src/tests/unit/init_register/CMakeLists.txt        |   17 +
 .../unit/init_register/allocate_as_barrier.f90     |   24 +
 .../init_register/allocate_as_barrier_proc.f90     |   32 +
 src/tests/unit/init_register/initialize_mpi.f90    |   46 +
 src/tests/unit/init_register/register.f90          |   47 +
 .../unit/init_register/register_rename_me.f90      |   43 +
 .../unit/init_register/register_rename_me_too.f90  |   49 +
 src/tests/unit/send-get/CMakeLists.txt             |   20 +
 src/tests/unit/send-get/get_array_test.f90         |  420 ++++
 src/tests/unit/send-get/get_with_offset_1d.f90     |   31 +
 src/tests/unit/send-get/old_get_array_test.f90     |  344 +++
 src/tests/unit/send-get/sameloc.f90                |  139 ++
 src/tests/unit/send-get/send_array_test.f90        |  398 +++
 src/tests/unit/send-get/strided_get.f90            |   25 +
 src/tests/unit/send-get/whole_get_array.f90        |   89 +
 src/tests/unit/simple/CMakeLists.txt               |   17 +
 src/tests/unit/simple/Makefile                     |   31 +
 src/tests/unit/simple/syncimages.f90               |   58 +
 src/tests/unit/simple/syncimages2.c                |  114 +
 src/tests/unit/simple/syncimages2.f90              |   44 +
 src/tests/unit/simple/test1Caf.f90                 |   63 +
 src/tests/unit/simple/testAtomics.f90              |   28 +
 src/tests/unit/sync/CMakeLists.txt                 |   23 +
 src/tests/unit/sync/duplicate_syncimages.f90       |   47 +
 src/tests/unit/sync/syncall.f90                    |   54 +
 src/tests/unit/sync/syncimages.f90                 |   58 +
 src/tests/unit/sync/syncimages2.f90                |   44 +
 src/tests/unit/sync/syncimages_status.f90          |   22 +
 205 files changed, 24268 insertions(+)

diff --git a/AUTHORS.md b/AUTHORS.md
new file mode 100644
index 0000000..34ac3fa
--- /dev/null
+++ b/AUTHORS.md
@@ -0,0 +1,22 @@
+Contributors to OpenCoarrays
+============================
+
+[![Download as PDF][pdf img]](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/AUTHORS.pdf)
+
+Download this file as a PDF document
+[here](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/AUTHORS.pdf).
+
+ - Alessandro Fanfarillo  <fanfarillo at ing.uniroma2.it>
+ - Damian Rouson  <damian at rouson.net>
+ - Izaak Beekman  <http://izaakbeekman.com>
+ - Daniel Carrera  <dcarrera at gmail.com>
+ - Jeff Hammond  <jeff.science at gmail.com>
+ - Tobias Burnus  <burnus at net-b.de>
+
+[I think we're missing a number of people here]:#
+[@naveen-rn?]:#
+[@raul-nasner?]:#
+[Others:]:#
+
+[Links]:#
+[pdf img]: https://img.shields.io/badge/PDF-AUTHORS.md-6C2DC7.svg?style=flat-square "Download as PDF"
diff --git a/CAF_ABI.md b/CAF_ABI.md
new file mode 100644
index 0000000..342af4d
--- /dev/null
+++ b/CAF_ABI.md
@@ -0,0 +1,571 @@
+[This document is formatted with GitHub-Flavored Markdown.              ]:#
+[For better viewing, including hyperlinks, read it online at            ]:#
+[https://github.com/sourceryinstitute/opencoarrays/blob/master/CAF_API.md]:#
+
+OpenCoarrays Application Binary Interface (ABI)
+===============================================
+
+[![Download as PDF][pdf img]](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/CAF_ABI.pdf)
+
+Download this file as a PDF document
+[here](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/CAF_ABI.pdf).
+
+* [To Do](#to-do)
+* [Implementation status](#implementation-status)
+* [Definitions and types](#definitions-and-types)
+* [Provided functions](#provided-functions)
+
+This document describes the OpenCoarrays application binary interface (ABI) through
+which a compiler accesses coarray functionality.  As such, the target audience for
+this document is compiler developers.  Most application developers need only write
+standard-conforming Fortran 2008 or 2015 and compile their code with the OpenCoarrays
+`caf` compiler wrapper without knowledge of the ABI.
+
+The actual function names in this document have a PREFIX in the source code to avoid
+name clashes.  The prefix can be vendor-specific.
+
+To Do
+-----
+
+* [ ] Discuss the current draft
+* [ ] Add missing functions of the current gfortran implementation
+* [ ] Address the TODO items
+* [ ] Extend the functions to match a sensible set
+* [ ] Update the implementation status, especially for the ARMCI library
+
+Implementation status
+---------------------
+
+The library implementation in this directory should be ABI-compatible
+with the wording below, except for some `int errmsg_len` vs. `size_t`
+changes that have not yet been implemented.
+
+Definitions and types
+---------------------
+
+### 2.1  `caf_token_t` ###
+
+Typedef of type `void *` on the compiler side. Can be any data
+type on the library side.
+
+### 2.2  `caf_register_t` ###
+
+Type indicating which kind of coarray variable should be registered.
+
+```c
+typedef enum caf_register_t {
+  CAF_REGTYPE_COARRAY_STATIC,
+  CAF_REGTYPE_COARRAY_ALLOC,
+  CAF_REGTYPE_LOCK_STATIC,
+  CAF_REGTYPE_LOCK_ALLOC,
+  CAF_REGTYPE_CRITICAL,
+  CAF_REGTYPE_EVENT_STATIC,
+  CAF_REGTYPE_EVENT_ALLOC
+  }
+caf_register_t;
+```
+
+__TODO__:
+  Check whether this set is complete and makes sense
+
+
+### 2.3  `caf_token_t` ###
+
+In terms of the processor, an opaque pointer, which is used to identify a
+coarray.  The exact content is implementation-defined by the library.
+
+### 2.4  Stat values ###
+
+```c
+#define STAT_UNLOCKED           0
+#define STAT_LOCKED             1
+#define STAT_LOCKED_OTHER_IMAGE 2
+#define STAT_STOPPED_IMAGE      6000
+```
+
+__TODO__:
+  Define more, allow room for lib-specific values, update for [TS18508].
+  Do we need to take care of special vendor choices?
+
+__Note__:
+  Some values have to be such that they differ from certain other
+  values.
+
+
+Provided functions
+------------------
+
+### 3.1  Initialization function ###
+
+```c
+void caf_init (int *argc, char ***argv)
+```
+
+This function shall be called at startup of the program before the Fortran main
+program.  It takes as arguments the command-line arguments of the program. It is
+permitted to pass to NULL pointers as argument; if non-NULL, the library is
+permitted to modify the arguments.
+
+| Argument	| `intent`	| description	|
+| ------	| ------	| ------	|
+| `argc`	| `inout`	| An integer pointer with the number of arguments passed to the program or NULL.	|
+| `argv`	| `inout`	| A pointer to an array of strings with the      command-line arguments or NULL.	|
+
+__Note__:
+  The function is modeled after the initialization function of the
+  Message Passing Interface (MPI) specification.  Due to the way coarray
+  registration (3.5) works, it might not be the first call to the libaray. If
+  the main program is not written in Fortran and only a library uses coarrays,
+  it can happen that this function is never called.  Therefore, it is
+  recommended that the library does not rely on the passed arguments and whether
+  the call has been done.
+
+__GCC__:
+  In gfortran, the function is generated when the Fortran main program is
+  compiled with -fcoarray=lib; the call happens before the run-time library
+  initialiation such that changes to the command-line arguments will be visible
+  when the command-line intrinsics are invoked.
+
+
+### 3.2  Finalization function ###
+
+```c
+void caf_finish (void)
+```
+
+This function shall be called at the end of the program to permit a graceful
+shutdown.
+
+__Note__:
+  It is recommended to add this call at the end of the Fortran main program and
+  when invoking `STOP`.  To ensure that the shutdown is also performed for
+  programs where this function is not explicitly invoked, for instance
+  non-Fortran programs or calls to the system's `exit()` function, the library can
+  use a destructor function.  Note that programs can also be terminated using
+  the `ERROR STOP` statement, which is handled via its own library call.
+
+__GCC__:
+  In gfortran, this function is called at the end of the Fortran main program and
+  when before the program stops with a `STOP` command, the respective file has been
+  compiled with the `-fcoarray=lib` option.
+
+
+### 3.3 Querying the image number ###
+
+```c
+int caf_this_image (int distance)
+```
+
+This function returns the current image number, which is a positive number.
+
+| Argument	| description	|
+| ------	| ------	|
+| `distance`	| As specified for the `this_image` intrinsic in [TS18508]. Shall be a nonnegative number.	|
+
+__Note__:
+  If the Fortran intrinsic `this_image()` is invoked without an argument, which is the only permitted form in Fortran 2008, the processor shall pass 0 as first argument.
+
+__GCC__:
+  (No special note.)
+
+
+
+### 3.4 Querying the maximal number of images ###
+
+```c
+int caf_num_images (int distance, int failed)
+```
+
+This function returns the number of images in the current team, if distance is 0
+or the number of images in the parent team at the specified distance. If failed
+is -1, the function returns the number of all images at the specified
+distance; if it is 0, the function returns the number of non-failed images, and
+if it is 1, it returns the number of failed images.
+
+| Argument	| description	|
+| ------	| ------	|
+| `distance`	| the distance from this image to the ancestor. Shall be positive.	|
+| `failed`	| shall be -1, 0, or 1	|
+
+__Note__:
+  This function follows [TS18508]. If the `num_image` intrinsic has no arguments,
+  the processor shall pass `distance = 0` and `failed = -1` to the function.
+
+__GCC__:
+  (No special note.)
+
+
+
+### 3.5 Registering coarrays ###
+
+```c
+void *caf_register (size_t size, caf_register_t type, caf_token_t *token, int *stat, char *errmsg, int errmsg_len)
+```
+
+Allocates memory for a coarray and creates a token to identify the coarray. The
+function is called for both coarrays with `SAVE` attribute and using an explicit
+`ALLOCATE` statement. If an error occurs and `STAT` is a `NULL` pointer, the function
+shall abort with printing an error message and starting the error termination.
+If no error occurs and `STAT=` is present, it shall be set to zero. Otherwise, it
+shall be set to a positive value and, if not- at code{NULL}, @var{ERRMSG} shall be
+set to a string describing the failure.  The function returns a pointer to the
+requested memory for the local image as a call to `malloc` would do.
+
+For `CAF_REGTYPE_COARRAY_STATIC` and `CAF_REGTYPE_COARRAY_ALLOC`, the passed size is
+the byte size requested. For `CAF_REGTYPE_LOCK_STATIC`, `CAF_REGTYPE_LOCK_ALLOC`
+and `CAF_REGTYPE_CRITICAL` it is the array size or one for a scalar.
+
+| Argument	| description	|
+| ------	| ------	|
+| `size`	| For normal coarrays, the byte size of the coarray to be allocated; for lock types, the number of elements.	|
+| `type`	| one of the `caf_register_t` types. Possible values: `CAF_REGTYPE_COARRAY_STATIC` - for nonallocatable coarrays `CAF_REGTYPE_COARRAY_ALLOC` - for allocatable coarrays `CAF_REGTYPE_LOCK_STATIC` - for nonallocatable lock variables `CAF_REGTYPE_LOCK_ALLOC` - for allocatable lock variables `CAF_REGTYPE_CRITICAL` - for lock variables used for critical sections	|
+| `token`	| `intent(out)` An opaque pointer identifying the coarray.	|
+| `stat`	| `intent(out)` For allocatable coarrays, stores the `STAT=`; may be `NULL`	|
+| `errmsg`	| intent(out) When an error occurs, this will be set to an error message; may be `NULL`	|
+| `errmgs_len`	| the buffer size of errmsg.	|
+
+__TODO__:
+
+  - [ ] Check whether the locking should be handled like that and whether one needs
+  more, e.g. for locking types in DT?
+  - [ ] Check whether one needs an additional function for to register coarrays
+  which are in static memory and used without memory allocation, i.e. just to
+  register the address.
+  - [ ] Check whether we need an explicit `SYNC ALL` at the beginning of the main
+  program or whether we can do without.
+  - [ ] Does [TS18508] require more for `SAVE` within teams or within blocks?
+
+__Note__:
+  Non-allocatable coarrays have to be registered prior use from remote images.
+  In order to guarantee this, they have to be registered before the main
+  program. This can be achieved by creating constructor functions.  When using
+  `caf_register`, also non-allocatable coarrays the memory is allocated and no
+  static memory is used.
+
+  For normal coarrays, the returned pointer is used for accesses on the local
+  image. For lock types, the value shall only used for checking the allocation
+  status. Note that for critical blocks, the locking is only required on one
+  image; in the locking statement, the processor shall always pass always an
+  image index of one for critical-section lock variables (`CAF_REGTYPE_CRITICAL`).
+
+__GCC__:
+   (no special notes)
+
+__TODO__:
+   Change `errmsg_len` to `size_t`
+
+
+
+### 3.6  Deregistering coarrays ###
+
+```c
+void caf_deregister (const caf_token_t *token, int *stat, char *errmsg, size_t errmsg_len)
+```
+
+Called to free the memory of a coarray; the processor calls this function for
+automatic and explicit deallocation.  In case of an error, this function shall
+fail with an error message, unless the `STAT=` variable is not null.
+
+| Argument	| `intent`	| description	|
+| ------	| ------	| -----	|
+| `token`	| `inout`	| An opaque pointer identifying the coarray.	|
+| `stat`	| `out`	| For allocatable coarrays, stores the `STAT=`; may be `NULL`	|
+| `errmsg`	| `out`	| When an error occurs, this will be set to an error message, may be `NULL`	|
+| `errmgs_len`	| | the buffersize of `errmsg`.	|
+
+__Note__:
+  The implementation is permitted to set the token to `NULL`. However, it is not required to do so.
+  For nonalloatable coarrays this function is never called.  If a cleanup is required, it has to be handled via the finish, stop and error stop functions, and via destructors.
+
+__GCC__:
+   (no special notes)
+
+__TODO__:
+   Change `errmsg_len` to `size_t`
+
+
+### 3.7  Sending data from a local image to a remote image ###
+
+```c
+void caf_send (caf_token_t token, size_t offset, int image_index,
+               gfc_descriptor_t *dest, caf_vector_t *dst_vector,
+               gfc_descriptor_t *src, int dst_kind, int src_kind)
+```
+
+Called to send a scalar, an array section or whole array from a local
+to a remote image identified by the `image_index`.
+
+| Argument	| description	|
+| ------	| ------	|
+| `token`	| `intent(in)`  An opaque pointer identifying the coarray.	|
+| `offset`	| By which amount of bytes the actual data is shifted compared to the base address of the coarray.	|
+| `image_index`	| The ID of the remote image; must be a positive number.	|
+| `dest`	| `intent(in)` Array descriptor for the remote image for the bounds and the size. The `base_addr` shall not be accessed.	|
+| `dst_vector`	| `intent(in)`  If not `NULL`, it contains the vector subscript of the destination array; the values are relative to the dimension triplet of the dest argument.	|
+| `src`	| `intent(in)` Array descriptor of the local array to be transferred to the remote image	|
+| `dst_kind`	| Kind of the destination argument	|
+| `src_kind`	| Kind of the source argument	|
+
+__Note__:
+  It is permitted to have `image_id` equal the current image; the memory of the
+  send-to and the send-from might (partially) overlap in that case. The
+  implementation has to take care that it handles this case. Note that the
+  assignment of a scalar to an array is permitted. In addition, the library has
+  to handle numeric-type conversion and for strings, padding and different
+  `character` kinds.
+
+__GCC__:
+  Currently, it uses gfortran's private array descriptor. A change to [TS29113]'s
+  array descriptor is planned; when that's done, the additional kind arguments
+  will be removed.
+  Note that the kind arguments permit to distiniguish the `character` kinds and
+  `real`/`complex` kinds 10 and 16, which have the same byte size.
+
+
+__TODO__ `FOR SEND*`:
+
+  - [ ] Wait is missing
+  - [ ] Assignment to an address instead of using a token, to handle
+    `caf[i]%allocatable%alloc_array(:,:) = ...`
+    Or some other means to handle those.
+  - [ ] Image index: How to handle references to other TEAMS?
+
+__OTHER TODOs__:
+
+- [ ] 3.x TODO: Handle `GET` and remote-to-remote communication
+- [ ] 3.y TODO: Handle `ATOMIC`, `LOCK`, `CRITICAL`
+- [ ] 3.z TODO Teams and error recovery
+
+### 3.8  Getting data from a remote image ###
+
+```c
+void caf_get_desc (caf_token_t token, size_t offset,
+                   int image_index, gfc_descriptor_t *src,
+                   caf_vector_t *src_vector, gfc_descriptor_t *dest,
+                   int src_kind, int dst_kind)
+```
+
+Called to get an array section or whole array from a a remote,
+image identified by the `image_index`.
+
+| Argument	| description	|
+| ------	| ------	|
+| `token`	| `intent(in)` An opaque pointer identifying the coarray.	|
+| `offset`	| By which amount of bytes the actual data is shifted compared to the base address of the coarray.	|
+| `image_index`	| The ID of the remote image; must be a positive number.	|
+| `dest`	| `intent(out)` Array descriptor of the local array to which the data will be transferred	|
+| `src`	| `intent(in)` Array descriptor for the remote image for the bounds and the size. The `base_addr` shall not be accessed.
+| `src_vector`	| `intent(int)` If not `NULL`, it contains the vector subscript of the destination array; the values are relative to the dimension triplet of the dest argument.	|
+| `dst_kind`	| Kind of the destination argument	|
+| `src_kind`	| Kind of the source argument	|
+
+__Note__:
+  It is permitted to have `image_id` equal the current image; the memory of the
+  send-to and the send-from might (partially) overlap in that case. The
+  implementation has to take care that it handles this case. Note that the
+  library has to handle numeric-type conversion and for strings, padding
+  and different `character` kinds.
+
+__GCC__:
+  Currently, it uses gfortran's private array descriptor. A change to [TS29113]'s
+  array descriptor is planned; when that's done, the additional kind arguments
+  will be removed.
+  Note that the kind arguments permit to distinguish the `character` kinds and
+  `real`/`complex` kinds 10 and 16, which have the same byte size.
+
+
+### 3.9  Sending data between remote images ###
+
+```c
+void caf_sendget (caf_token_t dst_token, size_t dst_offset,
+                  int dst_image_index, gfc_descriptor_t *dest,
+                  caf_vector_t *dst_vector, caf_token_t src_token,
+                  size_t src_offset, int src_image_index,
+                  gfc_descriptor_t *src, caf_vector_t *src_vector,
+                  int dst_kind, int src_kind)
+```
+
+Called to send a scalar, an array section or whole array from a remote image
+identified by the `src_image_index` to a remote image identified by the
+`dst_image_index`.
+
+| Argument	| description	|
+| ------	| ------	|
+| `dst_token`	| `intent(in)`  An opaque pointer identifying the destination coarray.	|
+| `dst_offset`	| By which amount of bytes the actual data is shifted compared to the base address of the destination coarray.	|
+| `dst_image_index`	| The ID of the destination remote image; must be a positive number.	|
+| `dest`	| `intent(in)` Array descriptor for the destination remote image for the bounds and the size. The `base_addr` shall not be accessed.	|
+| `dst_vector`	| `intent(int)`  If not NULL, it contains the vector subscript of the destination array; the values are relative to the dimension triplet of the dest argument.	|
+| `src_token`	| `intent(in)` An opaque pointer identifying the source coarray.	|
+| `src_offset`	| By which amount of bytes the actual data is shifted compared to the base address of the source coarray.	|
+| `src_image_index`	| The ID of the source remote image; must be a positive number.	|
+| `src`	| `intent(in)` Array descriptor of the local array to be transferred to the remote image	|
+| `src_vector`	| `intent(in)` Array descriptor of the local array to be transferred to the remote image	|
+| `dst_kind`	| Kind of the destination argument	|
+| `src_kind`	| Kind of the source argument	|
+
+__Note__:
+  It is permitted to have `image_id` equal the current image; the memory of the
+  send-to and the send-from might (partially) overlap in that case. The
+  implementation has to take care that it handles this case. Note that the
+  assignment of a scalar to an array is permitted. In addition, the library has
+  to handle numeric-type conversion and for strings, padding and different
+  `character` kinds.
+
+__GCC__:
+  Currently, it uses gfortran's private array descriptor. A change to [TS29113]'s
+  array descriptor is planned; when that's done, the additional kind arguments
+  will be removed.
+  Note that the kind arguments permit to distinguish the `character` kinds and
+  `real`/`complex` kinds 10 and 16, which have the same byte size.
+
+
+
+### 3.10  Barriers ###
+
+### 3.10.1  All-Image Barrier ###
+
+```c
+void caf_sync_all (int *stat, char *errmsg, size_t errmsg_len)
+```
+
+Barrier which waits for all other images, pending asynchronous communication
+and other data transfer.
+
+| Argument	| description	|
+| ------	| ------	|
+| `stat`	| Status variable, if `NULL`, failures are fatal. If non-null, assigned 0 on success, and a stat code (cf. 2.3) in case of an error.	|
+| `errmsg`	| If not NULL: Ignored unless stat is present; unmodified when successful, otherwise, an error message is copied into the variable.	|
+| `errmsg_len`	| Maximal length of the error string, which is not '\0' terminated. The string should be padded by blanks.	|
+
+__Note__:
+  For portability, consider only using 7bit ASCII characters in the error
+  message.
+
+__GCC__:
+  Implemented in GCC 4.x using an int argument for the length.
+  Currently, `size_t` is not implemented.
+
+
+
+### 3.10.2  Barrier for Selected Images ###
+
+```c
+void sync_images (int count, int images[], int *stat,
+                  char *errmsg, size_t errmsg_len)
+```
+
+| Argument	| description	|
+| ------	| ------	|
+| `count`	| Size of the array "images"; has value -1 for `sync images(*)` and value 0 for a zero-sized array.	|
+| `image`	| list of images to be synced with.	|
+| `stat`	| Status variable, if NULL, failures are fatal. If non-null, assigned 0 on success, and a stat code (cf. 2.3) in case of an error.	|
+| `errmsg`	| If not `NULL`: Ignored unless stat is present; unmodified when successful, otherwise, an error message is copied into the variable.	|
+| `errmsg_len`	| Maximal length of the error string, which is not `\0` terminated. The string should be padded by blanks.	|
+
+__Note__:
+  For portability, consider only using 7bit ASCII characters in the error
+  message. Note that the list can contain also the ID of `this_image` or can be
+  an empty set. Example use is that image 1 syncs with all others (i.e `sync images(*)`) and the others sync only with that image (`sync image(1)`). Or
+  for point-to point communication (`sync image([left_image, right_image]`).
+
+__GCC__:
+  Implemented in GCC 4.x using an int argument for the error-string length.
+  Currently, `size_t` is not implemented.
+
+### 3.11  Error abort ###
+
+```c
+void error_stop_str (const char *string, int32_t str_len);
+void error_stop (int32_t exit_error_code)
+```
+
+__TODO__
+
+  - [ ] Fix this description by filling-in the missing bits
+  - [ ] `STOP` vs `ERROR STOP` handling. Currently, `STOP` calls `finalize` and then the
+    normal `STOP` while for `ERROR STOP` directly calls the library
+  - [ ] F2008 requires that one prints the raised exceptions with `STOP` and `ERROR
+    STOP`. libgfortran's `STOP` and `ERROR STOP` do so - the current implementation
+    for `ERROR STOP` does not.
+
+
+### 3.11  Locking and unlocking ###
+
+#### 3.11.1  Locking a lock variable ####
+
+```c
+void caf_lock (caf_token_t token, size_t index, int image_index,
+               int *aquired_lock, int *stat, char *errmsg,
+               int errmsg_len)
+```
+
+Acquire a lock on the given image on a scalar locking variable or for the
+given array element for an array-valued variable. If the `acquired_lock`
+is `NULL`, the function return after having obtained the lock. If it is
+non-null, the result is is assigned the value true (one) when the lock could be
+obtained and false (zero) otherwise.  Locking a lock variable which has already
+been locked by the same image is an error.
+
+| Argument	| arguments	|
+| ------	| ------	|
+| `token`	| `intent(in)` An opaque pointer identifying the coarray.	|
+| `index`	| Array index; first array index is 0. For scalars, it is always 0.	|
+| `image_index`	| The ID of the remote image; must be a positive number.	|
+| `aquired_lock`	| `intent(out)` If not NULL, it returns whether lock could be obtained	|
+| `stat`	| `intent(out)` For allocatable coarrays, stores the `STAT=`; may be NULL	|
+| `errmsg`	| intent(out) When an error occurs, this will be set to an error message; may be NULL	|
+| `errmsg_len`	| the buffer size of errmsg.	|
+
+__Note__:
+  This function is also called for critical sections; for those, the array index
+  is always zero and the image index is one.  Libraries are permitted to use other
+  images for critical-section locking variables.
+
+__GCC__:
+   (no special notes)
+
+__TODO__:
+   Change `errmsg_len` to `size_t`
+
+
+#### 3.11.2  Unlocking a lock variable ####
+
+```c
+void caf_unlock (caf_token_t token, size_t index, int image_index,
+                 int *stat, char *errmsg, int errmsg_len)
+```
+
+Release a lock on the given image on a scalar locking variable or for the
+given array element for an array-valued variable. Unlocking a lock variable
+which is unlocked or has been locked by a different image is an error.
+
+| Argument	| description	|
+| ------	| ------	|
+| `token`	| `intent(in)` An opaque pointer identifying the coarray.	|
+| `index`	| Array index; first array index is 0. For scalars, it is always 0.	|
+| `image_index`	| The ID of the remote image; must be a positive number.	|
+| `stat`	| `intent(out)` For allocatable coarrays, stores the `STAT=`; may be `NULL`	|
+| `errmsg`	| `intent(out)` When an error occurs, this will be set to an error message; may be `NULL`	|
+| `errmsg_len`	| the buffer size of `errmsg`.	|
+
+__Note__:
+  This function is also called for critical sections; for those, the array index
+  is always zero and the image index is one.  Libraries are permitted to use other
+  images for critical-section locking variables.
+
+__GCC__:
+   (no special notes)
+
+__TODO__:
+   Change `errmsg_len` to `size_t`
+
+[Hyperlinks]:#
+
+[TS29113]: ftp://ftp.nag.co.uk/sc22wg5/n1901-n1950/n1942.pdf
+[TS18508]: http://isotc.iso.org/livelink/livelink?func=ll&objId=17288706&objAction=Open
+[To Do]: #to-do
+[Implementation status]: #implementation-status
+[Definitions and types]: #definitions-and-types
+[Provided functions]: #provided-functions
+[pdf img]: https://img.shields.io/badge/PDF-CAF_ABI.md-6C2DC7.svg?style=flat-square "Download as PDF"
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..ab094fb
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,212 @@
+cmake_minimum_required(VERSION 3.0)
+
+# Set the type/configuration of build to perform
+set ( CMAKE_CONFIGURATION_TYPES "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "CodeCoverage" )
+set ( CMAKE_BUILD_TYPE "Release"
+  CACHE STRING "Select which configuration to build." )
+set_property ( CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CMAKE_CONFIGURATION_TYPES} )
+
+#Name project and specify source languages
+project(opencoarrays VERSION 1.6.0 LANGUAGES C Fortran)
+
+#Print an error message on an attempt to build inside the source directory tree:
+if ("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
+  message(FATAL_ERROR "ERROR! "
+    "CMAKE_CURRENT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
+    " == CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}"
+    "\nThis archive does not support in-source builds:\n"
+    "You must now delete the CMakeCache.txt file and the CMakeFiles/ directory under"
+    "the 'src' source directory or you will not be able to configure correctly!"
+    "\nYou must now run something like:\n"
+    "  $ rm -r CMakeCache.txt CMakeFiles/"
+    "\n"
+    "Please create a directory outside the opencoarrays source tree and build under that outside directory "
+    "in a manner such as\n"
+    "  $ mkdir build-opencarrays\n"
+    "  $ cd build-opencoarrays\n"
+    "  $ CC=mpicc FC=mpif90 cmake <path-to-opencoarrays-source-directory> -DCMAKE_INSTALL_PREFIX=<path-to-install-directory>\n"
+    "\nsubstituting the appropriate syntax for your shell (the above line assumes the bash shell)."
+    )
+endif()
+
+#Report untested Fortran compiler unless explicitly directed to build all examples.
+if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU" )
+  set(gfortran_compiler true)
+  # add_definitions(-DPREFIX_NAME=_gfortran_caf_)
+  set ( CMAKE_C_FLAGS_CODECOVERAGE "-fprofile-arcs -ftest-coverage -O0"
+    CACHE STRING "Code coverage C compiler flags")
+  set ( CMAKE_Fortran_FLAGS_CODECOVERAGE "-fprofile-arcs -ftest-coverage -O0"
+    CACHE STRING "Code coverage C compiler flags")
+else()
+  message(WARNING
+    "\n"
+    "Attempting to build with untested Fortran compiler: ${CMAKE_Fortran_COMPILER_ID}. "
+    "Please report any failures to opencoarrays at googlegroups.com\n\n"
+  )
+endif()
+
+if (NOT (CMAKE_VERSION VERSION_LESS 3.3.1))
+  # Detect Fortran compiler version directly
+  if(gfortran_compiler AND (CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER 5.0.0))
+    set(opencoarrays_aware_compiler true)
+    add_definitions(-DPREFIX_NAME=_gfortran_caf_)
+  else()
+    set(opencoarrays_aware_compiler false)
+    add_definitions(-DPREFIX_NAME=_caf_extensions_)
+  endif()
+  if(gfortran_compiler AND (CMAKE_Fortran_COMPILER_VERSION VERSION_LESS 5.4))
+    # GCC patch to fix issue accepted for 5.4 release
+    # See https://github.com/sourceryinstitute/opencoarrays/issues/28 and
+    # https://groups.google.com/forum/#!msg/opencoarrays/RZOwwYTqG80/46S9eL696dgJ
+    message( STATUS "Disabling optimization flags due to GCC < 5.4 bug")
+    set(CMAKE_Fortran_FLAGS_RELEASE -O0
+      CACHE STRING "Flags used by the compiler during release builds." FORCE)
+    set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-g -DNDEBUG -O0"
+      CACHE STRING "Flags used by the compiler during release builds with debug info" FORCE)
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -O0")
+  endif()
+else()
+  # Use the C compiler version as a proxy for the Fortran compiler version (won't work with NAG)
+  if(gfortran_compiler AND (CMAKE_C_COMPILER_VERSION VERSION_GREATER 5.0.0))
+    set(opencoarrays_aware_compiler true)
+    add_definitions(-DPREFIX_NAME=_gfortran_caf_)
+  else()
+    set(opencoarrays_aware_compiler false)
+    add_definitions(-DPREFIX_NAME=_caf_extensions_)
+  endif()
+  if(gfortran_compiler AND (CMAKE_C_COMPILER_VERSION VERSION_LESS 5.4))
+    # GCC patch to fix issue accepted for the 5.4 release
+    # See https://github.com/sourceryinstitute/opencoarrays/issues/28 and
+    # https://groups.google.com/forum/#!msg/opencoarrays/RZOwwYTqG80/46S9eL696dgJ
+    message( STATUS "Disabling optimization flags due to GCC < 5.4 bug")
+    set(CMAKE_Fortran_FLAGS_RELEASE -O0
+      CACHE STRING "Flags used by the compiler during release builds." FORCE)
+    set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-g -DNDEBUG -O0"
+      CACHE STRING "Flags used by the compiler during release builds with debug info" FORCE)
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -O0")
+  endif()
+endif()
+
+if(gfortran_compiler)
+  set(CMAKE_REQUIRED_FLAGS "-fcoarray=single -ffree-form")
+endif()
+include(CheckFortranSourceCompiles)
+CHECK_Fortran_SOURCE_COMPILES("
+  program main
+    implicit none
+    integer :: i
+    i = this_image()
+  end program
+" Check_Simple_Coarray_Fortran_Source_Compiles)
+if(gfortran_compiler)
+  unset(CMAKE_REQUIRED_FLAGS)
+endif()
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
+
+add_subdirectory(src)
+
+#-----------------------------------------------------
+# Publicize installed location to other CMake projects
+#-----------------------------------------------------
+install(EXPORT OpenCoarraysTargets
+  NAMESPACE
+    OpenCoarrays::
+  DESTINATION
+    lib/cmake/opencoarrays
+)
+include(CMakePackageConfigHelpers) # standard CMake module
+write_basic_package_version_file(
+  "${CMAKE_CURRENT_BINARY_DIR}/OpenCoarraysConfigVersion.cmake"
+  VERSION "${opencoarrays_VERSION}"
+  COMPATIBILITY AnyNewerVersion
+)
+configure_file("${CMAKE_SOURCE_DIR}/cmake/pkg/OpenCoarraysConfig.cmake.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/OpenCoarraysConfig.cmake" @ONLY)
+
+install(
+  FILES
+    "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/OpenCoarraysConfig.cmake"
+    "${CMAKE_CURRENT_BINARY_DIR}/OpenCoarraysConfigVersion.cmake"
+  DESTINATION
+    lib/cmake/opencoarrays
+)
+
+add_library(OpenCoarrays INTERFACE)
+target_compile_options(OpenCoarrays INTERFACE -fcoarray=lib)
+target_link_libraries(OpenCoarrays INTERFACE caf_mpi)
+
+install(DIRECTORY  ${CMAKE_BINARY_DIR}/mod DESTINATION .)
+
+#------------------------------------------
+# Add portable unistall command to makefile
+#------------------------------------------
+# Adapted from the CMake Wiki FAQ
+configure_file ( "${CMAKE_SOURCE_DIR}/cmake/uninstall.cmake.in" "${CMAKE_BINARY_DIR}/uninstall.cmake"
+    @ONLY)
+
+add_custom_target ( uninstall
+  COMMAND ${CMAKE_COMMAND} -P "${CMAKE_BINARY_DIR}/uninstall.cmake" )
+
+
+enable_testing()
+
+function(add_mpi_test name num_mpi_proc path)
+   set(test_parameters ${MPIEXEC_NUMPROC_FLAG} ${num_mpi_proc} )
+   add_test(NAME ${name} COMMAND ${MPIEXEC} ${test_parameters} "${path}")
+   set_property(TEST ${name} PROPERTY PASS_REGULAR_EXPRESSION "Test passed.")
+endfunction(add_mpi_test)
+
+set(tests_root ${CMAKE_CURRENT_BINARY_DIR}/src/tests)
+
+if(opencoarrays_aware_compiler)
+  # Unit tests targeting each libcaf_mpi function, argument, and branch of code
+  add_mpi_test(initialize_mpi 2 ${tests_root}/unit/init_register/initialize_mpi)
+  add_mpi_test(register 2 ${tests_root}/unit/init_register/register)
+  add_mpi_test(register_rename_me 2 ${tests_root}/unit/init_register/register_rename_me)
+  add_mpi_test(register_rename_me_too 2 ${tests_root}/unit/init_register/register_rename_me_too)
+  add_mpi_test(allocate_as_barrier 2 ${tests_root}/unit/init_register/allocate_as_barrier)
+  if (NOT ( APPLE AND ( DEFINED ENV{TRAVIS} ) ) )
+    add_mpi_test(allocate_as_barrier_proc 2 ${tests_root}/unit/init_register/allocate_as_barrier_proc)
+  endif ()
+  add_mpi_test(get_array 2 ${tests_root}/unit/send-get/get_array)
+  add_mpi_test(send_array 2 ${tests_root}/unit/send-get/send_array)
+  add_mpi_test(get_with_offset_1d 2 ${tests_root}/unit/send-get/get_with_offset_1d)
+  add_mpi_test(whole_get_array 2 ${tests_root}/unit/send-get/whole_get_array)
+  add_mpi_test(strided_get 2 ${tests_root}/unit/send-get/strided_get)
+  add_mpi_test(co_sum 4 ${tests_root}/unit/collectives/co_sum_test)
+  add_mpi_test(co_broadcast 4 ${tests_root}/unit/collectives/co_broadcast_test)
+  add_mpi_test(co_min 4 ${tests_root}/unit/collectives/co_min_test)
+  add_mpi_test(co_max 4 ${tests_root}/unit/collectives/co_max_test)
+  add_mpi_test(syncall 32 ${tests_root}/unit/sync/syncall)
+  add_mpi_test(syncimages 32 ${tests_root}/unit/sync/syncimages)
+  add_mpi_test(duplicate_syncimages 8 ${tests_root}/unit/sync/duplicate_syncimages)
+  add_mpi_test(co_reduce 4 ${tests_root}/unit/collectives/co_reduce_test)
+#  add_mpi_test(syncimages_status 32 ${tests_root}/unit/sync/syncimages_status)
+
+  # Integration tests verifying the use of libcaf_mpi in applications
+  add_mpi_test(hello_multiverse 2 ${tests_root}/integration/coarrayHelloWorld/hello_multiverse)
+  add_mpi_test(coarray_burgers_pde 2 ${tests_root}/integration/pde_solvers/coarrayBurgers/coarray_burgers_pde)
+  add_mpi_test(co_heat 2 ${tests_root}/integration/pde_solvers/coarrayHeatSimplified/co_heat)
+  if ( ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "x86_64") AND ("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") )
+    if ( NOT (DEFINED ENV{TRAVIS}))
+      add_mpi_test(coarray_navier_stokes 2 ${tests_root}/integration/pde_solvers/navier-stokes/coarray_navier_stokes)
+      set_property(TEST coarray_navier_stokes PROPERTY PASS_REGULAR_EXPRESSION "Test passed.")
+    endif()
+  endif()
+else()
+  add_test(co_sum_extension ${tests_root}/unit/extensions/test-co_sum-extension.sh)
+  set_property(TEST co_sum_extension PROPERTY PASS_REGULAR_EXPRESSION "Test passed.")
+
+  add_test(co_broadcast_extension ${tests_root}/unit/extensions/test-co_broadcast-extension.sh)
+  set_property(TEST co_broadcast_extension PROPERTY PASS_REGULAR_EXPRESSION "Test passed.")
+
+  add_test(co_min_extension ${tests_root}/unit/extensions/test-co_min-extension.sh)
+  set_property(TEST co_min_extension PROPERTY PASS_REGULAR_EXPRESSION "Test passed.")
+
+  add_test(co_max_extension ${tests_root}/unit/extensions/test-co_max-extension.sh)
+  set_property(TEST co_max_extension PROPERTY PASS_REGULAR_EXPRESSION "Test passed.")
+
+  add_test(co_reduce_extension ${tests_root}/unit/extensions/test-co_reduce-extension.sh)
+  set_property(TEST co_reduce_extension PROPERTY PASS_REGULAR_EXPRESSION "Test passed.")
+endif()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..d4fc519
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,114 @@
+<a name="top"> </a>
+
+Contributing to OpenCoarrays
+============================
+
+[![Download as PDF][pdf img]](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/CONTRIBUTING.pdf)
+
+Download this file as a PDF document
+[here](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/CONTRIBUTING.pdf).
+
+- [Reporting Defects](#reporting-defects)
+- [Requesting Enhancements](#requesting-enhancements)
+- [Helping Out](#helping-out)
+  - [Outside Contributors](#outside-contributors)
+- [OpenCoarrays Branches](#opencoarrays-branches)
+  - [Master](#master)
+  - [Devel](#devel)
+
+Reporting Defects
+-----------------
+
+If you encounter problems during the course of [Installing] OpenCoarrays or [using OpenCoarrays], please take the following actions:
+
+ 1. Search the [issues] page (including [closed issues]) to see if anyone has encountered the same problem. If so add your experience to that thread.
+ 2. Search the [mailing list] to see if the issue has been discussed there.
+ 3. If unable to resolve the problem, please file a [new issue] and be sure to include the following information:
+   - Fortran and companion C compiler, including version number, being used with OpenCoarrays
+   - Communication library being used e.g., OpenMPI, MVAPICH or GASNet and the version number
+   - Open Coarrays version number, or if building from `master`, commit SHA (`caf --version`)
+   - Conditions required to reproduce the problem:
+     - OS
+     - Type of machine/hardware the code is running on
+     - Number of MPI ranks/processing elements/coarray images being run on
+     - How the code was compiled, including all flags and commands
+     - Minimal reproducer code (a few lines) required to trigger the bug
+  4. Any help you can provide diagnosing, isolating and fixing the problem is appreciated! Please see the [helping out] section for more information.
+
+Requesting Enhancements
+-----------------------
+
+If you would like OpenCoarrays to support a new communication library, OS, or have any other suggestions for its improvement, please take the following action:
+
+ 1. Search the [issues] page and [mailing list] to see if the feature or enhancement has already been requested.
+ 2. If not, please file a [new issue], and clearly explain your proposed enhancement.
+ 3. If you are able to help out in the implementation or testing of the proposed feature, please see the [helping out] section of this document.
+
+Helping Out
+-----------
+
+Thank you for your interest in contributing to OpenCoarrays! Your help is very appreciated! Below are some tips and guidelines to get started.
+
+### Outside Contributors ###
+
+Here is a checklist to help you get started contributing to OpenCoarrays and walk you through the process:
+
+ - [ ] Take a look at the [issues] page. Make sure that you're not about to duplicate someone else's work.
+ - [ ] Post a [new issue] discussing the changes you're proposing to implement; whether bug fix(es) or enhancement(s)/feature request(s)--or give us a heads up that you are going to start work on [an open issue].
+ - [ ] Please [Fork] the [OpenCoarrays repo] to your private repository.
+ - [ ] Next [Create a branch] and make sure to include the issue number(s) in the branch name, for example: `issue-53-fix-install-dir-logic` or `fix-typo-#23`
+ - [ ] Configure your local repository with the whitespace settings (and git hooks to enforce these) by running `./developer-scripts/setup-git.sh`. (Add the `--global` flag to this script to use these settings across all your new repositories, or newly cloned repositories.)  Pull requests introducing errant spaces and non-printing characters will not be accepted until these problems are addressed.
+ - [ ] Make your changes and commit them to your local repository, following these guidelines:
+   - [ ] Each commit should be a logically atomic, self-consistent, cohesive set of changes.
+   - [ ] The code should compile and pass all tests after each commit.
+   - [ ] The [commit message] should follow [these guidelines]:
+     - [ ] First line is directive phrase, starting with a capitalized imperative verb, and is no longer than 50 characters summarizing your commit
+     - [ ] Next line, if necessary is blank
+     - [ ] Following lines are all wrapped at 72 characters and can include additional paragraphs, bulleted lists, etc.
+     - [ ] Use [Github keywords] where appropriate, to indicate the commit resolves an open issue.
+ - [ ] Please do you best to keep a [clean and coherent history]. `git add -p ...`, `git commit --amend` and `git rebase --interactive <root-ref>` can be helpful to rework your commits into a cleaner, clearer state.
+ - [ ] Next, [open up a pull request] where the base branch is [`master`] or [`devel`] as appropriate
+ - [ ] Please be patient and responsive to requests and comments from SourceryInstitute (SI) team members. You may be asked to amend or otherwise alter commits, or push new commits to your branch.
+ - [ ] Make sure that all the automated [Travis-CI tests] pass
+ - [ ] Sign the [Contributor License Agreement (CLA)] by clicking the "details" link to the right of the `licence/cla` check and following the directions on the CLA assistant webpage
+
+OpenCoarrays Branches
+---------------------
+
+OpenCoarrays uses the [Github flow] workflow. There are [a number of resources] available to learn about the [Github flow] workflow, including a [video]. The gist of it is that the `master` branch is always deployable and deployed. The means at anytime, a new tagged release could be shipped using the `master` branch. For major changes that introduce experimental, or disruptive changes, we have a semi-stable `devel` branch.
+
+### Master ###
+
+The `master` branch should remain in pristine, stable condition all of the time. Any changes are applied atomically via pull requests. It should be assumed that customers are using the code on this branch, and great care should be taken to ensure its stability. Most bug fixes and incremental improvements will get merged into the `master` branch first, and then also the `devel` branch.
+
+
+### Devel ###
+
+This is the development branch, akin to GCC's `trunk`. Both of `devel` and `master` branches are protected, but `devel` will eventually be merged into `master` when the next major release happens, but until then it is a stable, forward looking branch where experimental features and major changes or enhancements may be applied and tested. Just as with `master` all changes are applied atomically as pull requests.
+
+[Links]: #
+[video]: https://youtu.be/EwWZbyjDs9c?list=PLg7s6cbtAD17uAwaZwiykDci_q3te3CTY
+[a number of resources]: http://scottchacon.com/2011/08/31/github-flow.html
+[Github flow]: https://guides.github.com/introduction/flow/
+[Travis-CI tests]: https://travis-ci.org/sourceryinstitute/opencoarrays/pull_requests
+[Contributor License Agreement (CLA)]: https://cla-assistant.io/sourceryinstitute/opencoarrays
+[`master`]: https://github.com/sourceryinstitute/opencoarrays
+[`devel`]: https://github.com/sourceryinstitute/opencoarrays/tree/devel
+[open up a pull request]: https://github.com/sourceryinstitute/opencoarrays/compare
+[clean and coherent history]: https://www.reviewboard.org/docs/codebase/dev/git/clean-commits/
+[Github keywords]: https://help.github.com/articles/closing-issues-via-commit-messages/#closing-an-issue-in-a-different-repository
+[commit message]: https://robots.thoughtbot.com/5-useful-tips-for-a-better-commit-message
+[these guidelines]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html
+[an open issue]: https://github.com/sourceryinstitute/opencoarrays/issues
+[Create a branch]: https://help.github.com/articles/creating-and-deleting-branches-within-your-repository/
+[OpenCoarrays repo]: https://github.com/sourceryinstitute/opencoarrays#fork-destination-box
+[Pull Request]: https://help.github.com/articles/using-pull-requests/
+[Fork]: https://help.github.com/articles/fork-a-repo/
+[helping out]: #helping-out
+[closed issues]: https://github.com/sourceryinstitute/opencoarrays/issues?q=is%3Aissue+is%3Aclosed
+[Installing]: ./INSTALLING.md
+[issues]: https://github.com/sourceryinstitute/opencoarrays/issues
+[mailing list]: https://groups.google.com/forum/#!forum/opencoarrays
+[using OpenCoarrays]: ./GETTING_STARTED.md
+[new issue]: https://github.com/sourceryinstitute/opencoarrays/issues/new
+[pdf img]: https://img.shields.io/badge/PDF-CONTRIBUTING.md-6C2DC7.svg?style=flat-square "Download as PDF"
diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md
new file mode 100644
index 0000000..f023edb
--- /dev/null
+++ b/GETTING_STARTED.md
@@ -0,0 +1,132 @@
+<a name="top"> </a>
+
+[This document is formatted with GitHub-Flavored Markdown.                       ]:#
+[For better viewing, including hyperlinks, read it online at                     ]:#
+[https://github.com/sourceryinstitute/opencoarrays/blob/master/GETTING_STARTED.md]:#
+
+Getting Started
+===============
+
+[![Download as PDF][pdf img]](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/GETTING_STARTED.pdf)
+
+Download this file as a PDF document
+[here](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/GETTING_STARTED.pdf).
+
+* [The caf compiler wrapper]
+* [A sample basic workflow]
+* [An advanced workflow]
+
+The caf compiler wrapper
+--------------------------
+
+The preferred method for compiling a CAF program is by invoking the `caf` bash script
+that the OpenCoarrays CMake scripts install in the `bin` subdirectory of the installation
+path. This is an experimental script with limited but useful capabilities that will
+grow over time.  Please submit bug reports and feature requests via our [Issues] page.
+
+The `caf` script liberates the source code and workflow from explicit dependence on the
+underlying compiler and communication library in the following ways:
+
+1. With an OpenCoarrays-aware (OCA) CAF compiler, the `caf` script passes the unmodified
+   source code to the underlying compiler with the necessary arguments for building a
+   CAF program, embedding the paths to OpenCoarrays libraries (e.g., `libcaf_mpi.a`) installed
+   in the `lib` subdirectory of the OpenCoarrays installation path.  The `caf` script also
+   embeds the path to the relevant module file in the `mod` subdirectory of the installation
+   path (e.g., `opencoarrays.mod`).  This supports use association with module entities via
+   `use opencoarrays`.
+2. With a non-CAF compiler (including gfortran 4.9), `caf` supports a subset of CAF by
+   replacing CAF statements with calls to procedures in the [opencoarrays module] before
+   passing the source code to the compiler.
+
+When using GCC 4.9, we recommend using the `use` statement's `only` clause to
+avoid inadvertent procedure name clashes between OpenCoarrays procedures and their
+GCC counterparts.  For example, use `use opencoarrays, only : co_reduce`.
+
+With a non-OCA and OCA CAF compilers, the extensions that `caf` imports include the collective
+subroutines proposed for Fortran 2015 in the draft Technical Specification [TS 18508]
+_Additional Parallel Features in Fortran_.
+
+The latter use case provides an opportunity to mix a compiler's CAF support with that of OpenCoarrays.
+For example, a non-OCA CAF compiler, such as the Cray or Intel compilers, might support all of a
+program's coarray square-bracket syntax, while OpenCoarrays supports the same program's calls to
+collective subroutine such as `co_sum` and `co_reduce`.
+
+A sample basic workflow
+-----------------------
+
+The following program listing, compilation, and execution workflow exemplify
+the use of an OCA compiler (e.g., gfortran 5.1.0 or later) in a Linux bash shell
+with the `bin` directory of the chosen installation path in the user's PATH
+environment variable:
+
+```
+$ cat tally.f90
+      program main
+        use iso_c_binding, only : c_int
+        use iso_fortran_env, only : error_unit
+        implicit none
+        integer(c_int) :: tally
+        tally = this_image() ! this image's contribution
+        call co_sum(tally)
+        verify: block
+          integer(c_int) :: image
+          if (tally/=sum([(image,image=1,num_images())])) then
+             write(error_unit,'(a,i5)') "Incorrect tally on image ",this_image()
+             error stop
+          end if
+        end block verify
+        ! Wait for all images to pass the test
+        sync all
+        if (this_image()==1) print *,"Test passed"
+      end program
+$ caf tally.f90 -o tally
+$ cafrun -np 4 ./tally
+        Test passed
+```
+
+where "4" is the number of images to be launched at program start-up.
+
+An advanced workflow
+--------------------
+
+To extend the capabilities of a non-OCA CAF compiler (e.g., the Intel or Cray compilers),
+access the types and procedures of the [opencoarrays module] by use association.  We
+recommend using a `use` statement with an `only` clause to reduce the likelihood of a
+name clash with the compiler's native CAf support.  For example, insert the following
+at line 2 of `tally.f90` above:
+
+```fortran
+use opencoarrays, only : co_sum
+```
+
+To extend the capabilities of a non-CAF compiler (e.g., GCC 4.9), use an unqualified
+`use` statement with no `only` clause.  The latter practice reduces the likelihood of
+name clashes with the compiler's or programs existing capabilities.
+
+If the `caf` compiler wrapper cannot process the source code in question, invoke
+the underlying communication library directly:
+
+```
+mpif90 -fcoarray=lib -L/opt/opencoarrays/ tally.f90 \ -lcaf_mpi -o htally-I<OpenCoarrays-install-path>/mod
+```
+
+and also run the program with the lower-level communication library:
+
+```
+mpirun -np <number-of-images> ./tally
+```
+
+[Hyperlinks]:#
+
+[The caf compiler wrapper]: #the-caf-compiler-wrapper
+[A sample basic workflow]: #a-sample-basic-workflow
+[An advanced workflow]:  #an-advanced-workflow
+
+[Sourcery Store]: http://www.sourceryinstitute.org/store
+[Issues]: https://github.com/sourceryinstitute/opencoarrays/issues
+[opencoarrays module]: ./src/extensions/opencoarrays.F90
+[GCC]: http://gcc.gnu.org
+[TS 18508]: http://isotc.iso.org/livelink/livelink?func=ll&objId=17181227&objAction=Open
+[The caf compiler wrapper]: #the-caf-compiler-wrapper
+[The cafrun program launcher]: #the-cafrun-program-launcher
+[pdf img]: https://img.shields.io/badge/PDF-GETTING_STARTED.md-6C2DC7.svg?style=flat-square "Download as PDF"
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 0000000..9010ac5
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,249 @@
+<a name="top"> </a>
+
+[This document is formatted with GitHub-Flavored Markdown.               ]:#
+[For better viewing, including hyperlinks, read it online at             ]:#
+[https://github.com/sourceryinstitute/opencoarrays/blob/master/INSTALL.md]:#
+
+Installing OpenCoarrays
+=======================
+
+[![Download as PDF][pdf img]](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/INSTALL.pdf)
+
+Download this file as a PDF document
+[here](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/INSTALL.pdf).
+
+ *  [End-User Installation]
+     * [Installation Script]
+     * [OS X]
+     * [Windows]
+     * [Linux]
+ *  [Advanced Installation from Source]
+     *  [Prerequisites]
+     *  [CMake]
+     *  [Make]
+ *  [Obtaining GCC]
+
+End-User Installation
+---------------------
+
+### Installation Script###
+
+As of release 1.2.0, users might consider installing by downloading and uncompressing
+a file from our [Releases] page and running the installation script in the top-level
+source directory:
+
+```
+tar xvzf opencoarrays-x.y.z.tar.gz
+cd opencoarrays
+./install.sh
+```
+
+Before installing OpenCoarrays, the above bash script will attempt to detect the presence
+of the default prequisite packages: [GCC], [MPICH] , and [CMake].  For additional details, see the [Prerequisites] section. If any of the
+aforementioned packages appear to be absent from the user's PATH environment variable,
+the [install.sh] script will attempt to download, build, and install any missing packages
+after asking permission to do so.  The script has been tested on Linux and OS X.  Please
+submit any related problems or questions to our [Issues] page.
+
+A complete installation should result in the creation of the following directories
+inside the installation path (.e.g, inside `build` in the above example):
+
+* `bin`: contains the compiler wrapper (`caf`), program launcher (`cafun`), and prerequisites builder (`build`)
+* `mod`: contains the `opencoarrays.mod` module file for use with non-OpenCoarrays-aware compilers
+* `lib`: contains the `libcaf_mpi.a` static library to which codes link for CAF support
+
+The remainder of this document explains other options that many end users will find
+simplest to obtain OpenCoarrays on OS X, Windows, or Linux without building OpenCoarrays
+from its source code.
+
+### OS X ###
+
+OS X users might find it easiest to install OpenCoarrays using the [MacPorts]
+package management system.  After installing [MacPorts], type the following:
+
+```
+sudo port selfupdate
+sudo port upgrade outdated
+sudo port install opencoarrays
+```
+
+where the `sudo` command requires administrator privileges and where the first
+two steps above are required only if the [MacPorts] ports were last updated prior
+to 30 September 2015, when the OpenCoarrays port was incorporated into [MacPorts].
+Repeating the first two steps above will also update OpenCoarrays to the latest
+release.
+
+Please also install the `mpstats` port as follows:
+
+```
+sudo port install mpstats
+```
+
+which supports future OpenCoarrays development by providing download data the
+OpenCoarrays team can use in proposals for research grants and development
+contracts.
+
+### Windows ###
+
+Windows users will find it easiest to download the Lubuntu Linux virtual
+machine from the [Sourcery Institute Store].  The virtual machine boots inside
+the open-source [VirtualBox] virtualization package.  In addition to containing
+GCC 4.9, 5.2, and 6.0, MPICH, OpenMPI, and OpenCoarrays, the virtual machine
+contains dozens of other open-source software packages that support software
+development in modern Fortran.  See the [download and installation instructions]
+for a partial list of the included packages.
+
+Alternatively, if you desire to use OpenCoarrays under Cygwin, please submit a
+feature request via our [Issues] page.
+
+### Linux ###
+
+The [Arch Linux] distribution provides an [aur package] for installing OpenCoarrays.
+Users of other Linux distributions who prefer not to build OpenCoarrays from source might
+access OpenCoarrays via the the Lubuntu Linux virtual machine from the
+[Sourcery Institute Store] after installing the version of [VirtualBox] that is suitable
+for the relevant Linux distribution.  Alternatively, if you desire to install using other
+Linux package management software such as [yum] or [apt-get], please submit a feature
+request via our [Issues] page.
+
+Advanced Installation from Source
+--------------------
+
+### Prerequisites: ###
+
+The prerequisites below and their dependencies are recommended for the broadest coverage of CAF features. If a prerequisite is missing or outdated, the [install.sh] script will prompt the user for permission to download, compile, and install it. Before doing so, [install.sh] will verify whether that prerequisite's prerequisites are present and will recursively traverse the dependency tree until reaching an acceptable prerequisite or reaching the end of a branch.
+
+```
+opencoarrays
+├── cmake-3.4.0
+└── mpich-3.1.4
+    └── gcc-6.1.0
+        ├── flex-2.6.0
+        │   └── bison-3.0.4
+        │       └── m4-1.4.17
+        ├── gmp
+        ├── mpc
+        └── mpfr
+```
+
+If using the advanced [CMake] or [Make] builds detailed below, please ensure that these dependencies are met before attempting to build and install OpenCoarrays.
+
+### CMake ###
+
+[CMake] is the preferred build system.   CMake is a cross-platform Makefile generator that
+includes with the testing tool CTest.  To avoid cluttering or clobbering the source tree,
+our CMake setup requires that your build directory be any directory other than the top-level
+OpenCoarrays source directory.  In a bash shell, the following steps should build
+OpenCoarrays, install OpenCoarrays, build the tests, run the tests, and report the test results:
+
+```
+tar xvzf opencoarrays.tar.gz
+cd opencoarrays
+mkdir opencoarrays-build
+cd opencoarrays-build
+CC=mpicc FC=mpif90 cmake .. -DCMAKE_INSTALL_PREFIX=${PWD}
+make
+ctest
+make install
+```
+
+where the the first part of the cmake line sets the CC and FC environment variables
+and the final part of the same line defines the installation path as the present
+working directory (`opencoarrays-build`).  Please report any test failures via the
+OpenCoarrays [Issues] page.
+
+Advanced options (most users should not use these):
+
+    -DLEGACY_ARCHITECTURE=OFF enables the use of FFT libraries that employ AVX instructions
+    -DHIGH_RESOLUTION_TIMER=ON enables timers that tick once per clock cycle
+    -DCOMPILER_SUPPORTS_ATOMICS enables support for the proposed Fortran 2015 events feature
+    -DUSE_EXTENSIONS builds the [opencoarrays] module for use with non-OpenCoarrays-aware compilers
+    -DCOMPILER_PROVIDES_MPI is set automatically when building with the Cray Compiler Environment
+
+The first two flags above are not portable and the third enables code that is incomplete as
+of release 1.0.0.  The fourth is set automatically by the CMake scripts based on the compiler
+identity and version.
+
+### Make ###
+
+Unlike the Makefiles that CMake generates automatically for the chosen platform, static
+Makefiles require a great deal more maintenance and are less portable.  Also, the static
+Makefiles provided with OpenCoarrays lack several important capabilities.  In particular,
+they will not build the tests;  they will not build any of the infrastructure for compiling
+CAF source with non-OpenCoarrays-aware compilers (that infrastructure includes the
+[opencoarrays] module, the `caf` compiler wrapper, and the `cafrun` program launcher);
+nor do the static Makefiles provide a `make install` option so you will need to manually
+move the desired library from the corresponding source directory to your intended installation
+location as shown below.
+
+If CMake is unavailable, build and install with Make using steps such as the following:
+
+```
+tar xvzf opencoarrays.tar.gz
+cd opencoarray/src
+make
+mv mpi/libcaf_mpi.a <installation-path>
+```
+
+For the above steps to succeed, you might need to edit the [make.inc] file to match your
+system settings.  For example, you might need to remove the `-Werror` option from the
+compiler flags or name a different compiler.  In order to activate efficient strided-array
+transfer support, uncomment the `-DSTRIDED` flag inside the [make.inc] file.
+
+Obtaining GCC, MPICH, and CMake
+-------------------------------
+
+[GFortran Binaries] 5 binary builds are available at <https://gcc.gnu.org/wiki/GFortranBinaries>.  Also,
+the Lubuntu Linux virtual machine available for download in the [Sourcery Store] includes
+builds of GCC 4.9, 5.2, and 6.0.
+
+To build all prerequisites from source, including the current development branch of GCC,
+you might first try the running the provided [install.sh] script as described above in
+the [Installation Script] section.  Or try building each prerequisite from source as
+follows:
+
+```
+cd prerequisites
+CC=gcc FC=gfortran CXX=g++ ./build flex
+./build gcc
+CC=gcc FC=gfortran CXX=g++ ./build mpich
+./build cmake
+```
+
+where the second line builds the flex package that is required for building gcc from source.
+
+[Links]: #
+
+[End-User Installation]: #end-user-installation
+[Installation Script]: #installation-script
+[install.sh]: ./install.sh
+[OS X]: #os-x
+[ticket]: https://trac.macports.org/ticket/47806
+[Windows]: #windows
+[Linux]: #linux
+[Advanced Installation from Source]: #advanced-installation-from-source
+[Prerequisites]: #prerequisites
+[CMake]: #cmake
+[Make]: #make
+[Obtaining GCC]: #obtaining-gcc
+[Sourcery Store]: http://www.sourceryinstitute.org/store
+[Sourcery Institute Store]: http://www.sourceryinstitute.org/store
+[VirtualBox]: http://www.virtualbox.org
+[download and installation instructions]: http://www.sourceryinstitute.org/uploads/4/9/9/6/49967347/overview.pdf
+[yum]: http://yum.baseurl.org
+[apt-get]: https://en.wikipedia.org/wiki/Advanced_Packaging_Tool
+[Issues]: https://github.com/sourceryinstitute/opencoarrays/issues
+[make.inc]: ./src/make.inc
+[opencoarrays]: ./src/extensions/opencoarrays.F90
+[prerequisites]: ./prerequisites
+[MPICH]: http://www.mpich.org
+[MVAPICH]:http://mvapich.cse.ohio-state.edu
+[MacPorts]: http://www.macports.org
+[GCC]: http://gcc.gnu.org
+[TS18508 Additional Parallel Features in Fortran]: http://isotc.iso.org/livelink/livelink?func=ll&objId=17181227&objAction=Open
+[GFortran Binaries]:  https://gcc.gnu.org/wiki/GFortranBinaries#FromSource
+[Installing GCC]: https://gcc.gnu.org/install/
+[Arch Linux]: https://www.archlinux.org
+[aur package]: https://aur.archlinux.org/packages/opencoarrays/
+[Releases]: https://github.com/sourceryinstitute/opencoarrays/releases
+[pdf img]: https://img.shields.io/badge/PDF-INSTALL.md-6C2DC7.svg?style=flat-square "Download as PDF"
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..fc4bd0d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,35 @@
+OpenCoarrays
+
+-- A Coarray Fortran application binary interface, application programmer
+    interface, and compiler wrapper.
+
+Copyright (c) 2015-2016, Sourcery Institute
+Copyright (c) 2015-2016, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2501743
--- /dev/null
+++ b/README.md
@@ -0,0 +1,136 @@
+<a name="top"> </a>
+
+[This document is formatted with GitHub-Flavored Markdown.              ]:#
+[For better viewing, including hyperlinks, read it online at            ]:#
+[https://github.com/sourceryinstitute/opencoarrays/blob/master/README.md]:#
+
+OpenCoarrays
+============
+
+[![CI Build Status][build img]](https://travis-ci.org/sourceryinstitute/opencoarrays)
+[![GitHub license][license img]](./LICENSE)
+[![Download as PDF][pdf img]](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/README.pdf)
+<!-- [![GitHub release][release img]](https://github.com/sourceryinstitute/opencoarrays/releases/latest) -->
+<!-- [![Release Downloads][download img]](https://github.com/sourceryinstitute/opencoarrays/releases) -->
+
+Download this file as a PDF document [here](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/README.pdf).
+
+* [Overview](#overview)
+* [Downloads](#downloads)
+* [Compatibility](#compatibility)
+* [Prerequisites](#prerequisites)
+* [Installation](#installation)
+* [Getting Started](#getting-started)
+* [Contributing](#contributing)
+* [Status](#status)
+* [Support](#support)
+* [Acknowledgements](#acknowledgements)
+
+Overview
+--------
+[OpenCoarrays] is an open-source software project that supports the coarray Fortran (CAF) parallel programming features of the Fortran 2008 standard and several features proposed for Fortran 2015 in the draft Technical Specification [TS 18508] _Additional Parallel Features in Fortran_.
+
+OpenCoarrays provides a compiler wrapper (named `caf`), a runtime library (named `libcaf_mpi.a` by default), and an executable file launcher (named `cafrun`).  With OpenCoarrays-aware compilers, the compiler wrapper passes the provided source code to the chosen compiler (`mpif90` by default).  For non-OpenCoarrays-aware compilers, the wrapper transforms CAF syntax into OpenCoarrys procedure calls before invoking the chosen compiler on the transformed code.  The runtime library supports c [...]
+
+OpenCoarrays defines an application binary interface ([ABI]) that translates high-level communication and synchronization requests into low-level calls to a user-specified communication library.  This design decision liberates compiler teams from hardwiring communication-library choice into their compilers and it frees Fortran programmers to express parallel algorithms once and reuse identical CAF source with whichever communication library is most efficient for a given hardware platform [...]
+
+OpenCoarrays enables CAF application developers to express parallel algorithms without hardwiring a particular version of a particular communication library or library version into their codes.  Such abstraction makes application code less sensitive to the evolution of the underlying communication libraries and hardware platforms.
+
+Downloads
+---------
+<!--[![Release Downloads][download img]](https://github.com/sourceryinstitute/opencoarrays/releases/latest)-->
+
+Please see our [Releases] page.
+
+Compatibility
+-------------
+The GNU Compiler Collection ([GCC]) Fortran front end ([gfortran]) is OpenCoarrays-aware for release versions 5.1.0 and higher.  Users of other compilers, including earlier versions of gfortran, can access a limited subset of CAF features via the provided [opencoarrays module].  After installation, please execute the `caf` script (which is installed in the `bin` directory of the installation path) with no arguments to see a list of the corresponding limitations.  Please also notify the c [...]
+
+Prerequisites
+-------------
+We expect our LIBCAF_MPI library to be the default OpenCoarrays library.  LIBCAF_MPI is the most straightforward to install and use, the most robust in terms of its internal complexity, and the most frequently updated and maintained.  Building LIBCAF_MPI requires prior installation of an MPI implementation.  We recommend [MPICH] generally or, if available, [MVAPICH] for better performance. [OpenMPI] is another option.
+
+We offer an unsupported LIBCAF_GASNet alternative.  We intend for LIBCAF_GASNet to be an "expert" alternative capable of outperforming MPI for some applications on some platforms.  LIBCAF_GASNet requires greater care to configure and use and building LIBCAF_GASNet requires prior installation of [GASNet].
+
+Installation
+------------
+
+Please see the [INSTALL.md] file.
+
+Getting Started
+---------------
+
+To start using OpenCoarrays, please see the [GETTING_STARTED.md] file.
+
+Contributing
+------------
+
+Please see the [CONTRIBUTING.md] file.
+
+Status
+------
+
+Please see the [STATUS.md] file.
+
+Support
+-------
+
+* Please submit bug reports and feature requests via our [Issues] page.
+* Please submit questions regarding installation and use via our [Google Group] by signing into [Google Groups] or [subscribing] and sending email to [opencoarrays at googlegroups.com].
+
+Acknowledgements
+----------------
+We gratefully acknowledge support from the following institutions:
+
+* [National Center for Atmospheric Research] for access to the Yellowstone/Caldera supercomputers and for logistics support during the initial development of OpenCoarrays.
+* [CINECA] for access to Eurora/PLX for the project HyPS- BLAS under the ISCRA grant program for 2014.
+* [Google] for support of a related [Google Summer of Code] 2014 project.
+* The National Energy Research Scientific Computing Center ([NERSC]), which is supported by the Office of Science of the U.S. Department of Energy under Contract No. DE-AC02-05CH11231, for access to the Hopper and Edison supercomputers under the OpenCoarrays project start allocation.
+* [Sourcery, Inc.], for financial support for the domain registration, web hosting, advanced development, and conference travel.
+
+[Hyperlinks]:#
+
+[Overview]: #overview
+[Downloads]: #downloads
+[Compatibility]: #compatibility
+[Prerequisites]: #prerequisites
+[Installation]: #installation
+[Contributing]: #contributing
+[Acknowledgements]: #acknowledgements
+
+
+[OpenCoarrays]: http://www.opencoarrays.org
+[ABI]: https://gcc.gnu.org/onlinedocs/gfortran/Coarray-Programming.html#Coarray-Programming
+[TS 18508]: http://isotc.iso.org/livelink/livelink?func=ll&objId=16769292&objAction=Open
+[MPI]: http://www.mpi-forum.org
+[GCC]: http://gcc.gnu.org
+[gfortran]: https://gcc.gnu.org/wiki/GFortran
+[opencoarrays module]: ./src/extensions/opencoarrays.F90
+[MPICH]: http://www.mpich.org
+[MVAPICH]: http://mvapich.cse.ohio-state.edu/)
+[OpenMPI]: http://www.open-mpi.org
+[Sourcery, Inc.]: http://www.sourceryinstitute.org
+[Google]: http://google.com
+[CINECA]: http://www.cineca.it/en
+[NERSC]: http://www.nersc.gov
+[National Center for Atmospheric Research]: http://ncar.ucar.edu
+[INSTALL.md]: ./INSTALL.md
+[GASNet]: http://gasnet.lbl.gov
+[CONTRIBUTING.md]: ./CONTRIBUTING.md
+[STATUS.md]: ./STATUS.md
+[GETTING_STARTED.md]: ./GETTING_STARTED.md
+[Google Groups]: https://groups.google.com
+[Google Group]: https://groups.google.com/forum/#!forum/opencoarrays
+[subscribing]: https://groups.google.com/forum/#!forum/opencoarrays/join
+[opencoarrays at googlegroups.com]: mailto:opencoarrays at googlegroups.com
+[Google Summer of Code]: https://www.google-melange.com/gsoc/org2/google/gsoc2014/gcc
+[OpenCoarrays Google Group]: https://groups.google.com/forum/#!forum/opencoarrays)
+[Issues]: https://github.com/sourceryinstitute/opencoarrays/issues
+[Releases]: https://github.com/sourceryinstitute/opencoarrays/releases
+
+[build img]: https://img.shields.io/travis-ci/sourceryinstitute/opencoarrays/master.svg?style=flat-square "View Travis-CI builds"
+[CI Master Branch]: https://travis-ci.org/sourceryinstitute/opencoarrays?branch=master "View Travis-CI builds"
+[download img]: https://img.shields.io/github/downloads/sourceryinstitute/opencoarrays/total.svg?style=flat-square "Download count image source"
+[license img]: https://img.shields.io/badge/License-BSD--3-blue.svg?style=flat-square "View BSD-3 License"
+[release img]: https://img.shields.io/github/release/sourceryinstitute/opencoarrays.svg?style=flat-square "View latest release"
+[pdf img]: https://img.shields.io/badge/PDF-README.md-6C2DC7.svg?style=flat-square "Download as PDF"
diff --git a/STATUS.md b/STATUS.md
new file mode 100644
index 0000000..51e4b47
--- /dev/null
+++ b/STATUS.md
@@ -0,0 +1,205 @@
+<a name="top"> </a>
+
+[This document is formatted with GitHub-Flavored Markdown.              ]:#
+[For better viewing, including hyperlinks, read it online at            ]:#
+[https://github.com/sourceryinstitute/opencoarrays/blob/master/STATUS.md]:#
+
+OpenCoarrays Status
+===================
+
+[![Download as PDF][pdf img]](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/STATUS.pdf)
+
+Download this file as a PDF document
+[here](http://md2pdf.herokuapp.com/sourceryinstitute/opencoarrays/blob/master/STATUS.pdf).
+
+ *  [Feature Coverage](#feature-coverage)
+ *  [Compiler Status](#compiler-status)
+     * [OpenCoarrays-Aware (OCA) Coarray Fortran (CAF) Compilers]
+     * [Non-OCA CAF Compilers]
+     * [Non-CAF Compilers]
+ *  [Library Status](#libary-status)
+     *  [libcaf_mpi]
+     *  [libcaf_x]
+     *  [libcaf_gasnet]
+     *  [libcaf_armci]
+     *  [libcaf_single]
+ *  [Known Issues](#known-issues)
+     * [Library Issues](#library-issues)
+     * [Compiler Issues](#compiler-issues)
+         * [GNU (gfortran)]
+         * [Cray (ftn)]
+         * [Intel (ifort)]
+         * [Numerical Algorithms Group (nagfor)]
+         * [Portland Group (pgfortran)]
+         * [IBM (xlf)]
+ *  [To-Do List](#to-do-list)
+
+Feature Coverage
+----------------
+
+ * Except as noted under [Known Issues], [libcaf_mpi] supports the following features as described
+   in the Fortran 2008 standard:
+     * allocatable and non-allocatable coarrays of intrinsic or derived type
+     * synchronization statements
+     * atomics
+     * locks
+     * critical
+ * Except as noted under [Known Issues], [libcaf_mpi] supports the collective
+   subroutines proposed for Fortran 2015 in the draft Technical Specification
+   [TS 18508] _Additional Parallel Features in Fortra_ subroutines for a limited
+   set of intrinsic types and kinds.  Adding additional types and kinds is
+   straightforward.  Please submit a request via the [Issues] page or consider
+   adding the requisite code by [forking the OpenCoarrays repository] and submitting
+   [pull request via GitHub]. Also see [CONTRIBUTING.md] for more information.
+
+Compiler Status
+---------------
+
+The OpenCoarrays CMake build and test scripts detect the compiler identity, version, and operating system (OS).  The scripts use this information to build and test the approproiate functionality for the compiler and OS. Each current compilers' status falls into one of three categories:
+
+<a name="oca-caf-compilers">
+ * **OpenCoarrays-Aware (OCA) Coarray Fortran (CAF) Compilers**</a>
+     * _Definition:_ The compiler translates CAF statements into OpenCoarrays application binary interface ([ABI]) calls.
+     * _Example_: GNU Fortran 5.1 or later (see <https://gcc.gnu.org/wiki/Coarray> for the compiler's CAF status..)
+     * _Use case_: compile most Fortran 2008 coarray programs and some programs that use proposed Fortran 2015 features.
+<a name="non-oca-caf-compilers">
+ * **Non-OCA CAF Compilers**</a>
+     * _Definition:_ The compiler supports CAF but does not generate calls to the OpenCoarrays [ABI].
+     * _Examples_: Cray compiler (except on CS Series clusters), Intel compiler (except on OS X).
+     * _Use case_: extend the compiler's native CAF using the [opencoarrays module] types and procedures.
+<a name="non-caf-compilers">
+ * **Non-CAF Compilers**</a>
+     * _Definition_: The compiler provides no direct support for CAF, but the user can access a subset of CAF features via use association with the [opencoarrays module], e.g., `use opencoarrays, only : co_sum`.
+     * _Examples_: GNU Fortran 4.9 or any compiler not mentioned above.
+     * _Use case_: Use the OpenCoarrays `caf` compiler wrapper to compile those CAF  programs for which the proposed Fortran 2015 collective subroutines cover all of the application's communication requirements.
+
+We have encountered several research applications that match the latter use case.  If you encounter difficulties, please submit a bug report or feature request via the [Issues] page. Also submit a feature request to the relevant compiler technical support.
+
+The OpenCoarrays team offers contract development and support for making compilers OpenCoarrays-aware.  If this is of interest, please inform the compiler's technical support as well as the OpenCoarrays team.   To contribute code, including documentation and tests, see the [CONTRIBUTING.md] file.  To contribute funding, including funding in support of feature reqeusts, see the [Sourcery Store].
+
+Library Status
+--------------
+
+<a name="libcaf-mpi">
+* **libcaf_mpi**</a> (Default CMake build): Production transport layer that uses
+  the Message Passing Interface ([MPI]) 3.0 one-sided communication, which
+  exploits a hardware platform's native support for Remote Direct Memory
+  Access (RDMA) if available.
+<a name="libcaf-x">
+* **libcaf_x**</a> (where x = [CUDA], [OpenMP], [Pthreads], [OpenSHMEM], etc.): the
+  OpenCoarrays [ABI] design facilitates implementation atop any one of several
+  low-level parallel programming models, vectorization APIs, or combination
+  thereof. We have performed limited evaluations and research development of
+  versions based on multiple APIs.  Please email the [OpenCoarrays Google Group]
+  for support with targeting other APIs and hardware, including, for example,
+  graphics processing units (GPUs) and heterogeneous CPU/GPU platforms.
+<a name="libcaf-gasnet">
+* **libcaf_gasnet**</a> (Advanced Make build): Experimental transport layer that
+  is currently out-of-date but might exhibit higher performance than [MPI] on
+  platforms for which [GASNet] provides a tuned conduit.  Contact the
+  [OpenCoarrays Google Group] for further information.
+<a name="libcaf-armci">
+* **libcaf_armci**</a> (Unsupported): developed for research purposes and evaluation.
+<a name="libcaf-single">
+* **libcaf_single**</a> (Unsupported): developed to mirror the like-named library that
+  is included in GNU Fortran to facilitate compiling single-image (sequential)
+  executables from CAF programs in the absence of a parallel communication library.
+
+Known Issues
+------------
+
+### Library Issues ###
+
+* The [opencoarrays module] and `caf` compiler wrapper do not support the square-bracket
+  syntax required for point-to-point communication.  This limitation only impacts
+  [non-CAF compilers]. For a list of other limitations with non-CAF compilers, execute
+  the `caf` bash script with no arguments.  The `caf` script is installed in the `bin`
+  subdirectory of the installation path.
+* Efficient strided array transfer works only for intrinsic types.
+* Efficient strided array transfer is not supported for remote-to-remote transfers.
+* Overwriting a coarray with itself is not managed efficiently for strided transfers.
+* Communication
+     * Vector subscripts are not yet supported
+     * For character assignments, some issues with padding and character kind conversions exist.
+     * For array assignments, some issues with numeric type conversion exist.
+
+
+### Compiler Issues ###
+
+<a name="compiler-issues-gnu">
+* **GNU** (gfortran)</a>
+     * Derived-type coarrays with allocatable/pointer components are not yet handled
+       properly.
+     * Problems exist with combining array access to a corray with a scalar component
+        access as in `coarray(:,:)[i]%comp`.
+     * An internal compiler error (ICE) occurs with non-allocatable, polymorphic coarrays
+       in `associate` or `select type` statements.
+     * `co_reduce` requires GCC 5.4.0 or later.
+     * `co_reduce` only supports arguments of intrinsic type.
+     * Proper execution of `stop` when `this_image()>1` requires GCC 5.4.0 or later.
+     * No support for type finalization or allocatable components of derived-type coarrays
+       passed to the collective subroutines (e.g., `co_sum`, `co_reduce`, etc.).
+     * Optimization levels other than `-O0` require GCC 5.3.0 or later. 
+     * Using `stop` to halt an individual image without halting all images requires GCC 5.4.0 or later. 
+<a name="compiler-issues-intel">
+* **Intel** (ifort)</a>
+     * Supported via the [opencoarrays module]  only.
+<a name="compiler-issues-cray">
+* **Cray** (ftn) </a>
+     * Supported via the [opencoarrays module] only.
+<a name="compiler-issues-nag">
+* **Numerical Algorithms Group** (nagfor)</a>
+     * Supported via the [opencoarrays module] only.
+<a name="compiler-issues-pg">
+* **Portland Group** (pgfortran)</a>
+     * Supported via the [opencoarrays module] only.
+<a name="compiler-issues-ibm">
+* **IBM** (xlf)</a>
+     * Supported via the [opencoarrays module] only.
+
+To-Do List
+----------
+
+* [ ] Additional tests and documentation.
+* [ ] Improvement of error handling and diagnostics, including but not
+      limited to filling the `ERRMSG=` variable in case of errors.
+* [ ] Providing a diagnostic mode with run-time consistency checks.
+* [ ] Better integration with the test cases of GCC.  For more information,
+      see the GCC source code files in `gcc/testsuite/gfortran.dg/`,
+      in particular, the `dg-do run` tests in `coarray*f90` and `coarray/`).
+
+
+[Hyperlinks]:#
+   [OpenMP]: http://openmp.org
+   [CUDA]: http://www.nvidia.com/object/cuda_home_new.html
+   [Pthreads]: https://computing.llnl.gov/tutorials/pthreads/
+   [MPI]: http://www.mpi-forum.org
+   [OpenSHMEM]: http://openshmem.org
+   [GASNet]: https://gasnet.lbl.gov
+   [CONTRIBUTING.md]: ./CONTRIBUTING.md
+   [OpenCoarrays-Aware (OCA) Coarray Fortran (CAF) Compilers]: #oca-caf-compilers
+   [Known Issues]: #known-issues
+   [Non-OCA CAF Compilers]: #non-oca-caf-compilers
+   [Non-CAF Compilers]: #non-caf-compilers
+   [libcaf_mpi]: #libcaf-mpi
+   [libcaf_x]: #libcaf-x
+   [libcaf_gasnet]:  #libcaf-gasnet
+   [libcaf_single]: #libcaf-single
+   [libcaf_armci]: #libcaf-armci
+  [GNU (gfortran)]: #compiler-issues-gnu
+  [Cray (ftn)]: #compiler-issues-cray
+  [Intel (ifort)]: #compiler-issues-intel
+  [Numerical Algorithms Group (nagfor)]: #compiler-issues-nag
+  [Portland Group (pgfortran)]: #compiler-issues-pg
+  [IBM (xlf)]: #compiler-issues-ibm
+  [forking the OpenCoarrays repository]: https://github.com/sourceryinstitute/opencoarrays/blob/master/STATUS.md#fork-destination-box
+
+[TS 18508]: http://isotc.iso.org/livelink/livelink?func=ll&objId=17181227&objAction=Open
+[opencoarrays module]: ./src/extensions/opencoarrays.F90
+[ABI]: https://gcc.gnu.org/onlinedocs/gfortran/Function-ABI-Documentation.html#Function-ABI-Documentation
+[pull requests via GitHub]: https://github.com/sourceryinstitute/opencoarrays/compare
+[pull request via GitHub]: https://github.com/sourceryinstitute/opencoarrays/compare
+[OpenCoarrays Google Group]: https://groups.google.com/forum/#!forum/opencoarrays
+[Sourcery Store]: http://www.sourceryinstitute.org/store
+[Issues]: https://github.com/sourceryinstitute/opencoarrays/issues
+[pdf img]: https://img.shields.io/badge/PDF-STATUS.md-6C2DC7.svg?style=flat-square "Download as PDF"
diff --git a/cmake/pkg/OpenCoarraysConfig.cmake.in b/cmake/pkg/OpenCoarraysConfig.cmake.in
new file mode 100644
index 0000000..1be75d0
--- /dev/null
+++ b/cmake/pkg/OpenCoarraysConfig.cmake.in
@@ -0,0 +1 @@
+include("${CMAKE_CURRENT_LIST_DIR}/OpenCoarraysTargets.cmake")
diff --git a/cmake/uninstall.cmake.in b/cmake/uninstall.cmake.in
new file mode 100644
index 0000000..dd39593
--- /dev/null
+++ b/cmake/uninstall.cmake.in
@@ -0,0 +1,23 @@
+# Adapted from http://www.cmake.org/Wiki/CMake_FAQ#Can_I_do_.22make_uninstall.22_with_CMake.3F May 1, 2014
+
+if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt")
+  message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt")
+endif(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt")
+
+file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files)
+string(REGEX REPLACE "\n" ";" files "${files}")
+foreach(file ${files})
+  message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
+  if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
+    exec_program(
+      "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
+      OUTPUT_VARIABLE rm_out
+      RETURN_VALUE rm_retval
+      )
+    if(NOT "${rm_retval}" STREQUAL 0)
+      message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
+    endif(NOT "${rm_retval}" STREQUAL 0)
+  else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
+    message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
+  endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
+endforeach(file)
diff --git a/doc/dependency_tree/gitkeep.sh b/doc/dependency_tree/gitkeep.sh
new file mode 100755
index 0000000..29451a3
--- /dev/null
+++ b/doc/dependency_tree/gitkeep.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# gitkeep.sh
+#
+
+description=\
+'bash script for adding empty ".gitkeep" files at the branch ends of empty directory trees'
+
+use_case=\
+"Use case:
+
+    Force git to track otherwise empty directories such as doc/dependency_tree/opencoarrays,
+    which exists solely for purposes of displaying the OpenCoarrays dependency tree via the
+    command 'tree opencoarrays'."
+#
+# OpenCoarrays is distributed under the OSI-approved BSD 3-clause License:
+# Copyright (c) 2015, 2016, Sourcery, Inc.
+# Copyright (c) 2015, 2016, Sourcery Institute
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice, this
+#    list of conditions and the following disclaimer in the documentation and/or
+#    other materials provided with the distribution.
+# 3. Neither the names of the copyright holders nor the names of their contributors
+#    may be used to endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+this_script=`basename $0`
+
+usage()
+{
+    echo ""
+    echo " $this_script - $description"
+    echo ""
+    echo " Usage: "
+    echo "      $this_script <path-to-modify>"
+    echo ""
+    echo " Examples:"
+    echo ""
+    echo "   $this_script ."
+    echo ""
+    printf "$use_case"
+    exit 1
+}
+
+# If this script is invoked without arguements, print usage information
+# and terminate execution of the script.
+if [[ $# == 0 || "$1" == "-h" || "$1" == "--help" ]]; then
+  usage | less
+  exit 1
+fi
+
+# Interpret the first argument as the name of the directory tree to fill
+export path_to_modify=$1
+
+# Create an empty ".gitkeep" file in all empty subdirectories
+find $path_to_modify -type d -empty -exec touch {}/.gitkeep \;
diff --git a/doc/dependency_tree/opencoarrays-tree.txt b/doc/dependency_tree/opencoarrays-tree.txt
new file mode 100644
index 0000000..6f4fa74
--- /dev/null
+++ b/doc/dependency_tree/opencoarrays-tree.txt
@@ -0,0 +1,12 @@
+opencoarrays
+├── cmake-3.4.0
+└── mpich-3.1.4
+    └── gcc-6.1.0
+        ├── flex-2.6.0
+        │   └── bison-3.0.4
+        │       └── m4-1.4.17
+        ├── gmp
+        ├── mpc
+        └── mpfr
+
+9 directories, 0 files
diff --git a/doc/dependency_tree/opencoarrays/cmake-3.4.0/.gitkeep b/doc/dependency_tree/opencoarrays/cmake-3.4.0/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/flex-2.6.0/bison-3.0.4/m4-1.4.17/.gitkeep b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/flex-2.6.0/bison-3.0.4/m4-1.4.17/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/flex-2.6.0/bison-3.0.4/m4-1.4.17/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/gmp/.gitkeep b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/gmp/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/gmp/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/mpc/.gitkeep b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/mpc/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/mpc/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/mpfr/.gitkeep b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/mpfr/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/doc/dependency_tree/opencoarrays/mpich-3.1.4/gcc-6.1.0/mpfr/.gitkeep
@@ -0,0 +1 @@
+
diff --git a/doc/robodoc.css b/doc/robodoc.css
new file mode 100644
index 0000000..19b4807
--- /dev/null
+++ b/doc/robodoc.css
@@ -0,0 +1,309 @@
+/****h* ROBODoc/ROBODoc Cascading Style Sheet
+ * FUNCTION
+ *   This is a modified version of the default Robodoc cascading 
+ *   style sheet generated with ROBODoc.
+ *
+ *   This style-sheet defines the following layout
+ *      +----------------------------------------+
+ *      |    logo                                |
+ *      +----------------------------------------+
+ *      |    extra                               |
+ *      +----------------------------------------+
+ *      |                              | navi-   |
+ *      |                              | gation  |
+ *      |      content                 |         |
+ *      |                              |         |
+ *      +----------------------------------------+
+ *      |    footer                              |
+ *      +----------------------------------------+
+ *
+ *   The Robodoc default style sheet is based on a style sheet
+ *   that was automatically generated with the Strange Banana style
+ *   sheet generator.  See http://www.strangebanana.com/generator.aspx
+ *
+ ******
+ * $Id: html_generator.c,v 1.94 2008/06/17 11:49:27 gumpu Exp $
+ */
+
+body
+{
+    background-color:    rgb(255,255,255);
+    color:               rgb(98,84,55);
+    font-family:         Arial, serif;
+    border-color:        rgb(226,199,143);
+}
+
+pre
+{
+    font-family:      monospace;
+    margin:      15px;
+    padding:     5px;
+    white-space: pre;
+    color:       #ffe;
+}
+
+pre.source
+{
+    background-color: rgb(192,192,192);
+    border: dashed #aa9 1px;
+}
+
+p
+{
+    margin:15px;
+}
+
+p.item_name 
+{
+    font-weight: bolder;
+    margin:5px;
+    font-size: 120%;
+}
+
+#content
+{
+    font-size:           100%;
+    color:               rgb(0,128,128);
+    background-color:    rgb(255,255,255);
+    border-left-width:   0px; 
+    border-right-width:  0px; 
+    border-top-width:    0px; 
+    border-bottom-width: 0px;
+    border-left-style:   none; 
+    border-right-style:  none; 
+    border-top-style:    none; 
+    border-bottom-style: none;
+    padding:             40px 31px 14px 17px;
+    border-color:        rgb(0,128,128);
+    text-align:          justify;
+}
+
+#navigation
+{
+    background-color: rgb(192,192,192);
+    color:            rgb(230,221,202);
+    font-family:      "Times New Roman", serif;
+    font-style:       normal;
+    border-color:     rgb(0,128,128);
+}
+
+a.menuitem
+{
+    font-size: 120%;
+    background-color:    rgb(0,128,128);
+    color:               rgb(255,255,255);
+    font-variant:        normal;
+    text-transform:      none;
+    font-weight:         normal;
+    padding:             1px 8px 3px 1px;
+    margin-left:         5px; 
+    margin-right:        5px; 
+    margin-top:          5px; 
+    margin-bottom:       5px;
+    border-color:        rgb(159,126,57);
+    text-align:          right;
+}
+
+#logo, #logo a
+{
+    font-size: 130%;
+    background-color:   rgb(0,128,128);
+    color:              rgb(255,255,255);
+    font-family:        Georgia, serif;
+    font-style:         normal;
+    font-variant:       normal;
+    text-transform:     none;
+    font-weight:        bold;
+    padding:            20px 18px 20px 18px;
+    border-color:       rgb(0,128,128);
+    text-align:         right;
+}
+
+#extra, #extra a
+{
+    font-size: 128%;
+    background-color:    rgb(255,255,255);
+    color:               rgb(0,128,128);
+    font-style:          normal;
+    font-variant:        normal;
+    text-transform:      none;
+    font-weight:         normal;
+    border-left-width:   0px; 
+    border-right-width:  0px; 
+    border-top-width:    0px; 
+    border-bottom-width: 0px;
+    border-left-style:   none; 
+    border-right-style:  none; 
+    border-top-style:    none; 
+    border-bottom-style: none;
+    padding: 12px 12px 12px 12px;
+    border-color:        rgb(0,128,128);
+    text-align:          center;
+}
+
+#content a
+{
+    color:              rgb(159,126,57);
+    text-decoration:    none;
+}
+
+#content a:hover, #content a:active
+{
+    color:              rgb(255,255,255);
+    background-color:   rgb(159,126,57);
+}
+
+a.indexitem
+{
+    display: block;
+}
+
+h1, h3, h5, h6
+{
+    background-color: rgb(192,192,192);
+    font-family:      Arial, serif;
+    font-style:       normal;
+    font-variant:     normal;
+    text-transform:   none;
+    font-weight:      normal;
+}
+
+h2, h4
+{
+    background-color: rgb(255,255,255);
+    font-family:      Arial, serif;
+    font-style:       normal;
+    font-variant:     normal;
+    text-transform:   none;
+    font-weight:      normal;
+}
+
+h1
+{
+    font-size: 151%;
+}
+
+h2
+{
+    font-size: 142%;
+}
+
+h3
+{
+    font-size: 133%;
+}
+
+h4
+{
+    font-size: 124%;
+}
+
+h5
+{
+    font-size: 115%;
+}
+
+h6
+{
+    font-size: 106%;
+}
+
+#navigation a
+{
+    text-decoration: none;
+}
+
+.menuitem:hover
+{
+    background-color:   rgb(195,165,100);
+    color:              rgb(0,128,128);
+}
+
+#extra a
+{
+    text-decoration: none;
+}
+
+#logo a
+{
+    text-decoration: none;
+}
+
+#extra a:hover
+{
+}
+
+/* layout */
+#navigation
+{
+    width:       22%; 
+    position:    relative; 
+    top:         0; 
+    right:       0; 
+    float:       right; 
+    text-align:  center;
+    margin-left: 10px;
+}
+
+.menuitem       {width: auto;}
+#content        {width: auto;}
+.menuitem       {display: block;}
+
+
+div#footer
+{
+    background-color: rgb(198,178,135);
+    color:      rgb(98,84,55);
+    clear:      left;
+    width:      100%;
+    font-size:   71%;
+}
+
+div#footer a
+{
+    background-color: rgb(198,178,135);
+    color:            rgb(98,84,55);
+}
+
+div#footer p
+{
+    margin:0;
+    padding:5px 10px
+}
+
+span.keyword
+{
+    color: #000;
+}
+
+span.comment
+{
+    color: #080;
+}
+
+span.quote
+{
+    color: #F00;
+}
+
+span.squote
+{
+    color: #F0F;
+}
+
+span.sign
+{
+    color: #008B8B;
+}
+
+span.line_number
+{
+    color: #808080;
+}
+
+ at media print
+{
+    #navigation {display: none;}
+    #content    {padding: 0px;}
+    #content a  {text-decoration: underline;}
+}
diff --git a/doc/robodoc.rc b/doc/robodoc.rc
new file mode 100644
index 0000000..7764dca
--- /dev/null
+++ b/doc/robodoc.rc
@@ -0,0 +1,67 @@
+# Example robodoc.rc
+#
+items:
+    NAME
+    SYNOPSIS
+    PROGRAM
+    FUNCTION
+    SUBROUTINE
+    MODULE
+    INPUTS
+    OUTPUTS
+    USAGE
+    PUBLIC
+preformatted items:
+ignore files:
+options:
+    --doc html
+    --one_file_per_header 
+    --src ..
+    --html 
+    --multidoc 
+    --documenttitle "OpenCoarrays" 
+    --index 
+    --tabsize 8 
+    --nopre 
+    --sections
+    --css robodoc.css
+    --syntaxcolors
+headertypes:
+    p  "Programs"              robo_programs      1
+    m  "Modules"               robo_modules       2
+    d  "Derived types"         robo_derived_types 3
+    t  "Type-bound procedures" robo_type_bound_procedures  3
+    g  "Generic Interfaces"    robo_constructor_functions 3
+    f  "Functions"             robo_functions   3
+    s  "Subroutines"           robo_subroutines 3
+    l  "Libraries"             robo_libraries 1
+    e  "Tests"                 robo_tests 1
+ignore files:
+    README
+    *~
+accept files:
+    *.f90
+    *.F90
+    *.c
+    *.cpp
+keywords:
+    program 
+    use 
+    iso_fortran_env
+    implicit 
+    none
+    integer
+    parameter
+    real
+    kind 
+    allocatable
+    allocate
+    do
+    enddo
+    end
+    sync 
+    all
+    if
+    deallocate
+    num_images
+    this_image
diff --git a/doc/sample-compiler-output.c b/doc/sample-compiler-output.c
new file mode 100644
index 0000000..f87b2ab
--- /dev/null
+++ b/doc/sample-compiler-output.c
@@ -0,0 +1,108 @@
+/****p* doc/sample-compiler-output.c
+ * NAME
+ *   sendrcv
+ * SYNOPSIS
+ *   This C program represents the code an OpenCoarrays-compatible
+ *   Fortran compiler might generate from doc/sample-fortran-source.f90.
+ *   The code delegates all necessary synchronization and communicaiton
+ *   to an OpenCoarrays transport layer.  In this program, image 1 puts
+ *   its local elements of an array coarray into the corresponding
+ *   elements of image 2.
+ *
+ * SOURCE
+*/
+sendrecv ()
+{
+  struct array2_real(kind=8) d;
+  integer(kind=4) i;
+  integer(kind=4) me;
+  integer(kind=4) np;
+
+  d.data = 0B;
+  {
+    integer(kind=4) overflow.0;
+
+    overflow.0 = 0;
+    if (overflow.0 != 0)
+      {
+        _gfortran_runtime_error (
+         &"Integer overflow when calculating the amount of memory to allocate"[1]{lb: 1 sz: 1}
+        );
+      }
+    else
+      {
+        if (d.data != 0B)
+          {
+            _gfortran_runtime_error_at (
+              &"At line 9 of file sample-fortran-source.f90"[1]{lb: 1 sz: 1},
+              &"Attempting to allocate already allocated variable \'%s\'"[1]{lb: 1 sz: 1}, &"d"[1]{lb: 1 sz: 1}
+            );
+          }
+        else
+          {
+            d.data = (void * restrict) _gfortran_caf_register (8000, 1, &d.token, 0B, 0B, 0);
+          }
+      }
+    d.dtype = 537;
+    d.dim[0].lbound = 1;
+    d.dim[0].ubound = 1000;
+    d.dim[0].stride = 1;
+    d.dim[1].lbound = 1;
+    d.offset = -1;
+    np = _gfortran_caf_num_images (0, -1);
+    me = _gfortran_caf_this_image (0);
+    i = 1;
+    if (i <= 1000)
+      {
+        while (1)
+          {
+            {
+              logical(kind=4) D.2368;
+
+              (*(real(kind=8)[0:] * restrict) d.data)[d.offset + (integer(kind=8)) i] = (real(kind=8)) i;
+              L.1:;
+              D.2368 = i == 1000;
+              i = i + 1;
+              if (D.2368) goto L.2;
+            }
+          }
+      }
+    L.2:;
+    __sync_synchronize ();
+    _gfortran_caf_sync_all (0B, 0B, 0);
+    if (me == 1)
+      {
+        _gfortran_caf_send (d.token, 0, 3 - (integer(kind=4)) d.dim[1].lbound, &d, 0B, &d, 8, 8);
+      }
+    L.3:;
+    __sync_synchronize ();
+    _gfortran_caf_sync_all (0B, 0B, 0);
+    if (d.data == 0B)
+      {
+        _gfortran_runtime_error_at (
+         &"At line 24 of file sample-fortran-source.f90"[1]{lb: 1 sz: 1},
+         &"Attempt to DEALLOCATE unallocated \'%s\'"[1]{lb: 1 sz: 1}, &"d"[1]{lb: 1 sz: 1}
+        );
+      }
+    else
+      {
+        _gfortran_caf_deregister (&d.token, 0B, 0B, 0);
+      }
+    d.data = 0B;
+  }
+}
+
+
+main (integer(kind=4) argc, character(kind=1) * * argv)
+{
+  static integer(kind=4) options.1[9] = {68, 1023, 0, 0, 1, 1, 0, 0, 31};
+
+  _gfortran_caf_init (&argc, &argv);
+  _gfortran_set_args (argc, argv);
+  _gfortran_set_options (9, &options.1[0]);
+  sendrecv ();
+  __sync_synchronize ();
+  _gfortran_caf_finalize ();
+  return 0;
+}
+/******
diff --git a/doc/sample-fortran-source.f90 b/doc/sample-fortran-source.f90
new file mode 100644
index 0000000..83f86e7
--- /dev/null
+++ b/doc/sample-fortran-source.f90
@@ -0,0 +1,37 @@
+!****p* doc/sample-fortran-source.f90
+! NAME
+!   sendrcv
+! SYNOPSIS
+!   In this simple coarray Fortran program, image 1 puts its
+!   local elements of an array coarray into the corresponding
+!   elements of image 2.  The corresponding C program that an
+!   OpenCoarrays-compatible compiler might generate from this
+!   code is in doc/sample-compiler-output.c.
+! SOURCE
+program sendrecv
+  use iso_fortran_env
+  implicit none
+
+  integer :: me, np, i
+  integer, parameter :: n=1000
+  real(kind=real64), allocatable :: d(:)[:]
+
+  allocate(d(n)[*])
+
+  np = num_images()
+  me = this_image()
+
+  do i=1,n
+     d(i) = i
+  enddo
+
+  sync all
+
+  if(me == 1) d(:)[2] = d
+
+  sync all
+
+  deallocate(d)
+
+end program
+!******
diff --git a/install.sh b/install.sh
new file mode 100755
index 0000000..c8bb2d2
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,312 @@
+#!/usr/bin/env bash
+#
+# install.sh
+#
+# -- This script installs OpenCoarrays and its prerequisites.
+#
+# OpenCoarrays is distributed under the OSI-approved BSD 3-clause License:
+# Copyright (c) 2015-2016, Sourcery, Inc.
+# Copyright (c) 2015-2016, Sourcery Institute
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice, this
+#    list of conditions and the following disclaimer in the documentation and/or
+#    other materials provided with the distribution.
+# 3. Neither the names of the copyright holders nor the names of their contributors
+#    may be used to endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Portions of this script derive from BASH3 Boilerplate and are distributed under
+# the following license:
+#
+# The MIT License (MIT)
+#
+# Copyright (c) 2014 Kevin van Zonneveld
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+# The invocation of bootstrap.sh below performs the following tasks:
+# (1) Import several bash3boilerplate helper functions & default settings.
+# (2) Set several variables describing the current file and its usage page.
+# (3) Parse the usage information (default usage file name: current file's name with -usage appended).
+# (4) Parse the command line using the usage information.
+
+### Start of boilerplate -- do not edit this block #######################
+export OPENCOARRAYS_SRC_DIR="${OPENCOARRAYS_SRC_DIR:-${PWD%/}}"
+if [[ ! -f "${OPENCOARRAYS_SRC_DIR}/src/libcaf.h" ]]; then
+  echo "Please run this script inside the top-level OpenCoarrays source directory or "
+  echo "set OPENCOARRAYS_SRC_DIR to the OpenCoarrays source directory path."
+  exit 1
+fi
+export B3B_USE_CASE="${B3B_USE_CASE:-${OPENCOARRAYS_SRC_DIR}/prerequisites/use-case}"
+if [[ ! -f "${B3B_USE_CASE:-}/bootstrap.sh" ]]; then
+  echo "Please set B3B_USE_CASE to the bash3boilerplate use-case directory path."
+  exit 2
+else
+    # shellcheck source=./prerequisites/use-case/bootstrap.sh
+    source "${B3B_USE_CASE}/bootstrap.sh" "$@"
+fi
+### End of boilerplate -- start user edits below #########################
+
+# Set expected value of present flags that take no arguments
+export __flag_present=1
+
+# Set up a function to call when receiving an EXIT signal to do some cleanup. Remove if
+# not needed. Other signals can be trapped too, like SIGINT and SIGTERM.
+function cleanup_before_exit () {
+  info "Cleaning up. Done"
+}
+trap cleanup_before_exit EXIT # The signal is specified here. Could be SIGINT, SIGTERM etc.
+
+
+### Validation (decide what's required for running your script and error out)
+#####################################################################
+
+[ -z "${LOG_LEVEL:-}" ] && emergency "Cannot continue without LOG_LEVEL. "
+
+# shellcheck disable=SC2154
+if [[ "${arg_v}" == "${__flag_present}" || "${arg_l}" == "${__flag_present}" || ! -z "${arg_P:-${arg_U:-${arg_V:-${arg_D}}}}" ]]; then
+   print_debug_only=7
+   if [ "$(( LOG_LEVEL < print_debug_only ))" -ne 0 ]; then
+     debug "Supressing info and debug messages: one of {-l, -v, -P, -U, -V, -D} present."
+     suppress_info_debug_messages
+   fi
+fi
+
+[ ! -z "${arg_D}" ] && [ ! -z "${arg_P:-${arg_U:-${arg_V}}}" ] &&
+  emergency "Please pass only one of {-D, -p, -P, -U, -V} or a longer equivalent (multiple detected). [exit 101]"
+
+[ ! -z "${arg_P}" ] && [ ! -z "${arg_U:-${arg_V}}" ] &&
+  emergency "Please pass only one of {-D, -p, -P, -U, -V} or a longer equivalent (multiple detected). [exit 103]"
+
+[ ! -z "${arg_U}" ] && [ ! -z "${arg_V}" ] &&
+  emergency "Please pass only one of {-D, -p, -P, -U, -V} or a longer equivalent (multiple detected). [exit 104]"
+
+### Print bootstrapped magic variables to STDERR when LOG_LEVEL
+### is at the default value (6) or above.
+#####################################################################
+# shellcheck disable=SC2154
+{
+info "__file: ${__file}"
+info "__dir: ${__dir}"
+info "__base: ${__base}"
+info "__os: ${__os}"
+info "__usage: ${__usage}"
+info "LOG_LEVEL: ${LOG_LEVEL}"
+
+info  "-c (--with-c):           ${arg_c}"
+info  "-C (--with-cxx):         ${arg_C}"
+info  "-d (--debug):            ${arg_d}"
+info  "-D (--print-downloader): ${arg_D}"
+info  "-e (--verbose):          ${arg_e}"
+info  "-f (--with-fortran):     ${arg_f}"
+info  "-h (--help):             ${arg_h}"
+info  "-i (--install-prefix):   ${arg_i}"
+info  "-I (--install-version):  ${arg_I}"
+info  "-j (--num-threads):      ${arg_j}"
+info  "-l (--list-packages):    ${arg_l}"
+info  "-m (--with-cmake):       ${arg_m}"
+info  "-M (--with-mpi):         ${arg_M}"
+info  "-n (--no-color):         ${arg_n}"
+info  "-p (--package):          ${arg_p}"
+info  "-P (--print-path):       ${arg_P}"
+info  "-U (--print-url):        ${arg_U}"
+info  "-v (--version):          ${arg_v}"
+info  "-V (--print-version):    ${arg_V}"
+}
+# This file is organized into three sections:
+# 1. Command-line argument and environment variable processing.
+# 2. Function definitions.
+# 3. Main body.
+# The script depends on several external programs, including a second script that
+# builds prerequisite software.  Building prerequisites requires network access
+# unless tar balls of the prerequisites are present.
+
+# TODO:
+# 1. Collapse the body of the main conditional branches in the find_or_install function
+#    into one new function.
+# 2. Verify that script-installed packages meet the minimum version number.
+# 3. Add a script_transfer function to collapse the stack_pop x; stack_push z y
+#    pattern into one statement
+# 4. Consider adding mpich and cmake to the dependency stack before passing them to
+#    find_or_install to make the blocks inside find_or_install more uniform.
+#    Alternatively, check the dependency stacks for the package before entering the
+#    main conditional blocks in find_or_install.
+#
+
+
+# __________ Process command-line arguments and environment variables _____________
+
+this_script="$(basename "$0")"
+export this_script
+
+export install_path="${arg_i%/}"
+info "install_path=\"${install_path}\""
+
+export num_threads="${arg_j}"
+info "num_threads=\"${arg_j}\""
+
+export opencoarrays_src_dir="${OPENCOARRAYS_SRC_DIR}"
+info "opencoarrays_src_dir=${OPENCOARRAYS_SRC_DIR}"
+
+export build_path="${opencoarrays_src_dir}"/prerequisites/builds
+info "build_path=\"${opencoarrays_src_dir}\"/prerequisites/builds"
+
+export build_script="${opencoarrays_src_dir}"/prerequisites/build.sh
+info "build_script=\"${opencoarrays_src_dir}\"/prerequisites/build.sh"
+
+# ___________________ Define functions for use in the Main Body ___________________
+
+# Include stack management functions
+#. ./prerequisites/stack.sh
+# shellcheck source=./prerequisites/stack.sh
+source $opencoarrays_src_dir/prerequisites/stack.sh
+stack_new dependency_pkg
+stack_new dependency_exe
+stack_new dependency_path
+stack_new script_installed
+
+# shellcheck source=./prerequisites/install-functions/find_or_install.sh
+source $opencoarrays_src_dir/prerequisites/install-functions/find_or_install.sh
+
+# shellcheck source=./prerequisites/install-functions/print_header.sh
+source $opencoarrays_src_dir/prerequisites/install-functions/print_header.sh
+
+# shellcheck source=./prerequisites/install-functions/build_opencoarrays.sh
+source $opencoarrays_src_dir/prerequisites/install-functions/build_opencoarrays.sh
+
+# shellcheck source=./prerequisites/install-functions/report_results.sh
+source $opencoarrays_src_dir/prerequisites/install-functions/report_results.sh
+
+# ___________________ End of function definitions for use in the Main Body __________________
+
+
+# ________________________________ Start of the Main Body ___________________________________
+
+
+if [[ "${arg_v}" == "${__flag_present}" || "${arg_V}" == "opencoarrays" ]]; then
+
+  # Print script copyright if invoked with -v, -V, or --version argument
+  cmake_project_line=$(grep project ${opencoarrays_src_dir}/CMakeLists.txt | grep VERSION)
+  text_after_version_keyword="${cmake_project_line##*VERSION}"
+  text_before_language_keyword="${text_after_version_keyword%%LANGUAGES*}"
+  opencoarrays_version=$text_before_language_keyword
+  if [[ "${arg_v}" == "${__flag_present}" ]]; then
+    echo "OpenCoarrays ${opencoarrays_version}"
+    echo ""
+    echo "OpenCoarrays installer"
+    echo "Copyright (C) 2015-2016 Sourcery, Inc."
+    echo "Copyright (C) 2015-2016 Sourcery Institute"
+    echo ""
+    echo "OpenCoarrays comes with NO WARRANTY, to the extent permitted by law."
+    echo "You may redistribute copies of ${this_script} under the terms of the"
+    echo "BSD 3-Clause License.  For more information about these matters, see"
+    echo "http://www.sourceryinstitute.org/license.html"
+    echo ""
+  elif [[ "${arg_V}" == "opencoarrays" ]]; then
+    echo "${opencoarrays_version//[[:space:]]/}"
+  fi
+
+elif [[ ! -z "${arg_D:-${arg_P:-${arg_U:-${arg_V}}}}" ||  "${arg_l}" == "${__flag_present}" ]]; then
+
+  # Delegate to build.sh for the packages it builds
+  build_arg=${arg_D:-${arg_P:-${arg_U:-${arg_V:-${arg_p}}}}}
+  [ ! -z "${arg_D}" ] && build_flag="-D"
+  [ ! -z "${arg_P}" ] && build_flag="-P"
+  [ ! -z "${arg_U}" ] && build_flag="-U"
+  [ ! -z "${arg_V}" ] && build_flag="-V"
+  [ "${arg_l}" == "${__flag_present}" ] && build_flag="-l"
+
+  if [[ "${arg_P}" == "opencoarrays" ]]; then
+
+    version="$("${opencoarrays_src_dir}/install.sh" -V opencoarrays)"
+    echo "${install_path%/}/opencoarrays/${version}"
+
+  else
+
+    info "Invoking build script with the following command:"
+    info "\"${opencoarrays_src_dir}\"/prerequisites/build.sh \"${build_flag}\" \"${build_arg}\""
+    "${opencoarrays_src_dir}"/prerequisites/build.sh "${build_flag}" "${build_arg}"
+
+    # Add lines other packages the current script builds
+    if [[ "${arg_l}" == "${__flag_present}" ]]; then
+      echo "opencoarrays (version $("${opencoarrays_src_dir}/install.sh" -V opencoarrays))"
+      echo "ofp (version: ofp-sdf for OS X )"
+    fi
+  fi
+
+elif [[ "${arg_p:-}" == "opencoarrays" ]]; then
+
+  
+  cd prerequisites || exit 1
+  installation_record=install-opencoarrays.log
+  # shellcheck source=./prerequisites/build-functions/set_SUDO_if_needed_to_write_to_directory.sh
+  source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_SUDO_if_needed_to_write_to_directory.sh"
+  version="$("${opencoarrays_src_dir}/install.sh" -V opencoarrays)"
+  install_path="${install_path}/opencoarrays/${version}"
+  set_SUDO_if_needed_to_write_to_directory "${install_path}"
+  build_opencoarrays 2>&1 | tee ../"${installation_record}"
+  report_results 2>&1 | tee -a ../"${installation_record}"
+
+elif [[ "${arg_p:-}" == "ofp" ]]; then
+
+  info "Invoking Open Fortran Parser build script with the following command:"
+  info "\"${opencoarrays_src_dir}\"/prerequisites/install-ofp.sh"
+  "${opencoarrays_src_dir}"/prerequisites/install-ofp.sh
+
+elif [[ ! -z "${arg_p:-}" ]]; then
+
+  info "Invoking build script with the following command:"
+  info "\"${opencoarrays_src_dir}\"/prerequisites/build.sh ${@:-}"
+  "${opencoarrays_src_dir}"/prerequisites/build.sh ${@:-}
+
+fi
+# ____________________________________ End of Main Body ____________________________________________
diff --git a/install.sh-usage b/install.sh-usage
new file mode 100644
index 0000000..785412a
--- /dev/null
+++ b/install.sh-usage
@@ -0,0 +1,19 @@
+  -c --with-c [arg]           Use specified C compiler. 
+  -C --with-cxx [arg]         Use specified C++ compiler. 
+  -d --debug                  Enable debug mode.
+  -D --print-downloader [arg] Print download program for package specified in argument.
+  -e --verbose                Enable verbose mode, print script as it is executed.
+  -f --with-fortran [arg]     Use specified Fortran compiler. 
+  -h --help                   Print this page.
+  -i --install-prefix [arg]   Install package in specified path. Default="${OPENCOARRAYS_SRC_DIR}/prerequisites/installations/"
+  -I --install-version [arg]  Install package version.
+  -j --num-threads [arg]      Number of threads to use when invoking make. Default="1"
+  -l --list-packages          Print packages this script can install.
+  -m --with-cmake [arg]       Use specified CMake installation. Default="cmake"
+  -M --with-mpi [arg]         Use specified MPI installation.
+  -n --no-color               Disable color output.
+  -p --package [arg]          Install specified package. Default="opencoarrays"
+  -P --print-path [arg]       Print installation directory for specified package.
+  -U --print-url [arg]        Print download location for specified package.
+  -v --version                Print OpenCoarrays version number.
+  -V --print-version [arg]    Print version number for specified package.
diff --git a/prerequisites/acceptable_compiler.f90 b/prerequisites/acceptable_compiler.f90
new file mode 100644
index 0000000..404c4f4
--- /dev/null
+++ b/prerequisites/acceptable_compiler.f90
@@ -0,0 +1,39 @@
+!
+! acceptable_compiler
+!
+! -- Report whether the compiler version equals or exceeds the first
+!    OpenCoarrays-aware version
+!
+! OpenCoarrays is distributed under the OSI-approved BSD 3-clause License:
+! Copyright (c) 2015, 2016, Sourcery, Inc.
+! Copyright (c) 2015, 2016, Sourcery Institute
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without modification,
+! are permitted provided that the following conditions are met:
+!
+! 1. Redistributions of source code must retain the above copyright notice, this
+!    list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright notice, this
+!    list of conditions and the following disclaimer in the documentation and/or
+!    other materials provided with the distribution.
+! 3. Neither the names of the copyright holders nor the names of their contributors
+!    may be used to endorse or promote products derived from this software without
+!    specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+! IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+! INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+! NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+! WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+
+program main
+  use iso_fortran_env, only : compiler_version
+  implicit none
+  print *,compiler_version() >= "GCC version 6.1.0 "
+end program
diff --git a/prerequisites/build-functions/build_and_install.sh b/prerequisites/build-functions/build_and_install.sh
new file mode 100644
index 0000000..2a8cc63
--- /dev/null
+++ b/prerequisites/build-functions/build_and_install.sh
@@ -0,0 +1,51 @@
+# Make the build directory, configure, and build
+# shellcheck disable=SC2154
+build_and_install()
+{
+  num_threads=${arg_j}
+  build_path="${OPENCOARRAYS_SRC_DIR}/prerequisites/builds/${package_to_build}-${version_to_build}"
+
+  info "Building ${package_to_build} ${version_to_build}"
+  info "Build path: ${build_path}"
+  info "Installation path: ${install_path}"
+
+  set_SUDO_if_needed_to_write_to_directory "${build_path}"
+  set_SUDO_if_needed_to_write_to_directory "${install_path}"
+  mkdir -p "${build_path}"
+  info "pushd ${build_path}"
+  pushd "${build_path}"
+  if [[ "${package_to_build}" == "gcc" ]]; then
+    info "pushd ${download_path}/${package_source_directory} "
+    pushd "${download_path}/${package_source_directory}"
+    "${PWD}"/contrib/download_prerequisites
+    info "popd"
+    popd
+    info "Configuring gcc/g++/gfortran builds with the following command:"
+    info "${download_path}/${package_source_directory}/configure --prefix=${install_path} --enable-languages=c,c++,fortran,lto --disable-multilib --disable-werror"
+    "${download_path}/${package_source_directory}/configure" --prefix="${install_path}" --enable-languages=c,c++,fortran,lto --disable-multilib --disable-werror
+    info "Building with the following command: 'make -j${num_threads} bootstrap'"
+    make "-j${num_threads}" bootstrap
+    if [[ ! -z "${SUDO:-}" ]]; then
+      info "You do not have write permissions to the installation path ${install_path}"
+      info "If you have administrative privileges, enter your password to install ${package_to_build}"
+    fi
+    info "Installing with the following command: ${SUDO:-} make install"
+    ${SUDO:-} make install
+  else
+    info "Configuring ${package_to_build} ${version_to_build} with the following command:"
+    info "FC=\"${FC:-'gfortran'}\" CC=\"${CC:-'gcc'}\" CXX=\"${CXX:-'g++'}\" \"${download_path}/${package_source_directory}\"/configure --prefix=\"${install_path}\""
+    FC="${FC:-'gfortran'}" CC="${CC:-'gcc'}" CXX="${CXX:-'g++'}" "${download_path}/${package_source_directory}"/configure --prefix="${install_path}"
+    info "Building with the following command:"
+    info "FC=\"${FC:-'gfortran'}\" CC=\"${CC:-'gcc'}\" CXX=\"${CXX:-'g++'}\" make -j\"${num_threads}\""
+    FC="${FC:-'gfortran'}" CC="${CC:-'gcc'}" CXX="${CXX:-'g++'}" make "-j${num_threads}"
+    info "Installing ${package_to_build} in ${install_path}"
+    if [[ ! -z "${SUDO:-}" ]]; then
+      info "You do not have write permissions to the installation path ${install_path}"
+      info "If you have administrative privileges, enter your password to install ${package_to_build}"
+    fi
+    info "Installing with the following command: ${SUDO:-} make install"
+    ${SUDO:-} make install
+  fi
+  info "popd"
+  popd
+}
diff --git a/prerequisites/build-functions/download_if_necessary.sh b/prerequisites/build-functions/download_if_necessary.sh
new file mode 100644
index 0000000..4ff4d1c
--- /dev/null
+++ b/prerequisites/build-functions/download_if_necessary.sh
@@ -0,0 +1,87 @@
+# shellcheck source=./ftp-url.sh
+source "${OPENCOARRAYS_SRC_DIR}/prerequisites/build-functions/ftp-url.sh"
+# shellcheck source=./set_SUDO_if_needed_to_write_to_directory.sh
+source "${OPENCOARRAYS_SRC_DIR}/prerequisites/build-functions/set_SUDO_if_needed_to_write_to_directory.sh"
+
+# Download package if the tar ball is not already in the present working directory
+# shellcheck disable=SC2154
+download_if_necessary()
+{
+  download_path="${OPENCOARRAYS_SRC_DIR}/prerequisites/downloads"
+  set_SUDO_if_needed_to_write_to_directory "${download_path}"
+  if  [[ -f "${download_path}/${url_tail}" || -d "${download_path}/${url_tail}" ]] ; then
+    info "Found '${url_tail}' in ${download_path}."
+    info "If it resulted from an incomplete download, building ${package_name} could fail."
+    info "Would you like to proceed anyway? (Y/n)"
+    read -r proceed
+    if [[ "${proceed}" == "n" || "${proceed}" == "N" || "${proceed}" == "no"  ]]; then
+      info "n"
+      info "Please remove $url_tail and restart the installation to to ensure a fresh download." 1>&2
+      emergency "Aborting. [exit 80]"
+    else
+      info "y"
+    fi
+  elif ! type "${fetch}" &> /dev/null; then
+    # The download mechanism is missing
+    info "The default download mechanism for ${package_name} is ${fetch}."
+    info "Please either ensure that ${fetch} is installed and in your PATH"
+    info "or download the ${package_name} source from "
+    info "${package_url}"
+    info "Place the downloaded file in ${download_path} and restart this script."
+    emergency "Aborting [exit 90]"
+  else
+    # The download mechanism is in the path.
+    if [[ "${fetch}" == "svn" ]]; then
+      if [[ ${version_to_build} == '--avail' || ${version_to_build} == '-a' ]]; then
+        args="ls"
+      else
+        args="checkout"
+      fi
+    elif [[ "${fetch}" == "wget" ]]; then
+      args="--no-check-certificate"
+    elif [[ "${fetch}" == "ftp-url" ]]; then
+      args="-n"
+    elif [[ "${fetch}" == "git" ]]; then
+      args="clone"
+    elif [[ "${fetch}" == "curl" ]]; then
+      first_three_characters=$(echo "${package_url}" | cut -c1-3)
+      if [[ "${first_three_characters}" == "ftp"  ]]; then 
+        args="-LO -u anonymous:"
+      elif [[ "${first_three_characters}" == "htt"  ]]; then 
+        args="-LO"
+      else
+        emergency "download_if_necessary.sh: Unrecognized URL."
+      fi
+    fi
+
+    if [[ "${fetch}" == "svn" || "${fetch}" == "git" ]]; then
+      package_source_directory="${url_tail}"
+    else
+      package_source_directory="${package_name}-${version_to_build}"
+    fi
+    info "Downloading ${package_name} ${version_to_build} to the following location:"
+    info "${download_path}/${package_source_directory}"
+    info "Download command: \"${fetch}\" ${args:-} ${package_url}"
+    info "Depending on the file size and network bandwidth, this could take several minutes or longer."
+    pushd "${download_path}"
+    "${fetch}" ${args:-} ${package_url}
+    popd
+    if [[ "${version_to_build}" == '--avail' || "${version_to_build}" == '-a' ]]; then
+      # In this case, args="ls" and the list of available versions has been printed so we can move on.
+      exit 1
+    fi
+    if [[ "${fetch}" == "svn" ]]; then
+      search_path="${download_path}/${version_to_build}"
+    else
+      search_path="${download_path}/${url_tail}"
+    fi
+    if [ -f "${search_path}" ] || [ -d "${search_path}" ]; then
+      info "Download succeeded. The ${package_name} source is in the following location:"
+      info "${search_path}"
+    else
+      info "Download failed. The ${package_name} source is not in the following, expected location:"
+      info "${search_path}"
+      emergency "Aborting. [exit 110]"
+    fi
+  fi
+}
diff --git a/prerequisites/build-functions/ftp-url.sh b/prerequisites/build-functions/ftp-url.sh
new file mode 100644
index 0000000..f9756f5
--- /dev/null
+++ b/prerequisites/build-functions/ftp-url.sh
@@ -0,0 +1,37 @@
+# Download a file from an anonymous ftp site
+#
+# Usage:
+#    ftp-url  <ftp-mode>  <ftp-site-address>:/<path-to-file>/<file-name>
+#
+# Example:
+#    ftp-url -n ftp.gnu.org:/gnu/m4/m4-1.4.17.tar.bz2
+ftp-url()
+{
+  ftp_mode="${1}"
+  url="${2}"
+
+  text_before_colon="${url%%:*}"
+  FTP_SERVER="${text_before_colon}"
+
+  text_after_colon="${url##*:}"
+  text_after_final_slash="${text_after_colon##*/}"
+  FILE_NAME="${text_after_final_slash}"
+
+  text_before_final_slash="${text_after_colon%/*}"
+  FILE_PATH="${text_before_final_slash}"
+
+  USERNAME=anonymous
+  PASSWORD=""
+  info "starting anonymous download: ftp ${ftp_mode} ${FTP_SERVER}... cd ${FILE_PATH}... get ${FILE_NAME}"
+
+ftp "${ftp_mode}" "${FTP_SERVER}" <<Done-ftp
+user "${USERNAME}" "${PASSWORD}"
+cd "${FILE_PATH}"
+passive
+binary
+get "${FILE_NAME}"
+bye
+Done-ftp
+
+info "finished anonymous ftp"
+}
diff --git a/prerequisites/build-functions/set_SUDO_if_needed_to_write_to_directory.sh b/prerequisites/build-functions/set_SUDO_if_needed_to_write_to_directory.sh
new file mode 100644
index 0000000..5218211
--- /dev/null
+++ b/prerequisites/build-functions/set_SUDO_if_needed_to_write_to_directory.sh
@@ -0,0 +1,34 @@
+# Define the sudo command to be used if the installation path requires administrative permissions
+set_SUDO_if_needed_to_write_to_directory()
+{
+  if [[ $# != "1" ]]; then
+    emergency "set_SUDO_if_needed_to_write_to_directory takes exactly one argument"
+  else
+    directory_to_create=$1
+  fi
+  if [[ -z "${LD_LIBRARY_PATH:-}" ]]; then
+     info "\${LD_LIBRARY_PATH} is empty. Try setting it if the compiler encounters linking problems."
+  fi
+  SUDO_COMMAND="sudo env LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-} PATH=${PATH:-}"
+  info "Checking whether the directory ${directory_to_create} exists... "
+  if [[ -d "${directory_to_create}" ]]; then
+    info "yes"
+    info "Checking whether I have write permissions to ${directory_to_create} ... "
+    if [[ -w "${directory_to_create}" ]]; then
+      info "yes"
+    else
+      info "no"
+      SUDO="${SUDO_COMMAND}"
+    fi
+  else
+    info "no"
+    info "Checking whether I can create ${directory_to_create} ... "
+    if mkdir -p "${directory_to_create}" >& /dev/null; then
+      info "yes."
+    else
+      info "no."
+      # shellcheck disable=SC2034
+      SUDO="${SUDO_COMMAND}"
+    fi
+  fi
+}
diff --git a/prerequisites/build-functions/set_compilers.sh b/prerequisites/build-functions/set_compilers.sh
new file mode 100644
index 0000000..642e1f4
--- /dev/null
+++ b/prerequisites/build-functions/set_compilers.sh
@@ -0,0 +1,42 @@
+# Set CC, CXX, and FC
+# shellcheck disable=SC2154
+set_compilers()
+{
+  # If FC is already set, use the set value.
+  # Else if FC is empty, use the value specified via the -f or --with-fortran command-line arguments.
+  # Else use the default specifed in ../build.sh-usage.
+  FC=${FC:-${arg_f}}
+  info "Fortran compiler: ${FC}"
+
+  if [[ $(uname) == "Darwin" && "${package_to_build}" == "cmake"  ]]; then
+    if [[ -x "/usr/bin/gcc" ]]; then
+      [ ! -z "${CC:-}" ] && info "Overriding CC: cmake build requires Apple LLVM gcc, which XCode command-line tools puts in /usr/bin"
+      CC=/usr/bin/gcc
+    else
+      info      "OS X detected.  Please install XCode command-line tools and "
+      emergency "ensure that /usr/bin/gcc exists and is executable. Aborting."
+    fi
+  else
+    # If CC is already set, use the set value.
+    # Else if CC is empty, use the value specified via the -c or --with-c command-line arguments.
+    # Else use the default specifed in ../build.sh-usage.
+    CC=${CC:-${arg_c}}
+  fi
+  info "C compiler: ${CC}"
+
+  if [[ $(uname) == "Darwin" && ${package_to_build} == "cmake"  ]]; then
+    if [[ -x "/usr/bin/g++" ]]; then
+      [ ! -z "${CXX:-}" ] && info "Overriding CXX: cmake build requires Apple LLVM g++, which XCode command-line tools puts in /usr/bin"
+      CXX=/usr/bin/g++
+    else
+      info      "OS X detected.  Please install XCode command-line tools and "
+      emergency "ensure that /usr/bin/g++ exists and is executable. Aborting."
+    fi
+  else
+    # If CXX is already set, use the set value.
+    # Else if CXX is empty, use the value specified via the -C or --with-cxx command-line arguments.
+    # Else use the default specifed in ../build.sh-usage.
+    CXX=${CXX:-${arg_C}}
+  fi
+  info "C++ compiler: ${CXX}"
+}
diff --git a/prerequisites/build-functions/set_or_print_default_version.sh b/prerequisites/build-functions/set_or_print_default_version.sh
new file mode 100644
index 0000000..ccb0697
--- /dev/null
+++ b/prerequisites/build-functions/set_or_print_default_version.sh
@@ -0,0 +1,62 @@
+# If -p, -D, -P, or -U specifies a package, set default_version
+# If -V specifies a package, print the default_version and exit with normal status
+# If -l is present, list all packages and versions and exit with normal status
+# shellcheck disable=SC2154
+set_or_print_default_version()
+{
+  # Verify requirements
+  [ "${arg_l}" == "${__flag_present}" ] && [ ! -z "${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}}" ] &&
+    emergency "Please pass only one of {-l, -D, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+
+  if [[ "${arg_l}" == "${__flag_present}" ]]; then
+    echo "This script can build the following packages:"
+  fi
+  # Get package name from argument passed with -p, -V, -D, or -U
+  package_name="${arg_p:-${arg_D:-${arg_P:-${arg_U:-${arg_V}}}}}" # not needed for -l
+
+  if [[ "${package_name}" == "ofp" ]]; then
+    ${OPENCOARRAYS_SRC_DIR}/prerequisites/install-ofp.sh "${@}"
+    exit 0
+  fi 
+
+  [ "${package_name}" == "opencoarrays" ] && 
+    emergency "Please use this script with a previously downloaded opencoarrays source archive. This script does not download opencoarrays "
+  # This is a bash 3 hack standing in for a bash 4 hash (bash 3 is the lowest common
+  # denominator because, for licensing reasons, OS X only has bash 3 by default.)
+  # See http://stackoverflow.com/questions/1494178/how-to-define-hash-tables-in-bash
+  package_version=(
+    "cmake:3.4.0"
+    "gcc:6.1.0"
+    "mpich:3.1.4"
+    "wget:1.16.3"
+    "flex:2.6.0"
+    "bison:3.0.4"
+    "pkg-config:0.28"
+    "make:4.1"
+    "m4:1.4.17"
+    "subversion:1.9.4"
+    "ofp:sdf"
+  )
+  for package in "${package_version[@]}" ; do
+     KEY="${package%%:*}"
+     VALUE="${package##*:}"
+     if [[  "${arg_l}" == "${__flag_present}" ]]; then
+       # If the list was requested, print the current element of the name:version list.
+       printf "%s (default version %s)\n" "${KEY}" "${VALUE}"
+     elif [[ "${package_name}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the default version:
+       default_version=${VALUE}
+       # If a printout of the default version number was requested, then print it and exit with normal status
+       [[ ! -z "${arg_V}" ]] && printf "%s\n" "${default_version}" && exit 0
+       break # exit the for loop
+     fi
+  done
+
+ # Exit with normal status (package/version has been printed).
+ [ "${arg_l}" == "${__flag_present}" ] && exit 0
+
+ # Exit with error status and diagnostic output if empty default_version
+ if [[ -z "${default_version:-}" ]]; then
+    emergency "Package ${package_name:-} not recognized.  Use --l or --list-packages to list the allowable names."
+ fi
+}
diff --git a/prerequisites/build-functions/set_or_print_downloader.sh b/prerequisites/build-functions/set_or_print_downloader.sh
new file mode 100644
index 0000000..93f0847
--- /dev/null
+++ b/prerequisites/build-functions/set_or_print_downloader.sh
@@ -0,0 +1,57 @@
+# If -p, -P, -U, or -V specifies a package, set fetch variable
+# If -D specifies a package, print "${fetch}" and exit with normal status
+# If -l is present, list all packages and versions and exit with normal status
+# shellcheck disable=SC2154
+set_or_print_downloader()
+{
+  # Verify requirements
+  [ ! -z "${arg_D}" ] && [ ! -z "${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}" ] &&
+    emergency "Please pass only one of {-D, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+
+  package_name="${arg_p:-${arg_D:-${arg_P:-${arg_U:-${arg_V}}}}}"
+  if [[ "${package_name}" == "ofp" ]]; then
+    ${OPENCOARRAYS_SRC_DIR}/prerequisites/install-ofp.sh "${@}"
+    exit 0
+  fi
+  if [[ $(uname) == "Darwin" ]]; then
+    wget_or_curl=curl
+    ftp_or_curl=curl
+  else
+    wget_or_curl=wget
+    ftp_or_curl=ftp-url
+  fi
+  if [[ "${package_name}" == "gcc" && "${version_to_build}" != "trunk" ]]; then
+    gcc_fetch="${ftp_or_curl}"
+  else
+    gcc_fetch="svn"
+  fi
+  # This is a bash 3 hack standing in for a bash 4 hash (bash 3 is the lowest common
+  # denominator because, for licensing reasons, OS X only has bash 3 by default.)
+  # See http://stackoverflow.com/questions/1494178/how-to-define-hash-tables-in-bash
+  package_fetch=(
+    "gcc:${gcc_fetch}"
+    "wget:${ftp_or_curl}"
+    "cmake:${wget_or_curl}"
+    "mpich:${wget_or_curl}"
+    "flex:${wget_or_curl}"
+    "bison:${ftp_or_curl}"
+    "pkg-config:${wget_or_curl}"
+    "make:${ftp_or_curl}"
+    "m4:${ftp_or_curl}"
+    "subversion:${wget_or_curl}"
+  )
+  for package in "${package_fetch[@]}" ; do
+     KEY="${package%%:*}"
+     VALUE="${package##*:}"
+     if [[ "${package_name}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the download mechanism:
+       fetch=${VALUE}
+       # If a printout of the download mechanism was requested, then print it and exit with normal status
+       [[ ! -z "${arg_D}" ]] && printf "%s\n" "${fetch}" && exit 0
+       break # exit the for loop
+     fi
+  done
+  if [[ -z "${fetch:-}" ]]; then
+    emergency "Package ${package_name:-} not recognized.  Use --l or --list-packages to list the allowable names."
+  fi
+}
diff --git a/prerequisites/build-functions/set_or_print_installation_path.sh b/prerequisites/build-functions/set_or_print_installation_path.sh
new file mode 100644
index 0000000..6c0794b
--- /dev/null
+++ b/prerequisites/build-functions/set_or_print_installation_path.sh
@@ -0,0 +1,19 @@
+# If -P specifies a package, print the installation path and exit with normal status
+# Otherwise, set install_path
+# shellcheck disable=SC2154
+set_or_print_installation_path()
+{
+  # Verify requirements
+  [ ! -z "${arg_P}" ] && [ ! -z "${arg_p:-${arg_D:-${arg_U:-${arg_V}}}}" ] &&
+    emergency "Please pass only one of {-D, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+
+  install_path="${arg_i%/}/${arg_p:-${arg_D:-${arg_P:-${arg_U:-${arg_V}}}}}/${version_to_build}"
+
+  # If -P is present, print ${install_path} and exit with normal status
+  if [[ ! -z "${arg_P:-}" ]]; then
+    printf "%s\n" "${install_path}"
+    exit 0
+  fi
+
+  info "${package_name} ${version_to_build} installation path: ${install_path}"
+}
diff --git a/prerequisites/build-functions/set_or_print_url.sh b/prerequisites/build-functions/set_or_print_url.sh
new file mode 100644
index 0000000..6fecd9e
--- /dev/null
+++ b/prerequisites/build-functions/set_or_print_url.sh
@@ -0,0 +1,85 @@
+# If -p, -D, -P, or -V specifies a package, set package_url
+# If -U specifies a package, print the package_url and exit with normal status
+# shellcheck disable=SC2154
+set_or_print_url()
+{
+  # Verify requirements
+  [ ! -z "${arg_U}" ] && [ ! -z "${arg_D:-${arg_p:-${arg_P:-${arg_V}}}}" ] &&
+    emergency "Please pass only one of {-D, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+
+  # Get package name from argument passed with  -p, -D, -P, -V, or -U
+  package_to_build="${arg_p:-${arg_D:-${arg_P:-${arg_U:-${arg_V}}}}}"
+
+  if [[ "${package_to_build}" == 'cmake' ]]; then
+    major_minor="${version_to_build%.*}"
+  elif [[ "${package_to_build}" == "gcc" && "${version_to_build}" != "trunk" ]]; then
+    gcc_url_head="ftp.gnu.org:/gnu/gcc/gcc-${version_to_build}/"
+  else
+    gcc_url_head="svn://gcc.gnu.org/svn/gcc/"
+  fi
+  package_url_head=(
+    "gcc;${gcc_url_head:-}"
+    "wget;ftp.gnu.org:/gnu/wget/"
+    "m4;ftp.gnu.org:/gnu/m4/"
+    "pkg-config;http://pkgconfig.freedesktop.org/releases/"
+    "mpich;http://www.mpich.org/static/downloads/${version_to_build}/"
+    "flex;http://sourceforge.net/projects/flex/files/"
+    "make;ftp://ftp.gnu.org/gnu/make/"
+    "bison;ftp.gnu.org:/gnu/bison/"
+    "cmake;http://www.cmake.org/files/v${major_minor-}/"
+    "subversion;http://www.eu.apache.org/dist/subversion/"
+  )
+  for package in "${package_url_head[@]}" ; do
+     KEY="${package%%;*}"
+     VALUE="${package##*;}"
+     if [[ "${package_to_build}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the URL head:
+       url_head="${VALUE}"
+       break
+     fi
+  done
+
+  # Set differing tails for GCC trunk versus branches
+  if [[ "${package_to_build}" == 'gcc' ]]; then
+    if [[ "${fetch}" == 'svn' ]]; then
+      gcc_tail="${version_to_build}"
+    elif [[ "${version_to_build}" == '--avail' || "${version_to_build}" == '-a' ]]; then
+      gcc_tail='branches'
+    else
+      gcc_tail="gcc-${version_to_build}.tar.bz2"
+    fi
+  fi
+  package_url_tail=(
+    "gcc;${gcc_tail:-}"
+    "wget;wget-${version_to_build}.tar.gz"
+    "m4;m4-${version_to_build}.tar.bz2"
+    "pkg-config;pkg-config-${version_to_build}.tar.gz"
+    "mpich;mpich-${version_to_build}.tar.gz"
+    "flex;flex-${version_to_build}.tar.bz2"
+    "bison;bison-${version_to_build}.tar.gz"
+    "make;make-${version_to_build}.tar.bz2"
+    "cmake;cmake-${version_to_build}.tar.gz"
+    "subversion;subversion-${version_to_build}.tar.gz"
+  )
+  for package in "${package_url_tail[@]}" ; do
+     KEY="${package%%;*}"
+     VALUE="${package##*;}"
+     if [[ "${package_to_build}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the URL tail:
+       url_tail="${VALUE}"
+       break
+     fi
+  done
+
+  if [[ -z "${url_head:-}" || -z "${url_tail}" ]]; then
+    emergency "Package ${package_name:-} not recognized.  Use --l or --list-packages to list the allowable names."
+  fi
+
+  package_url="${url_head}""${url_tail}"
+
+  # If a printout of the package URL was requested, then print it and exit with normal status
+  if [[ ! -z "${arg_U:-}" ]]; then
+    printf "%s\n" "${package_url}"
+    exit 0
+  fi
+}
diff --git a/prerequisites/build-functions/unpack_if_necessary.sh b/prerequisites/build-functions/unpack_if_necessary.sh
new file mode 100644
index 0000000..de8dc6a
--- /dev/null
+++ b/prerequisites/build-functions/unpack_if_necessary.sh
@@ -0,0 +1,18 @@
+# Unpack if the unpacked tar ball is not in the present working directory
+# shellcheck disable=SC2154
+unpack_if_necessary()
+{
+  if [[ "${fetch}" == "svn" || "${fetch}" == "git" ]]; then
+    package_source_directory="${version_to_build}"
+  else
+    info "Unpacking ${url_tail}."
+    info "pushd ${download_path}"
+    pushd "${download_path}"
+    info "Unpack command: tar xf ${url_tail}"
+    tar xf "${url_tail}"
+    info "popd"
+    popd
+    # shellcheck disable=SC2034
+    package_source_directory="${package_name}-${version_to_build}"
+  fi
+}
diff --git a/prerequisites/build.sh b/prerequisites/build.sh
new file mode 100755
index 0000000..e3e8906
--- /dev/null
+++ b/prerequisites/build.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+# BASH3 Boilerplate
+#
+# build.sh
+#
+#  - Build OpenCoarrays prerequisite packages and their prerequisites
+#
+# Usage: LOG_LEVEL=7 B3B_USE_CASE=/opt/bash3boilerplate/src/use-case ./my-script.sh -f script_input.txt
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+# The invocation of bootstrap.sh below performs the following tasks:
+# (1) Import several bash3boilerplate helper functions & default settings.
+# (2) Set several variables describing the current file and its usage page.
+# (3) Parse the usage information (default usage file name: current file's name with -usage appended).
+# (4) Parse the command line using the usage information.
+
+
+export OPENCOARRAYS_SRC_DIR="${OPENCOARRAYS_SRC_DIR:-${PWD%prerequisites*}}"
+export __usage=${OPENCOARRAYS_SRC_DIR}/prerequisites/build.sh-usage
+if [[ ! -f "${OPENCOARRAYS_SRC_DIR}/src/libcaf.h" ]]; then
+  echo "Please run this script inside the OpenCoarrays source \"prerequisites\" subdirectory"
+  echo "or set OPENCOARRAYS_SRC_DIR to the top-level OpenCoarrays source directory path."
+  exit 1
+fi
+export B3B_USE_CASE="${B3B_USE_CASE:-${OPENCOARRAYS_SRC_DIR}/prerequisites/use-case}"
+if [[ ! -f "${B3B_USE_CASE:-}/bootstrap.sh" ]]; then
+  echo "Please set B3B_USE_CASE to the bash3boilerplate use-case directory path."
+  exit 2
+fi
+# shellcheck source=./use-case/bootstrap.sh
+source "${B3B_USE_CASE}/bootstrap.sh" "$@"
+
+
+
+# Set up a function to call when receiving an EXIT signal to do some cleanup. Remove if
+# not needed. Other signals can be trapped too, like SIGINT and SIGTERM.
+function cleanup_before_exit () {
+  info "Cleaning up. Done"
+}
+trap cleanup_before_exit EXIT # The signal is specified here. Could be SIGINT, SIGTERM etc.
+
+### Validation (decide what's required for running your script and error out)
+#####################################################################
+
+export __flag_present=1
+
+# shellcheck disable=SC2154
+if [[ "${arg_l}" != "${__flag_present}" && "${arg_L}" != "${__flag_present}" &&
+      "${arg_v}" != "${__flag_present}" && "${arg_h}" != "${__flag_present}" &&
+      -z "${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}}" ]]; then
+  help "${__base}: Insufficient arguments. Please pass either -D, -h, -l, -L, -p, -P, -U, -v, -V, or a longer equivalent."
+fi
+
+# Suppress info and debug messages if -l, -P, -U, -V, -D, or their longer equivalent is present:
+[[ "${arg_l}" == "${__flag_present}" || ! -z "${arg_P:-${arg_U:-${arg_V:-${arg_D}}}}" ]] && suppress_info_debug_messages
+
+[ -z "${LOG_LEVEL:-}" ] && emergency "Cannot continue without LOG_LEVEL. "
+
+### Enforce mutual exclusivity of arguments that print single-line output
+[ ! -z "${arg_P:-}" ] && [ ! -z "${arg_V:-}" ] && emergency "Only specify one of -P, -U, -V, or their long-form equivalents."
+[ ! -z "${arg_P:-}" ] && [ ! -z "${arg_U:-}" ] && emergency "Only specify one of -P, -U, -V, or their long-form equivalents."
+[ ! -z "${arg_U:-}" ] && [ ! -z "${arg_V:-}" ] && emergency "Only specify one of -P, -U, -V, or their long-form equivalents."
+
+### Print bootstrapped magic variables to STDERR when LOG_LEVEL
+### is at the default value (6) or above.
+#####################################################################
+# shellcheck disable=SC2154
+{
+info "__file: ${__file}"
+info "__dir: ${__dir}"
+info "__base: ${__base}"
+info "__os: ${__os}"
+info "__usage: ${__usage}"
+info "LOG_LEVEL: ${LOG_LEVEL}"
+
+info "-b (--branch):           ${arg_b} "
+info "-c (--with-c):           ${arg_c} "
+info "-C (--with-cxx):         ${arg_C} "
+info "-d (--debug):            ${arg_d} "
+info "-D (--print-downloader): ${arg_D} "
+info "-e (--verbose):          ${arg_e} "
+info "-f (--with-fortran):     ${arg_f} "
+info "-h (--help):             ${arg_h} "
+info "-i (--install-prefix):   ${arg_i} "
+info "-j (--num-threads):      ${arg_j} "
+info "-I (--install-version):  ${arg_I} "
+info "-l (--list-packages):    ${arg_l} "
+info "-L (--list-branches):    ${arg_L} "
+info "-m (--with-cmake):       ${arg_m} "
+info "-M (--with-mpi):         ${arg_M} "
+info "-n (--no-color):         ${arg_n} "
+info "-p (--package):          ${arg_p}"
+info "-P (--print-path):       ${arg_P} "
+info "-t (--with-tau):         ${arg_t} "
+info "-U (--print-url):        ${arg_U} "
+info "-v (--version):          ${arg_v} "
+info "-V (--print-version):    ${arg_V} "
+}
+
+# shellcheck source=./build-functions/set_or_print_default_version.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_or_print_default_version.sh"
+set_or_print_default_version "${@}"
+export version_to_build="${arg_I:-${default_version}}"
+
+# shellcheck source=./build-functions/set_or_print_downloader.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_or_print_downloader.sh"
+set_or_print_downloader "${@}"
+
+# shellcheck source=./build-functions/set_or_print_url.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_or_print_url.sh"
+set_or_print_url
+
+# shellcheck source=./build-functions/set_or_print_installation_path.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_or_print_installation_path.sh"
+set_or_print_installation_path
+
+# shellcheck source=./build-functions/download_if_necessary.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/download_if_necessary.sh"
+download_if_necessary
+
+# shellcheck source=./build-functions/unpack_if_necessary.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/unpack_if_necessary.sh"
+unpack_if_necessary
+
+# shellcheck source=./build-functions/set_compilers.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_compilers.sh"
+set_compilers
+
+# shellcheck source=./build-functions/build_and_install.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/build_and_install.sh"
+build_and_install
diff --git a/prerequisites/build.sh-usage b/prerequisites/build.sh-usage
new file mode 100644
index 0000000..a7533f3
--- /dev/null
+++ b/prerequisites/build.sh-usage
@@ -0,0 +1,23 @@
+  -b --branch [arg]           Repository branch to download. Default="trunk"
+  -c --with-c [arg]           Use the specified C compiler. Default="gcc"
+  -C --with-cxx [arg]         Use the specified C++ compiler. Default="g++"
+  -d --debug                  Enable debug mode.
+  -D --print-downloader [arg] Print download program for package specified in argument. 
+  -e --verbose                Enable verbose mode, print script as it is executed.
+  -f --with-fortran [arg]     Specify Fortran compiler location. Default="gfortran"
+  -h --help                   This page.
+  -i --install-prefix [arg]   Install package in specified path. Default="${OPENCOARRAYS_SRC_DIR%/}/prerequisites/installations/"
+  -j --num-threads [arg]      Number of threads to use when invoking make. Default="1"
+  -I --install-version [arg]  Package version to install. (To see default, use -V or --print-version)
+  -l --list-packages          List the packages this script can install.
+  -L --list-branches          List the repository branches this script can download.
+  -m --with-cmake [arg]       Use the specified CMake installation. Default="cmake"
+  -M --with-mpi [arg]         Use the specified MPI installation. 
+  -n --no-color               Disable color output.
+  -p --package [arg]          Package to install. 
+  -P --print-path [arg]       Print installation path for package specified in argument.
+  -t --with-tau [arg]         Use the specified TAU parallel performance utilities installation. Default="tauf90"
+  -T --install-tau [arg]      Install TAU in specified path. Default="${OPENCOARRAYS_SRC_DIR}/prerequisites/installations/${package_name:-}/${version_to_build:-}/"
+  -U --print-url [arg]        Print URL for package specified in argument. 
+  -v --version                Print OpenCoarrays version number.
+  -V --print-version [arg]    Print installation version for package specified in argument.
diff --git a/prerequisites/check_version.sh b/prerequisites/check_version.sh
new file mode 100755
index 0000000..69f6c4d
--- /dev/null
+++ b/prerequisites/check_version.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+#
+# check_version.sh
+#
+# -- Verify whether an OpenCoarrays prerequisite meets the required minimum version number.
+#
+# OpenCoarrays is distributed under the OSI-approved BSD 3-clause License:
+# Copyright (c) 2015-2016, Sourcery, Inc.
+# Copyright (c) 2015-2016, Sourcery Institute
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice, this
+#    list of conditions and the following disclaimer in the documentation and/or
+#    other materials provided with the distribution.
+# 3. Neither the names of the copyright holders nor the names of their contributors
+#    may be used to endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# Interpret the first argument as the package executable name
+package="$1"
+# Interpret the second argument as the minimimum acceptable package version number
+minimum_version="$2"
+# Interpret the third argument as indicating the desired verbosity
+verbose=$3
+
+this_script=$(basename "$0")
+
+usage()
+{
+    echo ""
+    echo " $this_script - Bash script for verifyin minimum version numbers for OpenCoarrays prerequisites"
+    echo ""
+    echo " Usage (optional arguments in square brackets): "
+    echo "      $this_script [<option>]"
+    echo "      $this_script  <package-name> <minimum-version-number> [--verbose]"
+    echo ""
+    echo " Options:"
+    echo "   --help, -h           Show this help message"
+    echo "   --list , -l          List the packages whose versions this script can verify"
+    echo ""
+    echo " Examples:"
+    echo ""
+    echo "   $this_script cmake 3.4.0"
+    echo "   $this_script flex 2.6.0 --verbose"
+    echo "   $this_script flex $(./build.sh -V flex )"
+    echo "   $this_script --help"
+    echo "   $this_script --list"
+    echo ""
+    echo "[exit 10]"
+    exit 10
+}
+
+# Print usage information and exit if script is invoked without arguments or with --help or -h as the first argument
+if [ $# == 0 ]; then
+  usage | less
+  exit 20
+
+elif [[ $1 == '--help' || $1 == '-h' ]]; then
+  usage | less
+  exit 0
+
+elif [[ $1 == '--list' || $1 == '-l' ]]; then
+ echo "$this_script currently verifies minimum version numbers for the following OpenCoarrays prerequisites:"
+ echo "   cmake, flex, bison, m4"
+ exit 40
+
+elif [[ $1 == '-v' || $1 == '-V' || $1 == '--version' ]]; then
+  # Print script copyright if invoked with -v, -V, or --version argument
+  cmake_project_line=$(grep project ../CMakeLists.txt | grep VERSION)
+  text_after_version_keyword="${cmake_project_line##*VERSION}"
+  text_before_language_keyword="${text_after_version_keyword%%LANGUAGES*}"
+  opencoarrays_version=$text_before_language_keyword
+  echo "opencoarrays $opencoarrays_version"
+  echo ""
+  echo "OpenCoarrays prerequisite version verifier"
+  echo "Copyright (C) 2015-2016 Sourcery, Inc."
+  echo "Copyright (C) 2015-2016 Sourcery Institute"
+  echo ""
+  echo "OpenCoarrays comes with NO WARRANTY, to the extent permitted by law."
+  echo "You may redistribute copies of $this_script under the terms of the"
+  echo "BSD 3-Clause License.  For more information about these matters, see"
+  echo "http://www.sourceryinstitute.org/license.html"
+  echo ""
+fi
+
+package_version_header=$($package --version | head -1)
+if [[ "$verbose" == "--verbose" ]]; then
+  echo "$package_version_header"
+fi
+
+# Extract the text after the final space:
+version=${package_version_header##* }
+major=${version%%.*}
+minor_patch=${version#*.}
+minor=${minor_patch%%.*}
+patch=${version##*.}
+if [[ "$verbose" == "--verbose" ]]; then
+  echo "$version = $major . $minor . $patch"
+fi
+
+# Extract the text after the final space:
+min_major=${minimum_version%%.*}
+min_minor_patch=${minimum_version#*.}
+min_minor=${min_minor_patch%%.*}
+min_patch=${minimum_version##*.}
+if [[ "$verbose" == "--verbose" ]]; then
+  echo "$minimum_version = $min_major . $min_minor . $min_patch"
+fi
+
+if [ "$(( major < min_major ))" -ne 0 ]; then
+  if [[ $verbose == "--verbose" ]]; then
+    echo "$major < $min_major"
+  fi
+  exit 10
+elif [[ $verbose == "--verbose" ]]; then
+  echo "$major >= $min_major"
+fi
+
+if [ "$(( minor < min_minor ))" -ne 0 ]; then
+  if [[ $verbose == "--verbose" ]]; then
+    echo "$minor < $min_minor"
+  fi
+  exit 20
+elif [[ $verbose == "--verbose" ]]; then
+  echo "$minor >= $min_minor"
+fi
+
+if [ "$(( patch < min_patch ))" -ne 0 ]; then
+  if [[ $verbose == "--verbose" ]]; then
+    echo "$patch < $min_patch"
+  fi
+  exit 30
+elif [[ $verbose == "--verbose" ]]; then
+  echo "$patch >= $min_patch"
+fi
+exit 0
diff --git a/prerequisites/install-binary-functions/build_parse_table.sh b/prerequisites/install-binary-functions/build_parse_table.sh
new file mode 100644
index 0000000..a247270
--- /dev/null
+++ b/prerequisites/install-binary-functions/build_parse_table.sh
@@ -0,0 +1,13 @@
+# Build the Open Fortran Parser parse table
+# shellcheck disable=SC2154
+function build_parse_table()
+{
+  info "Building parse table"
+  pushd "${install_path}/ofp-sdf/fortran/syntax"
+  info "Build command: make SDF2_PATH=\"${SDF2_PATH}\" ST_PATH=\"${ST_PATH}\" DYLD_LIBRARY_PATH=\"${DYLD_LIBRARY_PATH}\""
+  make SDF2_PATH="${SDF2_PATH}" ST_PATH="${ST_PATH}" DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}"
+  popd
+  pushd "${install_path}/ofp-sdf/fortran/trans"
+  make
+  popd
+}
diff --git a/prerequisites/install-binary-functions/move_binaries_to_install_path.sh b/prerequisites/install-binary-functions/move_binaries_to_install_path.sh
new file mode 100644
index 0000000..6270234
--- /dev/null
+++ b/prerequisites/install-binary-functions/move_binaries_to_install_path.sh
@@ -0,0 +1,44 @@
+install_or_skip()
+{
+  SUDO=${1}
+  install_path="${2}"
+  package_to_install="${3}"
+  download_path="${4}"
+  if [[ -d "${install_path}/${package_to_install}" ]]; then
+    info "The following installation path exists:"
+    info "${install_path}/${package_to_install}"
+    info "If you want to replace it, please remove it and restart this script."
+    info "Skipping ${package_to_install} installation."
+  else
+    info "Installing ${package_to_install} with the following command:"
+    info "${SUDO} mv ${download_path}/${package_to_install} ${install_path}"
+    ${SUDO:-} mv "${download_path}/${package_to_install}" "${install_path}"
+    if [[ -x "${install_path}/${package_to_install}"  ]]; then
+      info "Installation complete for ${package_to_install} in the following location:"
+      info "${install_path}/${package_to_install}"
+    else
+      info "Something went wrong.  Either ${package_to_install} is not in the following expected location"
+      info "or the user lacks executable permissions for the directory:"
+      emergency "${install_path}/${package_to_install}"
+    fi
+  fi
+}
+
+function move_binaries_to_install_path()
+{
+  info "Installation package names: ${install_names}"
+  package_to_install="${install_names%%,*}" # remove longest back-end match for ,*
+  remaining_packages="${install_names#*,}"  # remove shortest front-end match for *,
+  if [[ ! -d "${install_path}" ]]; then
+    ${SUDO:-} mkdir -p "${install_path}"
+  fi
+  while [[ "${package_to_install}" != "${remaining_packages}" ]]; do
+    install_or_skip  "${SUDO:-}" "${install_path}" "${package_to_install}" "${download_path}"
+    info "Remaining installation package names:  ${remaining_packages}"
+    install_names="${remaining_packages}"
+    package_to_install="${install_names%%,*}" # remove longest back-end match for ,*
+    remaining_packages="${install_names#*,}"  # remove shortest front-end for *,
+    info "Installing ${package_to_install} binary with the following command:"
+  done
+  install_or_skip  "${SUDO:-}" "${install_path}" "${package_to_install}" "${download_path}"
+}
diff --git a/prerequisites/install-binary-functions/set_or_print_csv_binary_names.sh b/prerequisites/install-binary-functions/set_or_print_csv_binary_names.sh
new file mode 100644
index 0000000..5f1c5a4
--- /dev/null
+++ b/prerequisites/install-binary-functions/set_or_print_csv_binary_names.sh
@@ -0,0 +1,31 @@
+# If -p, -P, -U, or -V specifies a package, set fetch variable
+# If -D specifies a package, print "${fetch}" and exit with normal status
+# If -l is present, list all packages and versions and exit with normal status
+# shellcheck disable=SC2154
+set_or_print_csv_binary_names()
+{
+  # Verify requirements
+  [ "${arg_N:-}" == "${__flag_present}" ] && [ ! -z "${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}}" ] &&
+    emergency "Please pass only one of {-D, -N, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+
+  # This is a bash 3 hack standing in for a bash 4 hash (bash 3 is the lowest common
+  # denominator because, for licensing reasons, OS X only has bash 3 by default.)
+  # See http://stackoverflow.com/questions/1494178/how-to-define-hash-tables-in-bash
+  package_install_names=(
+    "strategoxt-superbundle:aterm,sdf2-bundle,strategoxt"
+  )
+  for package in "${package_install_names[@]}" ; do
+     KEY="${package%%:*}"
+     VALUE="${package##*:}"
+     if [[ "${package_name}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the download mechanism:
+       install_names=${VALUE}
+       # If a printout of the download mechanism was requested, then print it and exit with normal status
+       [[ "${arg_N:-}" == "${__flag_present}" ]] && printf "%s\n" "${install_names}" && exit 0
+       break # exit the for loop
+     fi
+  done
+  if [[ -z "${install_names:-}" ]]; then
+    emergency "Package ${package_name:-} not recognized.  Use -l or --list-packages to list the allowable names."
+  fi
+}
diff --git a/prerequisites/install-binary-functions/set_or_print_default_version.sh b/prerequisites/install-binary-functions/set_or_print_default_version.sh
new file mode 100644
index 0000000..a880a05
--- /dev/null
+++ b/prerequisites/install-binary-functions/set_or_print_default_version.sh
@@ -0,0 +1,47 @@
+# If -p, -D, -P, or -U specifies a package, set default_version
+# If -V specifies a package, print the default_version and exit with normal status
+# If -l is present, list all packages and versions and exit with normal status
+# shellcheck disable=SC2154
+set_or_print_default_version()
+{
+  # Verify requirements
+  if [[ "${arg_l}" == "${__flag_present}" ]]; then
+    if [[ "${arg_N:-}" == "${__flag_present}" || ! -z "${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}}" ]]; then
+      emergency "Please pass only one of {-l, -D, -N, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+    fi
+  fi
+
+  if [[ "${arg_l}" == "${__flag_present}" ]]; then
+    echo "This script can install the following packages:"
+  fi
+  # Get package name from argument passed with -p, -V, -D, or -U
+  package_name="${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V:-}}}}}" # not needed for -l
+  # This is a bash 3 hack standing in for a bash 4 hash (bash 3 is the lowest common
+  # denominator because, for licensing reasons, OS X only has bash 3 by default.)
+  # See http://stackoverflow.com/questions/1494178/how-to-define-hash-tables-in-bash
+  package_version=(
+    "strategoxt-superbundle:0.17-macosx"
+  )
+  for package in "${package_version[@]}" ; do
+     KEY="${package%%:*}"
+     VALUE="${package##*:}"
+     if [[  "${arg_l}" == "${__flag_present}" ]]; then
+       # If the list was requested, print the current element of the name:version list.
+       printf "%s (default version %s)\n" "${KEY}" "${VALUE}"
+     elif [[ "${package_name}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the default version:
+       default_version=${VALUE}
+       # If a printout of the default version number was requested, then print it and exit with normal status
+       [[ ! -z "${arg_V}" ]] && printf "%s\n" "${default_version}" && exit 0
+       break # exit the for loop
+     fi
+  done
+
+ # Exit with normal status (package/version has been printed).
+ [ "${arg_l}" == "${__flag_present}" ] && exit 0
+
+ # Exit with error status and diagnostic output if empty default_version
+ if [[ -z "${default_version:-}" ]]; then
+    emergency "Package ${package_name:-} not recognized.  Use -l or --list-packages to list the allowable names."
+ fi
+}
diff --git a/prerequisites/install-binary-functions/set_or_print_downloader.sh b/prerequisites/install-binary-functions/set_or_print_downloader.sh
new file mode 100644
index 0000000..422a159
--- /dev/null
+++ b/prerequisites/install-binary-functions/set_or_print_downloader.sh
@@ -0,0 +1,39 @@
+# If -p, -P, -U, or -V specifies a package, set fetch variable
+# If -D specifies a package, print "${fetch}" and exit with normal status
+# If -l is present, list all packages and versions and exit with normal status
+# shellcheck disable=SC2154
+set_or_print_downloader()
+{
+  # Verify requirements
+  if [[ ! -z "${arg_D}" ]]; then
+    if  [[ "${arg_N:-}" == "${__flag_present}" || ! -z "${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}" ]]; then
+      emergency "Please pass only one of {-D, -N, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+    fi
+  fi
+  package_name="${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}}"
+  if [[ $(uname) == "Darwin" ]]; then
+    wget_or_curl=curl
+  else
+    wget_or_curl=wget
+  fi  
+  # This is a bash 3 hack standing in for a bash 4 hash (bash 3 is the lowest common
+  # denominator because, for licensing reasons, OS X only has bash 3 by default.)
+  # See http://stackoverflow.com/questions/1494178/how-to-define-hash-tables-in-bash
+  package_fetch=(
+    "strategoxt-superbundle:${wget_or_curl}"
+  )
+  for package in "${package_fetch[@]}" ; do
+     KEY="${package%%:*}"
+     VALUE="${package##*:}"
+     if [[ "${package_name}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the download mechanism:
+       fetch=${VALUE}
+       # If a printout of the download mechanism was requested, then print it and exit with normal status
+       [[ ! -z "${arg_D}" ]] && printf "%s\n" "${fetch}" && exit 0
+       break # exit the for loop
+     fi
+  done
+  if [[ -z "${fetch:-}" ]]; then
+    emergency "Package ${package_name:-} not recognized.  Use -l or --list-packages to list the allowable names."
+  fi
+}
diff --git a/prerequisites/install-binary-functions/set_or_print_url.sh b/prerequisites/install-binary-functions/set_or_print_url.sh
new file mode 100644
index 0000000..9de3f0f
--- /dev/null
+++ b/prerequisites/install-binary-functions/set_or_print_url.sh
@@ -0,0 +1,52 @@
+# If -p, -D, -P, or -V specifies a package, set package_url
+# If -U specifies a package, print the package_url and exit with normal status
+# shellcheck disable=SC2154
+set_or_print_url()
+{
+  # Verify requirements
+  if [[ ! -z "${arg_U}" ]]; then
+    if [[ "${arg_N:-}" == "${__flag_present}" || ! -z "${arg_D:-${arg_p:-${arg_P:-${arg_V}}}}" ]]; then
+      emergency "Please pass only one of {-D, -N, -p, -P, -U, -V} or a longer equivalent (multiple detected)."
+    fi
+  fi
+  # Get package name from argument passed with  -p, -D, -P, -V, or -U
+  package_name="${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}}"
+
+  package_url_head=(
+    "strategoxt-superbundle;https://github.com/sourceryinstitute/opencoarrays/files/212509/"
+  )
+  for package in "${package_url_head[@]}" ; do
+     KEY="${package%%;*}"
+     VALUE="${package##*;}"
+     if [[ "${package_name}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the URL head:
+       url_head="${VALUE}"
+       break
+     fi
+  done
+
+  package_url_tail=(
+    "strategoxt-superbundle;strategoxt-superbundle-${version_to_build}.tar.gz"
+  )
+  for package in "${package_url_tail[@]}" ; do
+     KEY="${package%%;*}"
+     VALUE="${package##*;}"
+     if [[ "${package_name}" == "${KEY}" ]]; then
+       # We recognize the package name so we set the URL tail:
+       url_tail="${VALUE}"
+       break
+     fi
+  done
+
+  if [[ -z "${url_head:-}" || -z "${url_tail:-}" ]]; then
+    emergency "Package ${package_name:-} not recognized.  Use -l or --list-packages to list the allowable names."
+  fi
+
+  package_url="${url_head}""${url_tail}"
+
+  # If a printout of the package URL was requested, then print it and exit with normal status
+  if [[ ! -z "${arg_U:-}" ]]; then
+    printf "%s\n" "${package_url}"
+    exit 0
+  fi
+}
diff --git a/prerequisites/install-binary.sh b/prerequisites/install-binary.sh
new file mode 100755
index 0000000..c9939fb
--- /dev/null
+++ b/prerequisites/install-binary.sh
@@ -0,0 +1,142 @@
+#!/usr/bin/env bash
+# BASH3 Boilerplate
+#
+# install-binary.sh
+#
+#  - Build OpenCoarrays prerequisite packages and their prerequisites
+#
+# Usage: LOG_LEVEL=7 B3B_USE_CASE=/opt/bash3boilerplate/src/use-case ./my-script.sh -f script_input.txt
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+# The invocation of bootstrap.sh below performs the following tasks:
+# (1) Import several bash3boilerplate helper functions & default settings.
+# (2) Set several variables describing the current file and its usage page.
+# (3) Parse the usage information (default usage file name: current file's name with -usage appended).
+# (4) Parse the command line using the usage information.
+
+
+### Start of boilerplate -- do not edit this block #######################
+export OPENCOARRAYS_SRC_DIR="${OPENCOARRAYS_SRC_DIR:-${PWD}/../}"
+if [[ ! -f "${OPENCOARRAYS_SRC_DIR}/src/libcaf.h" ]]; then
+  echo "Please run this script inside the top-level OpenCoarrays source \"prerequisites\" "
+  echo "subdirectory or set OPENCOARRAYS_SRC_DIR to the OpenCoarrays source directory path."
+  exit 1
+fi
+export __usage="${OPENCOARRAYS_SRC_DIR}/prerequisites/install-binary.sh-usage"
+export B3B_USE_CASE="${B3B_USE_CASE:-${OPENCOARRAYS_SRC_DIR}/prerequisites/use-case}"
+if [[ ! -f "${B3B_USE_CASE:-}/bootstrap.sh" ]]; then
+  echo "Please set B3B_USE_CASE to the bash3boilerplate use-case directory path."
+  exit 2
+else
+    # shellcheck source=./prerequisites/use-case/bootstrap.sh
+    source "${B3B_USE_CASE}/bootstrap.sh" "$@"
+fi
+### End of boilerplate -- start user edits below #########################
+
+
+# Set up a function to call when receiving an EXIT signal to do some cleanup. Remove if
+# not needed. Other signals can be trapped too, like SIGINT and SIGTERM.
+function cleanup_before_exit () {
+  info "Cleaning up. Done"
+}
+trap cleanup_before_exit EXIT # The signal is specified here. Could be SIGINT, SIGTERM etc.
+
+### Validation (decide what's required for running your script and error out)
+#####################################################################
+
+export __flag_present=1
+# shellcheck disable=SC2154
+if [[ "${arg_l}" != "${__flag_present}" && "${arg_N}" != "${__flag_present}" &&
+      "${arg_v}" != "${__flag_present}" && "${arg_h}" != "${__flag_present}" &&
+      -z "${arg_D:-${arg_p:-${arg_P:-${arg_U:-${arg_V}}}}}" ]]; then
+  help "${__base}: Insufficient arguments. Please pass either -D, -h, -N, -l, -p, -P, -U, -v, -V, or a longer equivalent."
+fi
+
+# Suppress info and debug messages if -l, -P, -U, -V, -D, or their longer equivalent is present:
+[[ "${arg_l}" == "${__flag_present}" || ! -z "${arg_P:-${arg_U:-${arg_V:-${arg_D}}}}" ]] && suppress_info_debug_messages
+
+[ -z "${LOG_LEVEL:-}" ] && emergency "Cannot continue without LOG_LEVEL. "
+
+### Enforce mutual exclusivity of arguments that print single-line output
+[ ! -z "${arg_P:-}" ] && [ ! -z "${arg_V:-}" ] && emergency "Only specify one of -P, -U, -V, or their long-form equivalents."
+[ ! -z "${arg_P:-}" ] && [ ! -z "${arg_U:-}" ] && emergency "Only specify one of -P, -U, -V, or their long-form equivalents."
+[ ! -z "${arg_U:-}" ] && [ ! -z "${arg_V:-}" ] && emergency "Only specify one of -P, -U, -V, or their long-form equivalents."
+
+### Print bootstrapped magic variables to STDERR when LOG_LEVEL
+### is at the default value (6) or above.
+#####################################################################
+# shellcheck disable=SC2154
+{
+info "__file: ${__file}"
+info "__dir: ${__dir}"
+info "__base: ${__base}"
+info "__os: ${__os}"
+info "__usage: ${__usage}"
+info "LOG_LEVEL: ${LOG_LEVEL}"
+
+info "-d (--debug):            ${arg_d} "
+info "-D (--print-downloader): ${arg_D} "
+info "-e (--verbose):          ${arg_e} "
+info "-h (--help):             ${arg_h} "
+info "-i (--install-dir):      ${arg_i} "
+info "-I (--install-version):  ${arg_I} "
+info "-l (--list-packages):    ${arg_l} "
+info "-n (--no-color):         ${arg_n} "
+info "-p (--package):          ${arg_p}"
+info "-P (--print-path):       ${arg_P} "
+info "-U (--print-url):        ${arg_U} "
+info "-v (--version):          ${arg_v} "
+info "-V (--print-version):    ${arg_V} "
+}
+# shellcheck source=./install-binary-functions/set_or_print_default_version.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/install-binary-functions/set_or_print_default_version.sh"
+set_or_print_default_version
+export version_to_build="${arg_I:-${default_version}}"
+
+# shellcheck source=./install-binary-functions/set_or_print_downloader.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/install-binary-functions/set_or_print_downloader.sh"
+set_or_print_downloader
+
+# shellcheck source=./install-binary-functions/set_or_print_url.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/install-binary-functions/set_or_print_url.sh"
+set_or_print_url
+
+# shellcheck source=./build-functions/set_or_print_installation_path.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_or_print_installation_path.sh"
+set_or_print_installation_path
+
+# shellcheck source=./build-functions/download_if_necessary.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/download_if_necessary.sh"
+download_if_necessary
+
+# shellcheck source=./build-functions/unpack_if_necessary.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/unpack_if_necessary.sh"
+unpack_if_necessary
+
+# shellcheck source=./install-binary-functions/set_or_print_csv_binary_names.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/install-binary-functions/set_or_print_csv_binary_names.sh"
+set_or_print_csv_binary_names
+
+# shellcheck source=./build-functions/set_SUDO_if_needed_to_write_to_directory.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/build-functions/set_SUDO_if_needed_to_write_to_directory.sh"
+set_SUDO_if_needed_to_write_to_directory "${arg_i}"
+
+# shellcheck source=./install-binary-functions/move_binaries_to_install_path.sh
+source "${OPENCOARRAYS_SRC_DIR:-}/prerequisites/install-binary-functions/move_binaries_to_install_path.sh"
+move_binaries_to_install_path
diff --git a/prerequisites/install-binary.sh-usage b/prerequisites/install-binary.sh-usage
new file mode 100644
index 0000000..f45f345
--- /dev/null
+++ b/prerequisites/install-binary.sh-usage
@@ -0,0 +1,14 @@
+  -d --debug                  Enable debug mode.
+  -D --print-downloader [arg] Print download program for package specified in argument. 
+  -e --verbose                Enable verbose mode, print script as it is executed.
+  -h --help                   This page.
+  -i --install-dir [arg]      Install package in specified path. Default="/opt"
+  -I --install-version [arg]  Package version to install. (To see default, use -V or --print-version)
+  -l --list-packages          List the packages this script can install.
+  -n --no-color               Disable color output.
+  -N --print-names            Print the names of the files that install with the specified package.
+  -p --package [arg]          Package to install. 
+  -P --print-path [arg]       Print installation path for package specified in argument.
+  -U --print-url [arg]        Print URL for package specified in argument. 
+  -v --version                Print OpenCoarrays version number.
+  -V --print-version [arg]    Print installation version for package specified in argument.
diff --git a/prerequisites/install-functions/build_opencoarrays.sh b/prerequisites/install-functions/build_opencoarrays.sh
new file mode 100644
index 0000000..42dbe80
--- /dev/null
+++ b/prerequisites/install-functions/build_opencoarrays.sh
@@ -0,0 +1,26 @@
+# shellcheck disable=SC2154
+build_opencoarrays()
+{
+  print_header
+  info "Invoking find_or_install mpich"
+  find_or_install mpich
+  info "Invoking find_or_install cmake"
+  find_or_install cmake
+  build_path="${build_path}"/opencoarrays/$("${opencoarrays_src_dir}"/install.sh -V opencoarrays)
+  mkdir -p "$build_path"
+  pushd "$build_path"
+  if [[ -z "${MPICC:-}" || -z "${MPIFC:-}" || -z "${CMAKE:-}" ]]; then
+    emergency "Empty MPICC=$MPICC or MPIFC=$MPIFC or CMAKE=$CMAKE [exit 90]"
+  else
+    info "Configuring OpenCoarrays in ${PWD} with the command:"
+    info "CC=\"${MPICC}\" FC=\"${MPIFC}\" $CMAKE \"${opencoarrays_src_dir}\" -DCMAKE_INSTALL_PREFIX=\"${install_path}\""
+    CC="${MPICC}" FC="${MPIFC}" $CMAKE "${opencoarrays_src_dir}" -DCMAKE_INSTALL_PREFIX="${install_path}"
+    info "Building OpenCoarrays in ${PWD} with the command make -j${num_threads}"
+    make "-j${num_threads}"
+    if [[ ! -z ${SUDO:-} ]]; then
+      printf "\nThe chosen installation path requires sudo privileges. Please enter password if prompted.\n"
+    fi
+    info "Installing OpenCoarrays in ${install_path} with the command ${SUDO:-} make install"
+    ${SUDO:-} make install
+  fi
+}
diff --git a/prerequisites/install-functions/find_or_install.sh b/prerequisites/install-functions/find_or_install.sh
new file mode 100644
index 0000000..98aa058
--- /dev/null
+++ b/prerequisites/install-functions/find_or_install.sh
@@ -0,0 +1,644 @@
+# shellcheck disable=SC2154,SC2034
+find_or_install()
+{
+  package="$1"
+  # This is a bash 3 hack standing in for a bash 4 hash (bash 3 is the lowest common
+  # denominator because, for licensing reasons, OS X only has bash 3 by default.)
+  # See http://stackoverflow.com/questions/1494178/how-to-define-hash-tables-in-bash
+  package_executable_array=(
+    "gcc:gfortran"
+    "cmake:cmake"
+    "mpich:mpif90"
+    "flex:flex"
+    "bison:yacc"
+    "m4:m4"
+    "_unknown:0"
+  )
+  for element in "${package_executable_array[@]}" ; do
+     KEY="${element%%:*}"
+     VALUE="${element##*:}"
+     if [[ "$KEY" == "_unknown" ]]; then
+       # No recognizeable argument passed so print usage information and exit:
+       printf "%s: Package name (%s) not recognized in find_or_install function [exit 40].\n" "$this_script" "$package"
+       exit 40
+     elif [[ $package == "$KEY" ]]; then
+       executable=$VALUE
+       break
+     fi
+  done
+
+  if [[ "$package" == "$executable" ]]; then
+    printf "$this_script: Checking whether $executable is in the PATH..."
+  else
+    printf "$this_script: Checking whether $package executable $executable is in the PATH..."
+  fi
+  if type "$executable" >& /dev/null; then
+    printf "yes.\n"
+    package_in_path=true
+    package_version_in_path=$("$executable" --version|head -1)
+  else
+    printf "no.\n"
+    package_in_path=false
+  fi
+
+  package_install_path=$(./build.sh -P "$package")
+
+  printf "Checking whether $executable is in $package_install_path..."
+  if [[ -x "$package_install_path/bin/$executable" ]]; then
+    printf "yes.\n"
+    script_installed_package=true
+    stack_push script_installed "$package" "$executable"
+  else
+    script_installed_package=false
+    printf "no.\n"
+  fi
+
+  minimum_version=$(./build.sh -V "$package")
+
+  if [[ "$package" == "cmake" ]]; then
+
+    # We arrive here only by the explicit, direct call 'find_or_install cmake' inside
+    # the build_opencoarrays function. Because there is no possibility of arriving here
+    # by recursion (no packages depend on cmake except OpenCoarrays, which gets built
+    # after all dependencies have been found or installed), cmake must add itself to
+    # the dependency stack if no acceptable cmake is found.
+
+    # Every branch that discovers an acceptable pre-existing installation must set the
+    # CMAKE environment variable. Every branch must also manage the dependency stack.
+
+    if [[ "$script_installed_package" == true ]]; then
+      echo -e "$this_script: Using the $package installed by $this_script\n"
+      export CMAKE=$package_install_path/bin/$executable
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+    elif [[ "$package_in_path" == "true" ]]; then
+      echo -e "$this_script: Checking whether $package in PATH is version < $minimum_version... "
+
+      if ! ./check_version.sh "$package" "$(./build.sh -V "$package")"; then
+        printf "yes.\n"
+        # Here we place $package on the dependency stack to trigger the build of the above file:
+        stack_push dependency_pkg "$package" "none"
+        stack_push dependency_exe "$package" "none"
+        stack_push dependency_path "$(./build.sh -P cmake)" "none"
+
+      else
+        printf "no.\n"
+        echo -e "$this_script: Using the $executable found in the PATH.\n"
+        export CMAKE=$executable
+        # Prevent recursion
+        stack_push dependency_pkg "none"
+        stack_push dependency_exe "none"
+        stack_push dependency_path "none"
+      fi
+
+    else # Build package ($package has no prerequisites)
+      stack_push dependency_pkg "$package" "none"
+      stack_push dependency_exe "$package" "none"
+      stack_push dependency_path "$(./build.sh -P "$package")" "none"
+    fi
+
+  elif [[ $package == "mpich" ]]; then
+
+    # We arrive here only by the explicit, direct call 'find_or_install mpich' inside
+    # the build_opencoarrays function. Because there is no possibility of arriving here
+    # by recursion (no packages depend on mpich except OpenCoarrays, which gets built
+    # after all dependencies have been found or installed), mpich must add itself to
+    # the dependency stack if no acceptable mpich is found.
+
+    # Every branch that discovers an acceptable pre-existing installation must set the
+    # MPIFC, MPICC, and MPICXX environment variables. Every branch must also manage the
+    # dependency stack.
+   
+    # If the user specified a Fortran compiler, verify that mpif90 wraps the specified compiler
+    if [[ ! -z "${arg_M:-}" ]]; then
+
+      echo -e "$this_script: Using the $package specified by -M or --with-mpi: ${arg_M}\n"
+      export MPIFC="${arg_M}"/bin/mpif90
+      export MPICC="${arg_M}"/bin/mpicc
+      export MPICXX="${arg_M}"/bin/mpicxx
+      # Halt the recursion
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+      if [[ ! -z "${arg_f:-}" ]]; then
+        # -f or --with-fortran argument specifies a compiler, which we use if mpif90
+        # invokes the specified compiler.  Otherwise, we halt and print an error message.
+        MPIFC_wraps=$(${MPIFC} --version)
+        compiler=$(${arg_f} --version)
+        if [[ "${MPIFC_wraps}" != "${compiler}"   ]]; then
+          emergency "Specified MPI ${MPIFC_wraps} wraps a compiler other than the specified Fortran compiler ${compiler}"
+        fi
+      fi
+
+    elif [[ "$script_installed_package" == true ]]; then
+
+      echo -e "$this_script: Using the $package installed by $this_script\n"
+      export MPIFC=$package_install_path/bin/mpif90
+      export MPICC=$package_install_path/bin/mpicc
+      export MPICXX=$package_install_path/bin/mpicxx
+      # Halt the recursion
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+    elif [[ "$package_in_path" == "true" ]]; then
+
+      echo -e "$this_script: Checking whether $executable in PATH wraps gfortran... "
+      mpif90_version_header=$(mpif90 --version | head -1)
+      first_three_characters=$(echo "$mpif90_version_header" | cut -c1-3)
+      if [[ "$first_three_characters" != "GNU" ]]; then
+        printf "no.\n"
+        # Trigger 'find_or_install gcc' and subsequent build of $package
+        stack_push dependency_pkg "none" "$package" "gcc"
+        stack_push dependency_exe "none" "$executable" "gfortran"
+        stack_push dependency_path "none" "$(./build.sh -P "$package")" "$(./build.sh -P gcc)"
+      else
+        printf "yes.\n"
+
+        if [[ ! -z "${arg_f:-}" ]]; then
+          
+          info "-f (or --with-fortran) argument detected with value ${arg_f}"
+          printf "yes.\n $this_script: Using the specified $executable.\n"
+          export MPIFC=mpif90
+          export MPICC=mpicc
+          export MPICXX=mpicxx
+  
+          # Halt the recursion
+          stack_push dependency_pkg "none"
+          stack_push dependency_exe "none"
+          stack_push dependency_path "none"
+
+        else
+
+          echo -e "$this_script: Checking whether $executable in PATH wraps gfortran version `./build.sh -V gcc` or later... "
+          $executable acceptable_compiler.f90 -o acceptable_compiler
+          $executable print_true.f90 -o print_true
+          acceptable=$(./acceptable_compiler)
+          is_true=$(./print_true)
+          rm acceptable_compiler print_true
+         
+          if [[ "$acceptable" == "$is_true" ]]; then
+            printf "yes.\n $this_script: Using the $executable found in the PATH.\n"
+            export MPIFC=mpif90
+            export MPICC=mpicc
+            export MPICXX=mpicxx
+  
+            # Halt the recursion
+            stack_push dependency_pkg "none"
+            stack_push dependency_exe "none"
+            stack_push dependency_path "none"
+          else
+            printf "no\n"
+            # Trigger 'find_or_install gcc' and subsequent build of $package
+            stack_push dependency_pkg "none" "$package" "gcc"
+            stack_push dependency_exe "none" "$executable" "gfortran"
+            stack_push dependency_path "none" "$(./build.sh -P "$package")" "$(./build.sh -P gcc)"
+          fi
+        fi
+      fi
+
+    else # $package not in PATH and not yet installed by this script
+      # Trigger 'find_or_install gcc' and subsequent build of $package
+      stack_push dependency_pkg  "none" "$package" "gcc"
+      stack_push dependency_exe  "none" "$executable" "gfortran"
+      stack_push dependency_path "none" "$(./build.sh -P "$package")" "$(./build.sh -P gcc)"
+    fi
+
+  elif [[ $package == "gcc" ]]; then
+
+    # We arrive here when the 'elif [[ $package == "mpich" ]]' block pushes "gcc" onto the
+    # the dependency_pkg stack, resulting in the recursive call 'find_or_install gcc'
+
+    # Every branch that discovers an acceptable pre-existing installation must set the
+    # FC, CC, and CXX environment variables. Every branch must also manage the dependency stack.
+
+    if [[ ! -z "${arg_f:-}" ]]; then
+
+      info "-f (or --with-fortran) argument detected with value ${arg_f}"
+      [ -z "${arg_c:-}" ] && emergency "-f (--with-fortran) specifies Fortran compiler; Please also specify C/C++ compilers"
+      [ -z "${arg_C:-}" ] && emergency "-f (--with-fortran) specifies Fortran compiler; Please also specify C/C++ compilers"
+
+      export FC="${arg_f}"
+      export CC="${arg_c}"
+      export CXX="${arg_C}"
+
+      # Remove $package from the dependency stack
+      stack_pop dependency_pkg package_done
+      stack_pop dependency_exe executable_done
+      stack_pop dependency_path package_done_path
+      # Halt the recursion and signal that none of $package's prerequisites need to be built
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+    elif [[ "$script_installed_package" == true ]]; then
+      echo -e "$this_script: Using the $package executable $executable installed by $this_script\n"
+      export FC=$package_install_path/bin/gfortran
+      export CC=$package_install_path/bin/gcc
+      export CXX=$package_install_path/bin/g++
+      gfortran_lib_paths="$package_install_path/lib64/:$package_install_path/lib"
+      if [[ -z "${LD_LIBRARY_PATH:-}" ]]; then
+        echo "$this_script: export LD_LIBRARY_PATH=\"$gfortran_lib_paths\""
+                            export LD_LIBRARY_PATH="$gfortran_lib_paths"
+      else
+        echo "$this_script: export LD_LIBRARY_PATH=\"$gfortran_lib_paths:$LD_LIBRARY_PATH\""
+                            export LD_LIBRARY_PATH="$gfortran_lib_paths:$LD_LIBRARY_PATH"
+      fi
+      # Remove $package from the dependency stack
+      stack_pop dependency_pkg package_done
+      stack_pop dependency_exe executable_done
+      stack_pop dependency_path package_done_path
+      # Put $package onto the script_installed log
+      stack_push script_installed package_done
+      stack_push script_installed executable_done
+      stack_push script_installed package_done_path
+      # Halt the recursion and signal that none of $package's prerequisites need to be built
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+    elif [[ "$package_in_path" == "true" ]]; then
+      echo -e "$this_script: Checking whether $executable in PATH is version `./build.sh -V gcc` or later..."
+      $executable -o acceptable_compiler acceptable_compiler.f90
+      $executable -o print_true print_true.f90
+      is_true=$(./print_true)
+      acceptable=$(./acceptable_compiler)
+      rm acceptable_compiler print_true
+      if [[ "$acceptable" == "$is_true" ]]; then
+        printf "yes.\n"
+        echo -e "$this_script: Using the $executable found in the PATH.\n"
+        export FC=gfortran
+        export CC=gcc
+        export CXX=g++
+        # Remove $package from the dependency stack
+        stack_pop dependency_pkg package_done
+        stack_pop dependency_exe executable_done
+        stack_pop dependency_path package_done_path
+        # Put $package onto the script_installed log
+        stack_push script_installed package_done
+        stack_push script_installed executable_done
+        stack_push script_installed package_done_path
+        # Halt the recursion and signal that none of $package's prerequisites need to be built
+        stack_push dependency_pkg "none"
+        stack_push dependency_exe "none"
+        stack_push dependency_path "none"
+      else
+        printf "no.\n"
+        # Trigger 'find_or_install flex' and subsequent build of $package
+        stack_push dependency_pkg "flex"
+        stack_push dependency_exe "flex"
+        stack_push dependency_path "$(./build.sh -P flex)"
+      fi
+
+    else # $package is not in PATH and not yet installed by this script
+      # Trigger 'find_or_install flex' and subsequent build of $package
+      stack_push dependency_pkg "flex"
+      stack_push dependency_exe "flex"
+      stack_push dependency_path "$(./build.sh -P flex)"
+    fi
+
+  elif [[ $package == "flex" ]]; then
+
+    # We arrive here only if the 'elif [[ $package == "gcc" ]]' block has pushed "flex"
+    # onto the dependency_pkg stack, resulting in the recursive call 'find_or_install flex'.
+    # flex therefore does not need to add itself to the stack.
+
+    # Every branch that discovers an acceptable pre-existing installation must set the
+    # FLEX environment variable. Every branch must also manage the dependency stack.
+
+    if [[ "$script_installed_package" == true ]]; then
+      echo -e "$this_script: Using the $executable installed by $this_script\n"
+      export FLEX=$package_install_path/bin/$executable
+      # Remove flex from the dependency stack
+      stack_pop dependency_pkg package_done
+      stack_pop dependency_exe executable_done
+      stack_pop dependency_path package_done_path
+      # Put $package onto the script_installed log
+      stack_push script_installed package_done
+      stack_push script_installed executable_done
+      stack_push script_installed package_done_path
+      # Halt the recursion and signal that no prerequisites need to be built
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+    elif [[ "$package_in_path" == "true" ]]; then
+
+      echo -e "$this_script: Checking whether $package in PATH is version < $minimum_version... "
+      if ! ./check_version.sh "$package" "$(./build.sh -V "$package")"; then
+        printf "yes\n"
+
+        export FLEX="$package_install_path/bin/$executable"
+        # Trigger 'find_or_install bison' and subsequent build of $package
+        stack_push dependency_pkg "bison"
+        stack_push dependency_exe "yacc"
+        stack_push dependency_path "$(./build.sh -P bison)"
+      else
+        printf "no.\n"
+        echo -e "$this_script: Using the $executable found in the PATH.\n"
+        export FLEX=$executable
+        # Remove $package from the dependency stack
+        stack_pop dependency_pkg package_done
+        stack_pop dependency_exe executable_done
+        stack_pop dependency_path package_done_path
+        # Put $package onto the script_installed log
+        stack_push script_installed package_done
+        stack_push script_installed executable_done
+        stack_push script_installed package_done_path
+        # Halt the recursion and signal that none of $package's prerequisites need to be built
+        stack_push dependency_pkg "none"
+        stack_push dependency_exe "none"
+        stack_push dependency_path "none"
+      fi
+
+    else  # $package is not in the PATH and not yet installed by $this_script
+      # Trigger 'find_or_install bison' and subsequent build of $package
+      stack_push dependency_pkg "bison"
+      stack_push dependency_exe "yacc"
+      stack_push dependency_path "$(./build.sh -P bison)"
+    fi
+
+  elif [[ $package == "bison" ]]; then
+
+    # We arrive when the 'elif [[ $package == "flex" ]]' block pushes "bison" onto the
+    # the dependency_pkg stack, resulting in the recursive call 'find_or_install bison'
+
+    # Every branch that discovers an acceptable pre-existing installation must set the
+    # YACC environment variable. Every branch must also manage the dependency stack.
+
+    if [[ "$script_installed_package" == true ]]; then
+      echo -e "$this_script: Using the $package executable $executable installed by $this_script\n"
+      export YACC=$package_install_path/bin/yacc
+      # Remove bison from the dependency stack
+      stack_pop dependency_pkg package_done
+      stack_pop dependency_exe executable_done
+      stack_pop dependency_path package_done_path
+      # Put $package onto the script_installed log
+      stack_push script_installed package_done
+      stack_push script_installed executable_done
+      stack_push script_installed package_done_path
+      # Halt the recursion and signal that there are no prerequisites to build
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+    elif [[ "$package_in_path" == "true" ]]; then
+      echo -e "$this_script: Checking whether $package executable $executable in PATH is version < $minimum_version... "
+      if ! ./check_version.sh "$package" "$(./build.sh -V "$package")"; then
+        printf "yes.\n"
+        export YACC="$package_install_path/bin/$executable"
+        # Trigger 'find_or_install m4' and subsequent build of $package
+        stack_push dependency_pkg "m4"
+        stack_push dependency_exe "m4"
+        stack_push dependency_path "$(./build.sh -P m4)"
+      else
+        printf "no.\n"
+        echo -e "$this_script: Using the $package executable $executable found in the PATH.\n"
+        YACC=yacc
+        # Remove bison from the dependency stack
+        stack_pop dependency_pkg package_done
+        stack_pop dependency_exe executable_done
+        stack_pop dependency_path package_done_path
+        # Put $package onto the script_installed log
+        stack_push script_installed package_done
+        stack_push script_installed executable_done
+        stack_push script_installed package_done_path
+        # Halt the recursion and signal that there are no prerequisites to build
+        stack_push dependency_pkg "none"
+        stack_push dependency_exe "none"
+        stack_push dependency_path "none"
+      fi
+
+    else # $package not in PATH and not yet installed by this script
+      # Trigger 'find_or_install m4' and subsequent build of $package
+      stack_push dependency_pkg "m4"
+      stack_push dependency_exe "m4"
+      stack_push dependency_path "$(./build.sh -P m4)"
+    fi
+
+  elif [[ $package == "m4" ]]; then
+
+    # We arrive when the 'elif [[ $package == "bison" ]]' block pushes "m4" onto the
+    # the dependency_pkg stack, resulting in the recursive call 'find_or_install m4'
+
+    # Every branch that discovers an acceptable pre-existing installation must set the
+    # M4 environment variable. Every branch must also manage the dependency stack.
+
+    if [[ "$script_installed_package" == true ]]; then
+      echo -e "$this_script: Using the $package executable $executable installed by $this_script\n"
+      export M4=$package_install_path/bin/m4
+      # Remove m4 from the dependency stack
+      stack_pop dependency_pkg package_done
+      stack_pop dependency_exe executable_done
+      stack_pop dependency_path package_done_path
+      # Put $package onto the script_installed log
+      stack_push script_installed package_done
+      stack_push script_installed executable_done
+      stack_push script_installed package_done_path
+      # Halt the recursion and signal that there are no prerequisites to build
+      stack_push dependency_pkg "none"
+      stack_push dependency_exe "none"
+      stack_push dependency_path "none"
+
+    elif [[ "$package_in_path" == "true" ]]; then
+      echo -e "$this_script: Checking whether $package executable $executable in PATH is version < $minimum_version... "
+      if ! ./check_version.sh "$package" "$(./build.sh -V "$package")"; then
+        printf "yes.\n"
+        export M4="$package_install_path/bin/m4"
+        # Halt the recursion and signal that there are no prerequisites to build
+        stack_push dependency_pkg "none"
+        stack_push dependency_exe "none"
+        stack_push dependency_path "none"
+      else
+        printf "no.\n"
+        echo -e "$this_script: Using the $package executable $executable found in the PATH.\n"
+        M4=m4
+        # Remove m4 from the dependency stack
+        stack_pop dependency_pkg package_done
+        stack_pop dependency_exe executable_done
+        stack_pop dependency_path package_done_path
+        # Put $package onto the script_installed log
+        stack_push script_installed package_done
+        stack_push script_installed executable_done
+        stack_push script_installed package_done_path
+        # Halt the recursion and signal that there are no prerequisites to build
+        stack_push dependency_pkg "none"
+        stack_push dependency_exe "none"
+        stack_push dependency_path "none"
+      fi
+
+    else # $package not in PATH and not yet installed by this script
+      # Halt the recursion and signal that there are no prerequisites to build
+      export M4="$package_install_path/bin/m4"
+      stack_push dependency_pkg  "none"
+      stack_push dependency_exe  "none"
+      stack_push dependency_path "none"
+    fi
+
+  else
+    if [[ -z "${package:-}" ]]; then
+      echo -e "$this_script: empty package name passed to find_or_install function. [exit 50]\n"
+      exit 50
+    else
+      echo -e "$this_script: unknown package name ($package) passed to find_or_install function. [exit 55]\n"
+      exit 55
+    fi
+  fi
+
+  echo "$this_script: Updated dependency stack (top to bottom = left to right):"
+  stack_print dependency_pkg
+
+  stack_size dependency_pkg num_stacked
+  (( num_dependencies=num_stacked-1 )) || true
+
+  if [[ $num_dependencies -lt 0 ]]; then
+    emergency "The procedure named in the external call to find_or_install is not on the dependency stack. [exit 60]\n"
+
+  elif [[ $num_dependencies -gt 0 ]]; then
+    stack_pop  dependency_pkg  prerequisite_pkg
+    stack_pop  dependency_exe  prerequisite_exe
+    stack_pop  dependency_path prerequisite_path
+
+    if [[ $prerequisite_pkg != "none" ]]; then
+      stack_push dependency_pkg  "$prerequisite_pkg"
+      stack_push dependency_exe  "$prerequisite_exe"
+      stack_push dependency_path "$prerequisite_path"
+      echo -e "$this_script: Building $package from source requires $prerequisite_pkg.\n"
+      find_or_install "$prerequisite_pkg"
+    fi
+  fi
+
+  echo "$this_script: Remaining $package dependency stack (top to bottom = left to right):"
+  stack_print dependency_pkg
+
+  stack_pop dependency_pkg package
+  stack_pop dependency_exe executable
+  stack_pop dependency_path package_install_path
+
+  if [[ $package != "none" ]]; then
+
+    if [[ "$package" == "$executable" ]]; then
+      echo "$this_script: Ready to install $executable in $package_install_path"
+    else
+      echo "$this_script: Ready to install $package executable $executable in $package_install_path"
+    fi
+
+    echo -e "$this_script: Ok to download (if necessary), build, and install $package from source? (Y/n) "
+    read -r proceed_with_build
+
+    if [[ "$proceed_with_build" == "n" || "$proceed_with_build" == "no" ]]; then
+      printf "n\n"
+      echo -e "$this_script: OpenCoarrays installation requires $package. Aborting. [exit 70]\n"
+      exit 70
+
+    else # permission granted to build
+      printf "Y\n"
+
+      # On OS X, CMake must be built with Apple LLVM gcc, which XCode command-line tools puts in /usr/bin
+      if [[ $(uname) == "Darwin" && $package == "cmake"  ]]; then
+        if [[ -x "/usr/bin/gcc" ]]; then
+          CC=/usr/bin/gcc
+        else
+          echo -e "$this_script: OS X detected.  Please install XCode command-line tools and \n"
+          echo -e "$this_script: ensure that /usr/bin/gcc exists and is executable. Aborting. [exit 75]\n"
+          exit 75
+        fi
+      # Otherwise, if no CC has been defined yet, use the gcc in the user's PATH
+      elif [[ -z "${CC:-}" ]]; then
+        CC=gcc
+      fi
+
+      # On OS X, CMake must be built with Apple LLVM g++, which XCode command-line tools puts in /usr/bin
+      if [[ $(uname) == "Darwin" && $package == "cmake"  ]]; then
+        if [[ -x "/usr/bin/g++" ]]; then
+          CXX=/usr/bin/g++
+        else
+          echo -e "$this_script: OS X detected.  Please install XCode command-line tools \n"
+          echo -e "$this_script: and ensure that /usr/bin/g++ exists and is executable. Aborting. [exit 76]\n"
+          exit 76
+        fi
+      # Otherwise, if no CXX has been defined yet, use the g++ in the user's PATH
+      elif [[ -z "${CXX:-}" ]]; then
+        CXX=g++
+      fi
+
+      # If no FC has been defined yet, use the gfortran in the user's PATH
+      if [[ -z "${FC:-}" ]]; then
+        FC=gfortran
+      fi
+
+
+      # Strip trailing package name and version number, if present, from installation path
+      default_package_version=$(./build.sh -V ${package})
+      package_install_prefix="${package_install_path%${package}/${arg_I:-${default_package_version}}*}" 
+
+      echo -e "$this_script: Downloading, building, and installing $package \n"
+      echo "$this_script: Build command: FC=$FC CC=$CC CXX=$CXX ./build.sh -p $package -i $package_install_prefix -j $num_threads"
+      FC="$FC" CC="$CC" CXX="$CXX" ./build.sh -p "$package" -i "$package_install_prefix" -j "$num_threads"
+
+      if [[ -x "$package_install_path/bin/$executable" ]]; then
+        echo -e "$this_script: Installation successful.\n"
+        if [[ "$package" == "$executable" ]]; then
+          echo -e "$this_script: $executable is in $package_install_path/bin \n"
+        else
+          echo -e "$this_script: $package executable $executable is in $package_install_path/bin \n"
+        fi
+       # TODO Merge all applicable branches under one 'if [[ $package == $executable ]]; then'
+        if [[ $package == "cmake" ]]; then
+          echo "$this_script: export CMAKE=$package_install_path/bin/$executable"
+                              export CMAKE="$package_install_path/bin/$executable"
+        elif [[ $package == "bison" ]]; then
+          echo "$this_script: export YACC=$package_install_path/bin/$executable"
+                              export YACC="$package_install_path/bin/$executable"
+        elif [[ $package == "flex" ]]; then
+          echo "$this_script: export FLEX=$package_install_path/bin/$executable"
+                              export FLEX="$package_install_path/bin/$executable"
+        elif [[ $package == "m4" ]]; then
+          echo "$this_script: export M4=$package_install_path/bin/$executable"
+                              export M4="$package_install_path/bin/$executable"
+        elif [[ $package == "gcc" ]]; then
+          echo "$this_script: export FC=$package_install_path/bin/gfortran"
+                              export FC="$package_install_path/bin/gfortran"
+          echo "$this_script: export CC=$package_install_path/bin/gcc"
+                              export CC="$package_install_path/bin/gcc"
+          echo "$this_script: export CXX=$package_install_path/bin/g++"
+                              export CXX="$package_install_path/bin/g++"
+          gfortran_lib_paths="$package_install_path/lib64/:$package_install_path/lib"
+          if [[ -z "${LD_LIBRARY_PATH:-}" ]]; then
+            export LD_LIBRARY_PATH="$gfortran_lib_paths"
+          else
+            export LD_LIBRARY_PATH="$gfortran_lib_paths:$LD_LIBRARY_PATH"
+          fi
+        elif [[ $package == "mpich" ]]; then
+          echo "$this_script: export MPIFC=$package_install_path/bin/mpif90"
+                              export MPIFC="$package_install_path/bin/mpif90"
+          echo "$this_script: export MPICC= $package_install_path/bin/mpicc"
+                              export MPICC="$package_install_path/bin/mpicc"
+          echo "$this_script: export MPICXX=$package_install_path/bin/mpicxx"
+                              export MPICXX="$package_install_path/bin/mpicxx"
+        else
+          echo -e "$this_script: WARNING: $package executable $executable installed correctly but the \n"
+          echo -e "$this_script:          corresponding environment variable(s) have not been set. This \n"
+          echo -e "$this_script:          could prevent a complete build of OpenCoarrays. Please report this\n"
+          echo -e "$this_script:          issue at https://github.com/sourceryinstitute/opencoarrays/issues\n"
+        fi
+        if [[ -z "${PATH:-}" ]]; then
+          export PATH="$package_install_path/bin"
+        else
+          export PATH="$package_install_path/bin:$PATH"
+        fi
+      else
+        echo -e "$this_script: Installation unsuccessful. "
+        echo -e "$executable is not in the following expected path or the user lacks executable permission for it:\n"
+        echo -e "$package_install_path/bin \n"
+        printf "Aborting. [exit 80]"
+        exit 80
+      fi # End 'if [[ -x "$package_install_path/bin/$executable" ]]'
+
+    fi # End 'if [[ "$proceed_with_build" == "y" ]]; then'
+
+  fi # End 'if [[ "$package" != "none" ]]; then'
+}
diff --git a/prerequisites/install-functions/print_header.sh b/prerequisites/install-functions/print_header.sh
new file mode 100644
index 0000000..38a1342
--- /dev/null
+++ b/prerequisites/install-functions/print_header.sh
@@ -0,0 +1,34 @@
+# shellcheck disable=SC2154
+print_header()
+{
+  clear
+  echo ""
+  echo "*** By default, building OpenCoarrays requires CMake 3.4.0 or later,      ***"
+  echo "*** MPICH 3.1.4, and GCC Fortran (gfortran) 6.1.0 or later.  To see       ***"
+  echo "*** options for forcing the use of older or alternative packages, execute ***"
+  echo "*** this script with the -h flag.  This script will recursively traverse  ***"
+  echo "*** the following dependency tree, asking permission to download, build,  ***"
+  echo "*** and install any packages that are required for building another       ***"
+  echo "*** package and are neither in your PATH nor in                           ***"
+  echo "*** opencoarrays/prerequisites/installations:                             ***"
+  echo ""
+  # Move to a directory tree whose structure mirrors the dependency tree
+  pushd "$opencoarrays_src_dir/doc/dependency_tree/" > /dev/null
+  if type tree &> /dev/null; then
+    # dynamically compute and print the tree, suppressing the final line
+    tree opencoarrays  | sed '$d'
+  else
+    # print the most recently saved output of the above 'tree' command
+    sed '$d' < opencoarrays-tree.txt
+  fi
+  popd > /dev/null
+  echo ""
+  printf "${arg_p} will be installed in ${install_path}\n"
+  echo ""
+  printf "Ready to rock and roll? (Y/n)"
+  read -r install_now
+  echo -e " $install_now\n"
+  if [[ "$install_now" == "n" || "$install_now" == "no" ]]; then
+    emergency "$this_script: Aborting. [exit 85]\n"
+  fi
+}
diff --git a/prerequisites/install-functions/report_results.sh b/prerequisites/install-functions/report_results.sh
new file mode 100644
index 0000000..4f6ed77
--- /dev/null
+++ b/prerequisites/install-functions/report_results.sh
@@ -0,0 +1,139 @@
+# shellcheck disable=SC2154,SC2129
+report_results()
+{
+  # Report installation success or failure:
+  if [[ -x "$install_path/bin/caf" && -x "$install_path/bin/cafrun" ]]; then
+
+    # Installation succeeded
+    echo "$this_script: Done."
+    echo ""
+    echo "*** The OpenCoarrays compiler wrapper (caf) and program  ***"
+    echo "*** launcher (cafrun) are in the following directory:    ***"
+    echo ""
+    echo "$install_path/bin."
+    echo ""
+    if [[ -f setup.sh ]]; then
+      ${SUDO:-} rm setup.sh
+    fi
+    if [[ -f setup.csh ]]; then
+      ${SUDO:-} rm setup.csh
+    fi
+    # Prepend the OpenCoarrays license to the setup.sh script:
+    while IFS='' read -r line || [[ -n "$line" ]]; do
+        echo "# $line" >> setup.sh
+    done < "${opencoarrays_src_dir}/LICENSE"
+    while IFS='' read -r line || [[ -n "$line" ]]; do
+        echo "# $line" >> setup.csh
+    done < "${opencoarrays_src_dir}/LICENSE"
+    echo "#                                                                      " | tee -a setup.csh setup.sh
+    echo "# Execute this script via the following command:                       " | tee -a setup.csh setup.sh
+    echo "# source $install_path/setup.sh                                        " | tee -a setup.csh setup.sh
+    echo "                                                                       " | tee -a setup.csh setup.sh
+    gcc_install_path=$("${build_script}" -P gcc)
+    if [[ -x "$gcc_install_path/bin/gfortran" ]]; then
+      echo "if [[ -z \"\$PATH\" ]]; then                                         " >> setup.sh
+      echo "  export PATH=\"$gcc_install_path/bin\"                              " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export PATH=\"$gcc_install_path/bin:\$PATH\"                       " >> setup.sh
+      echo "fi                                                                   " >> setup.sh
+      echo "set path = (\"$gcc_install_path\"/bin "\$path")                      " >> setup.csh
+    fi
+    if [[ -d "$gcc_install_path/lib" || -d "$gcc_install_path/lib64" ]]; then
+      gfortran_lib_paths="$gcc_install_path/lib64/:$gcc_install_path/lib"
+      echo "if [[ -z \"\$LD_LIBRARY_PATH\" ]]; then                              " >> setup.sh
+      echo "  export LD_LIBRARY_PATH=\"$gfortran_lib_paths\"                     " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export LD_LIBRARY_PATH=\"$gfortran_lib_paths:\$LD_LIBRARY_PATH\"   " >> setup.sh
+      echo "fi                                                                   " >> setup.sh
+      echo "set LD_LIBRARY_PATH = (\"$gfortran_lib_paths\"/bin "\$LD_LIBRARY_PATH")                      " >> setup.csh
+    fi
+    echo "                                                                       " >> setup.sh
+    mpich_install_path=$("${build_script}" -P mpich)
+    if [[ -x "$mpich_install_path/bin/mpif90" ]]; then
+      echo "if [[ -z \"\$PATH\" ]]; then                                         " >> setup.sh
+      echo "  export PATH=\"$mpich_install_path/bin\"                            " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export PATH=\"$mpich_install_path/bin\":\$PATH                     " >> setup.sh
+      echo "fi                                                                   " >> setup.sh
+      echo "set path = (\"$mpich_install_path\"/bin "\$path")                      " >> setup.csh
+    fi
+    cmake_install_path=$("${build_script}" -P cmake)
+    if [[ -x "$cmake_install_path/bin/cmake" ]]; then
+      echo "if [[ -z \"\$PATH\" ]]; then                                         " >> setup.sh
+      echo "  export PATH=\"$cmake_install_path/bin\"                            " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export PATH=\"$cmake_install_path/bin\":\$PATH                     " >> setup.sh
+      echo "fi                                                                   " >> setup.sh
+      echo "set path = (\"$cmake_install_path\"/bin "\$path")                      " >> setup.csh
+    fi
+    flex_install_path=$("${build_script}" -P flex)
+    if [[ -x "$flex_install_path/bin/flex" ]]; then
+      echo "if [[ -z \"\$PATH\" ]]; then                                         " >> setup.sh
+      echo "  export PATH=\"$flex_install_path/bin\"                             " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export PATH=\"$flex_install_path/bin\":\$PATH                      " >> setup.sh
+      echo "set path = (\"$flex_install_path\"/bin "\$path")                      " >> setup.csh
+      echo "fi                                                                   " >> setup.sh
+    fi
+    bison_install_path=$("${build_script}" -P bison)
+    if [[ -x "$bison_install_path/bin/yacc" ]]; then
+      echo "if [[ -z \"\$PATH\" ]]; then                                         " >> setup.sh
+      echo "  export PATH=\"$bison_install_path/bin\"                            " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export PATH=\"$bison_install_path/bin\":\$PATH                     " >> setup.sh
+      echo "fi                                                                   " >> setup.sh
+      echo "set path = (\"$bison_install_path\"/bin "\$path")                      " >> setup.csh
+    fi
+    m4_install_path=$("${build_script}" -P m4)
+    if [[ -x "$m4_install_path/bin/m4" ]]; then
+      echo "if [[ -z \"\$PATH\" ]]; then                                         " >> setup.sh
+      echo "  export PATH=\"$m4_install_path/bin\"                               " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export PATH=\"$m4_install_path/bin\":\$PATH                        " >> setup.sh
+      echo "fi                                                                   " >> setup.sh
+      echo "set path = (\"$m4_install_path\"/bin "\$path")                      " >> setup.csh
+    fi
+    opencoarrays_install_path="${install_path}"
+    if [[ -x "$opencoarrays_install_path/bin/caf" ]]; then
+      echo "if [[ -z \"\$PATH\" ]]; then                                         " >> setup.sh
+      echo "  export PATH=\"$opencoarrays_install_path/bin\"                     " >> setup.sh
+      echo "else                                                                 " >> setup.sh
+      echo "  export PATH=\"$opencoarrays_install_path/bin\":\$PATH              " >> setup.sh
+      echo "fi                                                                   " >> setup.sh
+      echo "set path = (\"$opencoarrays_install_path\"/bin "\$path")                      " >> setup.csh
+    fi
+    if ${SUDO:-} mv setup.sh "$opencoarrays_install_path"; then
+       setup_sh_location=$opencoarrays_install_path
+    else
+       setup_sh_location=${PWD}
+    fi
+    if ${SUDO:-} mv setup.csh "$opencoarrays_install_path"; then
+       setup_csh_location=$opencoarrays_install_path
+    else
+       setup_csh_location=${PWD}
+    fi
+    echo "*** To set up your environment for using caf and cafrun, please   ***"
+    echo "*** source the installed setup.sh file in a bash shell  setup.csh ***"
+    echo "*** if you use a C-shell as follows (or add one of the following  ***"
+    echo "*** statements to your login file:                                 ***"
+    echo ""
+    echo " source $setup_sh_location/setup.sh"
+    echo " source $setup_csh_location/setup.csh"
+    echo ""
+    echo "*** Installation complete.                                                   ***"
+
+  else # Installation failed
+
+    echo "Something went wrong. Either the user lacks executable permissions for the"
+    echo "OpenCoarrays compiler wrapper (caf), program launcher (cafrun), or prerequisite"
+    echo "package installer (build), or these programs are not in the following, expected"
+    echo "location:"
+    echo "$install_path/bin."
+    echo "Please review the following file for more information:"
+    echo "$install_path/$installation_record"
+    echo "and submit an bug report at https://github.com/sourceryinstitute/opencoarrays/issues"
+    echo "[exit 100]"
+    exit 100
+
+  fi # Ending check for caf, cafrun, build not in expected path
+}
diff --git a/prerequisites/install-ofp.sh b/prerequisites/install-ofp.sh
new file mode 100755
index 0000000..7fde8e5
--- /dev/null
+++ b/prerequisites/install-ofp.sh
@@ -0,0 +1,179 @@
+#!/usr/bin/env bash
+# BASH3 Boilerplate
+#
+# install-ofp.sh
+#
+#  - Build the Open Fortran Parser
+#
+# Usage: ./install-ofp.sh -i /opt
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+# The invocation of bootstrap.sh below performs the following tasks:
+# (1) Import several bash3boilerplate helper functions & default settings.
+# (2) Set several variables describing the current file and its usage page.
+# (3) Parse the usage information (default usage file name: current file's name with -usage appended).
+# (4) Parse the command line using the usage information.
+
+export OPENCOARRAYS_SRC_DIR="${OPENCOARRAYS_SRC_DIR:-${PWD}/..}"
+if [[ ! -f "${OPENCOARRAYS_SRC_DIR}/src/libcaf.h" ]]; then
+  echo "Please run this script inside the OpenCoarrays source \"prerequisites\" subdirectory"
+  echo "or set OPENCOARRAYS_SRC_DIR to the top-level OpenCoarrays source directory path."
+  exit 1
+fi
+export __usage="${OPENCOARRAYS_SRC_DIR}/prerequisites/install-ofp.sh-usage"
+export B3B_USE_CASE="${B3B_USE_CASE:-${OPENCOARRAYS_SRC_DIR}/prerequisites/use-case}"
+if [[ ! -f "${B3B_USE_CASE:-}/bootstrap.sh" ]]; then
+  echo "Please set B3B_USE_CASE to the bash3boilerplate use-case directory path."
+  exit 2
+fi
+# shellcheck source=./use-case/bootstrap.sh
+source "${B3B_USE_CASE}/bootstrap.sh" "$@"
+
+# Set up a function to call when receiving an EXIT signal to do some cleanup. Remove if
+# not needed. Other signals can be trapped too, like SIGINT and SIGTERM.
+function cleanup_before_exit () {
+  info "Cleaning up. Done"
+}
+trap cleanup_before_exit EXIT # The signal is specified here. Could be SIGINT, SIGTERM etc.
+
+export __flag_present=1
+
+# Verify requirements
+
+[ -z "${LOG_LEVEL:-}" ] && emergency "Cannot continue without LOG_LEVEL. "
+
+# shellcheck disable=SC2154
+if [[ "${__os}" != "OSX" ]]; then
+   info      "${__base} currently installs binaries that work only on OS X"
+   emergency "To request other platforms, please submit an issue at http://github.com/sourceryinstitute/opencoarrays/issues"
+fi
+
+if [[ $(uname) == "Darwin"  ]]; then
+  default_ofp_downloader=curl
+  args="-LO"
+else
+  default_ofp_downloader=wget
+  args="--no-check-certificate"
+fi
+
+# If -D is passed, print the download programs used for OFP and its prerequisites.
+# Then exit with normal status.
+# shellcheck  disable=SC2154
+if [[ "${arg_D}" == "${__flag_present}" ]]; then
+  echo "strategoxt-superbundle downloader: $("${OPENCOARRAYS_SRC_DIR}/prerequisites/install-binary.sh" -D strategoxt-superbundle)"
+  echo "ofp-sdf default downloader: ${default_ofp_downloader}"
+  exit 0
+fi
+
+# If -P is passed, print the default installation paths for OFP and its prerequisites.
+# Then exit with normal status.
+install_path="${arg_i}"
+strategoxt_superbundle_install_path=`${OPENCOARRAYS_SRC_DIR}/prerequisites/install-binary.sh -P strategoxt-superbundle`
+# shellcheck disable=SC2154
+if [[ "${arg_P}" == "${__flag_present}" ]]; then
+  echo "strategoxt-superbundle default installation path: ${strategoxt_superbundle_install_path}"
+  echo "ofp default installation path: ${install_path}"
+  exit 0
+fi
+
+# If -V is passed, print the default versions of OFP and its prerequisites.
+# Then exit with normal status.
+default_ofp_version=sdf
+# shellcheck disable=SC2154
+if [[ "${arg_V}" == "${__flag_present}" ]]; then
+  echo "strategoxt-superbundle default version: $("${OPENCOARRAYS_SRC_DIR}/prerequisites/install-binary.sh" -V strategoxt-superbundle)"
+  echo "ofp default version: ${default_ofp_version}"
+  exit 0
+fi
+
+# If -U is passed, print the URLs for OFP and its prerequisites.
+# Then exit with normal status.
+ofp_url_head="https://github.com/sourceryinstitute/opencoarrays/files/213108/"
+ofp_url_tail="ofp-sdf.tar.gz"
+# shellcheck disable=SC2154
+if [[ "${arg_U}" == "${__flag_present}" ]]; then
+  echo "strategoxt-superbundle URL: $("${OPENCOARRAYS_SRC_DIR}/prerequisites/install-binary.sh" -U strategoxt-superbundle)"
+  echo "ofp URL: ${ofp_url_head}${ofp_url_tail}"
+  exit 0
+fi
+
+### Print bootstrapped magic variables to STDERR when LOG_LEVEL
+### is at the default value (6) or above.
+#####################################################################
+# shellcheck disable=SC2154
+{
+info "__file: ${__file}"
+info "__dir: ${__dir}"
+info "__base: ${__base}"
+info "__os: ${__os}"
+info "__usage: ${__usage}"
+info "LOG_LEVEL: ${LOG_LEVEL}"
+
+info "-d (--debug):            ${arg_d}"
+info "-D (--print-downloader): ${arg_D}"
+info "-e (--verbose):          ${arg_e}"
+info "-h (--help):             ${arg_h}"
+info "-i (--install-dir):      ${arg_i}"
+info "-I (--install-version):  ${arg_i}"
+info "-j (--num-threads):      ${arg_j}"
+info "-n (--no-color):         ${arg_n}"
+info "-P (--print-path):       ${arg_P}"
+info "-U (--print-url):        ${arg_U}"
+info "-V (--print-version):    ${arg_V}"
+}
+# Set OFP installation path to the value of the -i argument if present.
+# Otherwise, install OFP in the OpenCoarrays prerequisites/installations directory.
+opencoarrays_prerequisites_dir="${OPENCOARRAYS_SRC_DIR}"/prerequisites/
+if [[ "${arg_i}" == "${__flag_present}" ]]; then
+  install_path="${arg_i}"
+else
+  install_path="${opencoarrays_prerequisites_dir}"/installations
+fi
+
+ofp_prereqs_install_dir="/opt"
+# Change present working directory to installation directory
+if [[ ! -d "${install_path}" ]]; then
+  # shellcheck source=./build-functions/set_SUDO_if_needed_to_write_to_directory.sh
+  source "${opencoarrays_prerequisites_dir}/build-functions/set_SUDO_if_needed_to_write_to_directory.sh"
+  set_SUDO_if_needed_to_write_to_directory "${install_path}"
+  ${SUDO:-} mkdir -p "${install_path}"
+fi
+
+# Install OFP prerequisites to /opt (currently the only option)
+"${opencoarrays_prerequisites_dir}"/install-binary.sh -p strategoxt-superbundle -i "${strategoxt_superbundle_install_path}"
+
+# Downlaod OFP
+pushd "${install_path}"
+info "OFP Download command: ${default_ofp_downloader} ${args:-} \"${ofp_url_head}${ofp_url_tail}\""
+${default_ofp_downloader} ${args:-} "${ofp_url_head}${ofp_url_tail}" 
+
+
+# Uncompress OFP
+tar xf ofp-sdf.tar.gz
+# Return to the original working directory
+popd
+
+export SDF2_PATH="${ofp_prereqs_install_dir}"/sdf2-bundle/v2.4/bin
+export ST_PATH="${ofp_prereqs_install_dir}"/strategoxt/v0.17/bin
+export DYLD_LIBRARY_PATH="${ofp_prereqs_install_dir}"/strategoxt/v0.17/lib:/opt/aterm/v2.5/lib
+
+export OFP_HOME="${install_path}"/ofp-sdf
+# shellcheck source=./install-binary-functions/build_parse_table.sh
+source "${opencoarrays_prerequisites_dir}"/install-binary-functions/build_parse_table.sh
+build_parse_table
diff --git a/prerequisites/install-ofp.sh-usage b/prerequisites/install-ofp.sh-usage
new file mode 100644
index 0000000..6d61049
--- /dev/null
+++ b/prerequisites/install-ofp.sh-usage
@@ -0,0 +1,11 @@
+  -d --debug                 Enable debug mode.
+  -D --print-downloader      Print default download program.
+  -e --verbose               Enable verbose mode, print script as it is executed.
+  -h --help                  This page.
+  -i --install-dir [arg]     Install OFP in specified path. Default="${OPENCOARRAYS_SRC_DIR}/prerequisites/installations/"
+  -I --install-version [arg] OFP version to install. (To see default, use -V or --print-version)
+  -j --num-threads [arg]     Number of threads to use when invoking make. Default="1"
+  -n --no-color              Disable color output.
+  -P --print-path            Print OFP installation path.
+  -U --print-url             Print OFP URL.
+  -V --print-version         Print OFP version.
diff --git a/prerequisites/print_true.f90 b/prerequisites/print_true.f90
new file mode 100644
index 0000000..fdebe33
--- /dev/null
+++ b/prerequisites/print_true.f90
@@ -0,0 +1,2 @@
+print *,.true.
+end
diff --git a/prerequisites/stack.sh b/prerequisites/stack.sh
new file mode 100644
index 0000000..5f73525
--- /dev/null
+++ b/prerequisites/stack.sh
@@ -0,0 +1,254 @@
+# A stack, using bash arrays.
+# ---------------------------------------------------------------------------
+# This software is released under a BSD license, adapted from
+# <http://opensource.org/licenses/bsd-license.php>
+#
+# Copyright © 1989-2012 Brian M. Clapper.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#  this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#  this list of conditions and the following disclaimer in the documentation
+#  and/or other materials provided with the distribution.
+#
+# * Neither the name "clapper.org" nor the names of its contributors may be
+#  used to endorse or promote products derived from this software without
+#  specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Source: http://brizzled.clapper.org/blog/2011/10/28/a-bash-stack/
+
+# Destroy a stack
+#
+# Usage: stack_destroy name
+function stack_destroy
+{
+    : "${1?'Missing stack name'}"
+    eval "unset _stack_$1 _stack_$1_i"
+    return 0
+}
+
+# Push one or more items onto a stack.
+#
+# Usage: stack_push stack item ...
+function stack_push
+{
+    : "${1?'Missing stack name'}"
+    : "${2?'Missing item(s) to push'}"
+
+    if no_such_stack "$1"
+    then
+        echo "No such stack -- $1" >&2
+        return 1
+    fi
+
+    stack=$1
+    shift 1
+
+    while (( $# > 0 ))
+    do
+        eval '_i=$'"_stack_${stack}_i"
+        eval "_stack_${stack}[$_i]='$1'"
+        eval "let _stack_${stack}_i+=1"
+        shift 1
+    done
+
+    unset _i
+    return 0
+}
+
+
+# Get the size of a stack
+#
+# Usage: stack_size name var
+#
+# Example:
+#    stack_size mystack n
+#    echo "Size is $n"
+function stack_size
+{
+    : "${1?'Missing stack name'}"
+    : "${2?'Missing name of variable for stack size result'}"
+    if no_such_stack "$1"
+    then
+        echo "No such stack -- $1" >&2
+        return 1
+    fi
+    eval "$2"='$'"{#_stack_$1[*]}"
+}
+
+function no_such_stack
+{
+    : "${1?'Missing stack name'}"
+    stack_exists "$1"
+    ret=$?
+    declare -i x
+    let x="1-$ret"
+    return $x
+}
+
+#### Functions modified by Damian Rouson
+
+# These functions were modified to work with the shell settings in bash3boilerplate
+# (https://github.com/zbeekman/bash3boilerplate), primarily the "set -o unset" and
+# "set -o pipefail" settings.  The required modifications are described below.
+#
+
+# Pop the top element from the stack.
+#
+# Usage: stack_pop name var
+#
+# Example:
+#    stack_pop mystack top
+#    echo "Got $top"
+#
+# Modification for use with bash3boilerplate:
+#    replaced "let _i-=1" with  "(( _i-=1 )) || true"
+
+function stack_pop
+{
+    : "${1?'Missing stack name'}"
+    : "${2?'Missing name of variable for popped result'}"
+
+    eval 'let _i=$'"_stack_$1_i"
+
+    if no_such_stack "$1"
+    then
+        echo "No such stack -- $1" >&2
+        return 1
+    fi
+
+    if [[ "$_i" -eq 0 ]]
+    then
+        echo "Empty stack -- $1" >&2
+        return 1
+    fi
+
+   (( _i-=1 )) || true
+    eval "$2"='$'"{_stack_$1[$_i]}"
+    eval "unset _stack_$1[$_i]"
+    eval "_stack_$1_i=$_i"
+    unset _i
+    return 0
+}
+
+# Print a stack to stdout.
+#
+# Usage: stack_print name
+#
+# Modification for use with bash3boilerplate:
+#    replaced "let _i=${_i}-1" with "(( _i=${_i}-1 )) || true" to support execution with "set -o nounset"
+
+function stack_print
+{
+    : "${1?'Missing stack name'}"
+
+    if no_such_stack "$1"
+    then
+        echo "No such stack -- $1" >&2
+        return 1
+    fi
+
+    tmp=""
+    eval 'let _i=$'"_stack_$1_i"
+
+    while (( _i > 0 ))
+    do
+       (( _i = _i - 1 )) || true
+        eval 'e=$'"{_stack_$1[$_i]}"
+	# shellcheck  disable=SC2154
+        tmp="$tmp $e"
+    done
+    # shellcheck  disable=SC2086
+    echo "(" $tmp ")"
+}
+
+# Create a new stack.
+#
+# Usage: stack_new name
+#
+# Example: stack_new x
+#
+# Modification for use with bash3boilerplate:
+#    added "|| true" to allow execution with "set -o nounset" on OS X
+
+function stack_new
+{
+    : "${1?'Missing stack name'}"
+    if stack_exists "$1"
+    then
+        echo "Stack already exists -- $1" >&2
+        return 1
+    fi
+
+    if [[ $(uname) == "Darwin" ]]; then
+      eval "declare -ag _stack_$1" >& /dev/null || true
+      eval "declare -ig _stack_$1_i" >& /dev/null || true
+    else
+      eval "declare -ag _stack_$1" >& /dev/null
+      eval "declare -ig _stack_$1_i" >& /dev/null
+    fi
+
+    variableName="_stack_$1_i"
+    variableVal="0"
+    eval "${variableName}"="$(echo -ne \""${variableVal}"\")"
+
+    return 0
+}
+
+# Verify stack existence.
+#
+# Usage: stack_exists name
+#
+# Example: stack_new x
+#
+# Modification for use with bash3boilerplate:
+#    added curly braces in eval statement to allow execution with "set -o nounset"
+
+function stack_exists
+{
+    : "${1?'Missing stack name'}"
+
+    eval '_i=$'"{_stack_$1_i:-}"
+    if [[ -z "${_i:-}" ]]
+    then
+        return 1
+    else
+        return 0
+    fi
+}
+
+#### Functions added by Damian Rouson
+
+# Get the top element from the stack and return the stack
+# to its state before invocation of the function.
+#
+# Usage: stack_peek name var
+#
+# Example:
+#    stack_peek mystack top
+#    echo "Got $top"
+function stack_peek
+{
+  stack_pop "$1" "$2"
+  eval argument_name="\$$2"
+  # shellcheck disable=SC2154
+  stack_push "$1" "$argument_name"
+}
diff --git a/prerequisites/use-case/bootstrap.sh b/prerequisites/use-case/bootstrap.sh
new file mode 100755
index 0000000..1b7fd85
--- /dev/null
+++ b/prerequisites/use-case/bootstrap.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# BASH3 Boilerplate
+#
+#  bootstrap.sh
+#
+#  - Exports bash3boilerplate features and variables to the invoking script
+#  - Invokes functions containing commands extracted from the bash3boilerplate
+#    main.sh as part of a refactoring to facilitate wholesale reuse of main.sh's
+#    contents of without modification.
+#
+# Usage (as invoked in my-script.sh):
+#
+#   source bootstrap.sh "${@}"
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.1.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+# shellcheck source=./set_environment_and_color.sh
+source "${B3B_USE_CASE}"/set_environment_and_color.sh         # turn on errexit, nounset, pipefail, default log level
+# shellcheck source=./set_magic_variables.sh
+source "${B3B_USE_CASE}"/set_magic_variables.sh "$(caller 0)" # set __dir, __file, __filename, __base, __os
+# shellcheck source=./define_functions.sh
+source "${B3B_USE_CASE}"/define_functions.sh                  # help/usage function and debug/info output functions
+# shellcheck source=./parse_command_line.sh
+source "${B3B_USE_CASE}"/parse_command_line.sh "${@:-}"       # parse the command line
+# shellcheck source=./set_common_switches.sh
+source "${B3B_USE_CASE}"/set_common_switches.sh               # provide defaults for -h, -V, and -d
diff --git a/prerequisites/use-case/define_functions.sh b/prerequisites/use-case/define_functions.sh
new file mode 100644
index 0000000..48d94bd
--- /dev/null
+++ b/prerequisites/use-case/define_functions.sh
@@ -0,0 +1,105 @@
+# BASH3 Boilerplate
+#
+#  define_functions.sh
+#
+#  - Defines helper functions containing commands extracted from the
+#    bash3boilerplate main.sh as part of a refactoring to facilitate
+#    wholesale reuse of main.sh's contents of without modification.
+#
+# Usage (as invoked in bootstrap.sh):
+#
+#   source define_functions.sh
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+### Functions
+#####################################################################
+
+# shellcheck disable=SC2034
+function _fmt ()      {
+  local color_debug="\x1b[35m"
+  local color_info="\x1b[32m"
+  local color_notice="\x1b[34m"
+  local color_warning="\x1b[33m"
+  local color_error="\x1b[31m"
+  local color_critical="\x1b[1;31m"
+  local color_alert="\x1b[1;33;41m"
+  local color_emergency="\x1b[1;4;5;33;41m"
+  local colorvar=color_$1
+
+  local color="${!colorvar:-$color_error}"
+  local color_reset="\x1b[0m"
+  if [ "${NO_COLOR}" = "true" ] || [[ "${TERM:-}" != "xterm"* ]] || [ -t 1 ]; then
+    # Don't use colors on pipes or non-recognized terminals
+    color=""; color_reset=""
+  fi
+  echo -e "$(date -u +"%Y-%m-%d %H:%M:%S UTC") ${color}$(printf "[%9s]" "${1}")${color_reset}";
+}
+
+# The block of single-line functions below all print to STDERR,
+# leaving STDOUT for piping machine readable information to other
+# software. Above each such function is a commented demonstration
+# of its usage.  Execution continues after an invocation of each
+# function, except the "emergency" function, which causes
+# termination with a non-zero exit status.
+
+# shellcheck disable=SC2015
+{
+# emergency "A \"panic\" condition usually affecting multiple apps/servers/sites. At this level it would usually notify all tech staff on call."
+function emergency () {                             echo "$(_fmt emergency) ${*}" 1>&2 || true; exit 1; }
+# alert "Should be corrected immediately, therefore notify staff who can fix the problem. An example would be the loss of a primary ISP connection."
+function alert ()     { [ "${LOG_LEVEL}" -ge 1 ] && echo "$(_fmt alert) ${*}" 1>&2 || true; }
+# critical "Should be corrected immediately, but indicates failure in a primary system, an example is a loss of a backup ISP connection."
+function critical ()  { [ "${LOG_LEVEL}" -ge 2 ] && echo "$(_fmt critical) ${*}" 1>&2 || true; }
+# error "Non-urgent failures, these should be relayed to developers or admins; each item must be resolved within a given time."
+function error ()     { [ "${LOG_LEVEL}" -ge 3 ] && echo "$(_fmt error) ${*}" 1>&2 || true; }
+# warning "Warning messages, not an error, but indication that an error will occur if action is not taken, e.g. file system 85% full - each item must be resolved within a given time. This is a debug message"
+function warning ()   { [ "${LOG_LEVEL}" -ge 4 ] && echo "$(_fmt warning) ${*}" 1>&2 || true; }
+# notice "Events that are unusual but not error conditions - might be summarized in an email to developers or admins to spot potential problems - no immediate action required."
+function notice ()    { [ "${LOG_LEVEL}" -ge 5 ] && echo "$(_fmt notice) ${*}" 1>&2 || true; }
+# info "Normal operational messages - may be harvested for reporting, measuring throughput, etc. - no action required."
+function info ()      { [ "${LOG_LEVEL}" -ge 6 ] && echo "$(_fmt info) ${*}" 1>&2 || true; }
+# debug "Info useful to developers for debugging the application, not useful during operations."
+function debug ()     { [ "${LOG_LEVEL}" -ge 7 ] && echo "$(_fmt debug) ${*}" 1>&2 || true; }
+}
+function suppress_debug_messages() { export LOG_LEVEL=6; }
+function suppress_info_debug_messages () { export LOG_LEVEL=5; }
+function suppress_notice_info_debug_messages () { export LOG_LEVEL=4; }
+
+function help () {
+  echo "" 1>&2
+  echo " ${*}" 1>&2
+  echo "" 1>&2
+  # shellcheck disable=SC2154
+  cat "${__usage}" 1>&2
+  echo "" 1>&2
+  exit 1
+}
+export -f help
+export -f emergency
+export -f alert
+export -f critical
+export -f error
+export -f warning
+export -f notice
+export -f info
+export -f debug
+export suppress_debug_messages
+export suppress_info_debug_messages
+export suppress_notice_info_debug_messages
diff --git a/prerequisites/use-case/parse_command_line.sh b/prerequisites/use-case/parse_command_line.sh
new file mode 100644
index 0000000..e3e6027
--- /dev/null
+++ b/prerequisites/use-case/parse_command_line.sh
@@ -0,0 +1,127 @@
+# BASH3 Boilerplate
+#
+#  parse_command_line.sh
+#
+#  - Uses usage information defined in "${__usage}" to parse the command line.
+#  - Defines a function containing commands extracted from the bash3boilerplate
+#    main.sh as part of a refactoring to facilitate wholesale reuse of main.sh's
+#    contents of without modification.
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Usage (as invoked in bootstraph.sh):
+#
+#   source parse_command_line.sh
+#   parse_command_line ${@:2}
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+### Parse commandline options
+#####################################################################
+function parse_command_line(){
+# source this script and pass $@ as the argument
+
+# Translate usage string -> getopts arguments, and set $arg_<flag> defaults
+# shellcheck disable=SC2154
+while read -r line; do
+  # fetch single character version of option string
+  opt="$(echo "${line}" |awk '{print $1}' |sed -e 's#^-##')"
+
+  # fetch long version if present
+  long_opt="$(echo "${line}" |awk '/\-\-/ {print $2}' |sed -e 's#^--##')"
+  long_opt_mangled="$(sed 's#-#_#g' <<< "$long_opt")"
+
+  # map long name back to short name
+  varname="short_opt_${long_opt_mangled}"
+  eval "${varname}=\"${opt}\""
+
+  # check if option takes an argument
+  varname="has_arg_${opt}"
+  if ! echo "${line}" |egrep '\[.*\]' >/dev/null 2>&1; then
+    init="0" # it's a flag. init with 0
+    eval "${varname}=0"
+  else
+    opt="${opt}:" # add : if opt has arg
+    init=""  # it has an arg. init with ""
+    eval "${varname}=1"
+  fi
+  opts="${opts:-}${opt}"
+
+  varname="arg_${opt:0:1}"
+  if ! echo "${line}" |egrep '\. Default=' >/dev/null 2>&1; then
+    eval "${varname}=\"${init}\""
+  else
+    match="$(echo "${line}" |sed 's#^.*Default=\(\)#\1#g')"
+    eval "${varname}=\"${match}\""
+  fi
+done < "${__usage}"
+
+
+# Allow long options like --this
+opts="${opts}-:"
+
+# Reset in case getopts has been used previously in the shell.
+OPTIND=1
+
+# start parsing command line
+set +o nounset # unexpected arguments will cause unbound variables
+               # to be dereferenced
+# Overwrite $arg_<flag> defaults with the actual CLI options
+while getopts "${opts}" opt; do
+  [ "${opt}" = "?" ] && help "Invalid use of script: ${*} "
+
+  if [ "${opt}" = "-" ]; then
+    # OPTARG is long-option-name or long-option=value
+    if [[ "${OPTARG}" =~ .*=.* ]]; then
+      # --key=value format
+      long=${OPTARG/=*/}
+      long_mangled="$(sed 's#-#_#g' <<< "$long")"
+      # Set opt to the short option corresponding to the long option
+      eval "opt=\"\${short_opt_${long_mangled}}\""
+      OPTARG=${OPTARG#*=}
+    else
+      # --key value format
+      # Map long name to short version of option
+      long_mangled="$(sed 's#-#_#g' <<< "$OPTARG")"
+      eval "opt=\"\${short_opt_${long_mangled}}\""
+      # Only assign OPTARG if option takes an argument
+      eval "OPTARG=\"\${@:OPTIND:\${has_arg_${opt}}}\""
+      # shift over the argument if argument is expected
+      ((OPTIND+=has_arg_${opt}))
+    fi
+    # we have set opt/OPTARG to the short value and the argument as OPTARG if it exists
+  fi
+  varname="arg_${opt:0:1}"
+  default="${!varname}"
+
+  value="${OPTARG}"
+  if [ -z "${OPTARG}" ] && [ "${default}" = "0" ]; then
+    value="1"
+  fi
+
+  eval "${varname}=\"${value}\""
+  debug "cli arg ${varname} = ($default) -> ${!varname}"
+done
+
+set -o nounset # no more unbound variable references expected
+
+shift $((OPTIND-1))
+
+# shellcheck disable=SC2015
+[ "${1:-}" = "--" ] && shift || true
+}
+export -f parse_command_line # make function available to subshells
+parse_command_line "${@:-}"  # invoke parsing function and allow for empty argument list
diff --git a/prerequisites/use-case/set_common_switches.sh b/prerequisites/use-case/set_common_switches.sh
new file mode 100644
index 0000000..67119c1
--- /dev/null
+++ b/prerequisites/use-case/set_common_switches.sh
@@ -0,0 +1,51 @@
+# BASH3 Boilerplate
+#
+#  set_common_switches.sh
+#
+#  - Sets variables that are useful in conjunction with other bash3boilerplate features
+#  - Contains commands extracted from the bash3boilerplate main.sh as part of a refactoring
+#    to facilitate wholesale reuse of main.sh's contents of without modification.
+#
+# Usage (as invoked in bootstrap.sh):
+#
+#  source set_common_switches.sh
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+### Switches (like -d for debugmode, -h for showing helppage)
+#####################################################################
+
+# shellcheck disable=SC2154
+{
+# debug mode
+if [ "${arg_d}" = "1" ]; then
+  set -o xtrace
+  LOG_LEVEL="7"
+fi
+
+# verbose mode
+if [ "${arg_e}" = "1" ]; then
+  set -o verbose
+fi
+
+# help mode
+if [ "${arg_h}" = "1" ]; then
+  # Help exists with code 1
+  help "Help using ${0}"
+fi
+}
diff --git a/prerequisites/use-case/set_environment_and_color.sh b/prerequisites/use-case/set_environment_and_color.sh
new file mode 100644
index 0000000..40a413b
--- /dev/null
+++ b/prerequisites/use-case/set_environment_and_color.sh
@@ -0,0 +1,44 @@
+# BASH3 Boilerplate
+#
+#  set_environment_and_color.sh
+#
+#  - Sets variables that control the behavior of the invoking script.
+#  - Contains commands extracted from the bash3boilerplate main.sh as
+#    part of a refactoring to facilitate wholesale reuse of main.sh's
+#    contents of without modification.
+#
+# Usage (as invoked in bootstrap.sh):
+#
+#  source set_environment_and_color.sh
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.0.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+# Exit on error. Append ||true if you expect an error.
+# `set` is safer than relying on a shebang like `#!/bin/bash -e` because that is neutralized
+# when someone runs your script as `bash yourscript.sh`
+set -o errexit
+set -o nounset
+
+# Bash will remember & return the highest exitcode in a chain of pipes.
+# This way you can catch the error in case mysqldump fails in `mysqldump |gzip`
+set -o pipefail
+# set -o xtrace
+
+# Environment variables and their defaults
+LOG_LEVEL="${LOG_LEVEL:-6}" # 7 = debug -> 0 = emergency
+NO_COLOR="${NO_COLOR:-}"    # true = disable color. otherwise autodetected
diff --git a/prerequisites/use-case/set_magic_variables.sh b/prerequisites/use-case/set_magic_variables.sh
new file mode 100644
index 0000000..50d74dd
--- /dev/null
+++ b/prerequisites/use-case/set_magic_variables.sh
@@ -0,0 +1,46 @@
+# BASH3 Boilerplate
+#
+#  set_magic_variables.sh
+#
+#  - Sets the variables __dir, __file, __filename, __base, and __os
+#  - Defines a function containing commands extracted from the bash3boilerplate
+#    main.sh as part of a refactoring to facilitate wholesale reuse of main.sh's
+#    contents of without modification.
+#
+#  Usage (as invoked in bootstrap.sh):
+#
+#    source set_magic_variables.sh "$(caller 0)"
+#
+# More info:
+#
+#  - https://github.com/kvz/bash3boilerplate
+#  - http://kvz.io/blog/2013/02/26/introducing-bash3boilerplate/
+#
+# Version: 2.1.0
+#
+# Authors:
+#
+#  - Kevin van Zonneveld (http://kvz.io)
+#  - Izaak Beekman (https://izaakbeekman.com/)
+#  - Alexander Rathai (Alexander.Rathai at gmail.com)
+#  - Dr. Damian Rouson (http://www.sourceryinstitute.org/) (documentation)
+#
+# Licensed under MIT
+# Copyright (c) 2013 Kevin van Zonneveld (http://kvz.io)
+
+# shellcheck disable=SC2016
+[ -z "${1}" ] && echo 'Usage: source set_magic_variables.sh "$(caller 0)"'
+# shellcheck disable=SC2034
+function set_magic_variables(){
+  text_after_final_space="${1##* }"
+  __dir="$(cd "$(dirname "${text_after_final_space}")" && pwd)"
+  __file="${__dir}/$(basename "${text_after_final_space}")"
+  __filename="$(basename "${text_after_final_space}")"
+  __base="$(basename "${__file}" .sh)"
+  __os="Linux"
+  if [[ "${OSTYPE:-}" == "darwin"* ]]; then
+    __os="OSX"
+  fi
+  __usage="${__usage:-${__file}-usage}"
+}
+set_magic_variables "${@}"
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..4cbb938
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(directories_to_build single mpi tests)
+foreach(directory ${directories_to_build})
+  add_subdirectory(${directory})
+endforeach()
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..582da69
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,63 @@
+include make.inc
+export
+
+.PHONY : common
+.PHONY : single
+.PHONY : gasnet
+.PHONY : mpi
+#.PHONY : armci
+
+mpi: common
+	$(MAKE) -C $@
+
+all: common single gasnet mpi # armci
+
+common:
+	$(MAKE) -C $@
+
+single: common
+	$(MAKE) -C $@
+
+#armci: common
+#	$(MAKE) -C $@
+
+gasnet: common
+	$(MAKE) -C $@
+
+test: single gasnet mpi #armci
+	$(MAKE) -C tests test
+test-mpi: mpi
+	$(MAKE) -C tests test-mpi
+test-gasnet: gasnet
+	$(MAKE) -C tests test-gasnet
+#test-armci: armci
+#	$(MAKE) -C tests test-armci
+test-single: single
+	$(MAKE) -C tests test-single
+
+run: single armci gasnet mpi
+	$(MAKE) -C testsuite run
+run-mpi: mpi
+	$(MAKE) -C testsuite run-mpi
+run-gasnet: gasnet
+	$(MAKE) -C testsuite run-gasnet
+#run-armci: armci
+#	$(MAKE) -C testsuite run-armci
+run-single: single
+	$(MAKE) -C testsuite run-single
+
+clean:
+#	$(MAKE) -k -C common clean
+#	$(MAKE) -k -C single clean
+#	$(MAKE) -k -C gasnet clean
+	$(MAKE) -k -C mpi clean
+#	$(MAKE) -k -C tests clean
+#	$(MAKE) -k -C armci clean
+
+distclean: clean
+	$(MAKE) -k -C common distclean
+#	$(MAKE) -k -C single distclean
+	$(MAKE) -k -C mpi distclean
+#	$(MAKE) -k -C gasnet distclean
+#	$(MAKE) -k -C testsuite distclean
+#	(MAKE) -k -C armci distclean
diff --git a/src/armci/Makefile b/src/armci/Makefile
new file mode 100644
index 0000000..01aac76
--- /dev/null
+++ b/src/armci/Makefile
@@ -0,0 +1,21 @@
+include ../make.inc
+
+all: libcaf_armci.a
+
+libcaf_armci.a: armci.o ../common/caf_auxiliary.o
+	ar rcv $@ armci.o ../common/caf_auxiliary.o
+	ranlib $@
+
+.c.o:
+	$(CC) $(CFLAGS) $(ARMCI_CFLAGS) -I.. -c $< -o $@
+
+armci.o: armci.c ../libcaf.h ../libcaf-gfortran-descriptor.h
+
+../common/caf_auxiliary.o:
+	$(MAKE) -C ../common
+
+clean:
+	rm -f armci.o
+
+distclean: clean
+	rm -f libcaf_armci.a
diff --git a/src/armci/armci.c b/src/armci/armci.c
new file mode 100644
index 0000000..7e0e3b4
--- /dev/null
+++ b/src/armci/armci.c
@@ -0,0 +1,1006 @@
+/* ARMCI implementation of Libcaf
+
+Copyright (c) 2012-2014, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sourcery, Inc., nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+/****l* armci/armci_caf.c
+ * NAME
+ *   armci_caf
+ * SYNOPSIS
+ *   This program implements the LIBCAF_ARMCI transport layer.  This
+ *   library is incomplete and unsupported. It exists to serve as a
+ *   starting point for potential future development.
+******
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	/* For memcpy.  */
+#include <stdarg.h>	/* For variadic arguments.  */
+#include <sched.h>	/* For sched_yield.  */
+#include <message.h>    /* ARMCI and armci_msg_*.  */
+#include <complex.h>
+
+#include "libcaf.h"
+
+
+/* Define GFC_CAF_CHECK to enable run-time checking.  */
+/* #define GFC_CAF_CHECK  1  */
+
+typedef void ** armci_token_t;
+#define TOKEN(X) ((armci_token_t) (X))
+
+
+static void error_stop (int error) __attribute__ ((noreturn));
+
+/* Global variables.  */
+static int caf_this_image;
+static int caf_num_images;
+static int caf_is_finalized;
+
+caf_static_t *caf_static_list = NULL;
+
+static int **arrived;
+static int *orders;
+static int sizeOrders = 0;
+static int *images_full;
+
+/* Keep in sync with single.c.  */
+static void
+caf_runtime_error (const char *message, ...)
+{
+  va_list ap;
+  fprintf (stderr, "Fortran runtime error on image %d: ", caf_this_image);
+  va_start (ap, message);
+  vfprintf (stderr, message, ap);
+  va_end (ap);
+  fprintf (stderr, "\n");
+
+  /* FIXME: Shutdown the Fortran RTL to flush the buffer.  PR 43849.  */
+  /* FIXME: Do some more effort than just to abort.  */
+  armci_msg_abort (EXIT_FAILURE);
+
+  /* Should be unreachable, but to make sure also call exit.  */
+  exit (EXIT_FAILURE);
+}
+
+
+/* Initialize coarray program.  This routine assumes that no other
+   ARMCI initialization happened before. */
+
+void
+PREFIX (init) (int *argc, char ***argv)
+{
+  int ierr=0, i = 0, j = 0;
+
+  if (caf_num_images != 0)
+    return;  /* Already initialized.  */
+
+  armci_msg_init (argc, argv);
+  if (unlikely ((ierr = ARMCI_Init()) != 0))
+    caf_runtime_error ("Failure when initializing ARMCI: %d", ierr);
+
+  caf_num_images = armci_msg_nproc ();
+  caf_this_image = armci_msg_me ();
+  caf_this_image++;
+  caf_is_finalized = 0;
+
+  images_full = (int *) calloc (caf_num_images-1, sizeof (int));
+
+  ierr = ARMCI_Create_mutexes (1);
+
+  for (i = 0; i < caf_num_images; i++)
+    if (i + 1 != caf_this_image)
+      {
+	images_full[j] = i + 1;
+	j++;
+      }
+
+  orders = calloc (caf_num_images, sizeof (int));
+
+  arrived = malloc(sizeof (int *) * caf_num_images);
+
+  ierr = ARMCI_Malloc ((void **) arrived, sizeof (int) * caf_num_images);
+
+  for (i = 0; i < caf_num_images; i++)
+    arrived[caf_this_image-1][i] = 0;
+}
+
+
+/* Finalize coarray program.   */
+
+void
+PREFIX (finalize) (void)
+{
+  while (caf_static_list != NULL)
+    {
+      caf_static_t *tmp = caf_static_list->prev;
+
+      (void) ARMCI_Free (TOKEN (caf_static_list->token)[caf_this_image-1]);
+      free (TOKEN (caf_static_list->token));
+      free (caf_static_list);
+      caf_static_list = tmp;
+    }
+
+  (void) ARMCI_Finalize ();
+  armci_msg_finalize ();
+
+  caf_is_finalized = 1;
+}
+
+
+int
+PREFIX (this_image) (int distance __attribute__ ((unused)))
+{
+  return caf_this_image;
+}
+
+
+int
+PREFIX (num_images) (int distance __attribute__ ((unused)),
+		     int failed __attribute__ ((unused)))
+{
+  return caf_num_images;
+}
+
+
+void *
+PREFIX (register) (size_t size, caf_register_t type, caf_token_t *token,
+		   int *stat, char *errmsg, int errmsg_len)
+{
+  int ierr = 0;
+
+  if (unlikely (caf_is_finalized))
+    goto error;
+
+  /* Start ARMCI if not already started.  */
+  if (caf_num_images == 0)
+    PREFIX (init) (NULL, NULL);
+
+  *token = malloc (sizeof (armci_token_t));
+
+  if (*token == NULL)
+    goto error;
+
+  *token = malloc (sizeof (void*) * caf_num_images);
+  if (TOKEN (*token) == NULL)
+    goto error;
+
+  ierr = ARMCI_Malloc (TOKEN (*token), size);
+
+  if (unlikely (ierr))
+    {
+      free (TOKEN (*token));
+      goto error;
+    }
+
+  if (type == CAF_REGTYPE_COARRAY_STATIC)
+    {
+      caf_static_t *tmp = malloc (sizeof (caf_static_t));
+      tmp->prev  = caf_static_list;
+      tmp->token = *token;
+      caf_static_list = tmp;
+    }
+
+  if (stat)
+    *stat = 0;
+
+  return TOKEN (*token)[caf_this_image-1];
+
+error:
+  {
+    char *msg;
+
+    if (caf_is_finalized)
+      msg = "Failed to allocate coarray - there are stopped images";
+    else
+      msg = "Failed to allocate coarray";
+
+    if (stat)
+      {
+	*stat = caf_is_finalized ? STAT_STOPPED_IMAGE : 1;
+	if (errmsg_len > 0)
+	  {
+	    int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+							: (int) strlen (msg);
+	    memcpy (errmsg, msg, len);
+	    if (errmsg_len > len)
+	      memset (&errmsg[len], ' ', errmsg_len-len);
+	  }
+      }
+    else
+      caf_runtime_error (msg);
+  }
+
+  return NULL;
+}
+
+
+void
+PREFIX (deregister) (caf_token_t *token, int *stat, char *errmsg, int errmsg_len)
+{
+  int ierr=0;
+
+  if (unlikely (caf_is_finalized))
+    {
+      const char msg[] = "Failed to deallocate coarray - "
+			  "there are stopped images";
+      if (stat)
+	{
+	  *stat = STAT_STOPPED_IMAGE;
+
+	  if (errmsg_len > 0)
+	    {
+	      int len = ((int) sizeof (msg) - 1 > errmsg_len)
+			? errmsg_len : (int) sizeof (msg) - 1;
+	      memcpy (errmsg, msg, len);
+	      if (errmsg_len > len)
+		memset (&errmsg[len], ' ', errmsg_len-len);
+	    }
+	  return;
+	}
+      caf_runtime_error (msg);
+    }
+
+  PREFIX (sync_all) (NULL, NULL, 0);
+
+  if (stat)
+    *stat = 0;
+
+  if (unlikely (ierr = ARMCI_Free (TOKEN (*token)[caf_this_image-1])))
+    caf_runtime_error ("ARMCI memory freeing failed: Error code %d", ierr);
+
+  free (TOKEN (*token));
+}
+
+
+void
+PREFIX (sync_all) (int *stat, char *errmsg, int errmsg_len)
+{
+  int ierr=0;
+
+  if (unlikely (caf_is_finalized))
+    ierr = STAT_STOPPED_IMAGE;
+  else
+    {
+      ARMCI_AllFence ();
+      armci_msg_barrier ();
+      ierr = 0;
+    }
+
+  if (stat)
+    *stat = ierr;
+
+  if (ierr)
+    {
+      char *msg;
+      if (caf_is_finalized)
+	msg = "SYNC ALL failed - there are stopped images";
+      else
+	msg = "SYNC ALL failed";
+
+      if (errmsg_len > 0)
+	{
+	  int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+						      : (int) strlen (msg);
+	  memcpy (errmsg, msg, len);
+	  if (errmsg_len > len)
+	    memset (&errmsg[len], ' ', errmsg_len-len);
+	}
+      else
+	caf_runtime_error (msg);
+    }
+}
+
+
+/* Send scalar (or contiguous) data from buffer to a remote image.  */
+
+/* token: The token of the array to be written to. */
+/* offset: Difference between the coarray base address and the actual data, used for caf(3)[2] = 8 or caf[4]%a(4)%b = 7. */
+/* image_index: Index of the coarray (typically remote, though it can also be on this_image). */
+/* data: Pointer to the to-be-transferred data. */
+/* size: The number of bytes to be transferred. */
+/* asynchronous: Return before the data transfer has been complete  */
+
+void
+PREFIX (send) (caf_token_t token, size_t offset, int image_index, void *data,
+	       size_t size, bool async)
+{
+  int ierr=0;
+
+  if (unlikely (size == 0))
+    return;  /* Zero-sized array.  */
+
+  if (image_index == caf_this_image)
+    {
+       void *dest = (void *) ((char *) TOKEN (token)[image_index-1] + offset);
+       memmove (dest, data, size);
+       return;
+    }
+
+  if (!async)
+    ierr = ARMCI_Put (data, TOKEN (token)[image_index-1] + offset, size,
+		      image_index - 1);
+  else
+    ierr = ARMCI_NbPut (data, TOKEN (token)[image_index-1] + offset, size,
+			image_index-1, NULL);
+
+  if (ierr != 0)
+    error_stop (ierr);
+}
+
+
+/* Send array data from src to dest on a remote image.  */
+
+void
+PREFIX (send_desc) (caf_token_t token, size_t offset, int image_index,
+		    gfc_descriptor_t *dest, gfc_descriptor_t *src, bool async)
+{
+  int ierr = 0;
+  size_t i, size;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  if (PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src))
+    {
+      void *dst = (void *)((char *) TOKEN (token)[image_index-1] + offset);
+
+      if (image_index == caf_this_image)
+	memmove (dst, src->base_addr, GFC_DESCRIPTOR_SIZE (dest)*size);
+      else if (!async)
+	ierr = ARMCI_Put (src->base_addr, dst, GFC_DESCRIPTOR_SIZE (dest)*size,
+			  image_index - 1);
+      else
+	ierr = ARMCI_NbPut (src->base_addr, dst,
+			    GFC_DESCRIPTOR_SIZE (dest)*size,
+			    image_index-1, NULL);
+      if (ierr != 0)
+	error_stop (ierr);
+      return;
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_dst = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < rank-1; j++)
+	{
+	  array_offset_dst += ((i / (extent*stride))
+			       % (dest->dim[j]._ubound
+				  - dest->dim[j].lower_bound + 1))
+			      * dest->dim[j]._stride;
+	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	}
+      array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+
+      ptrdiff_t array_offset_sr = 0;
+      stride = 1;
+      extent = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+	{
+	  array_offset_sr += ((i / (extent*stride))
+			   % (src->dim[j]._ubound
+			      - src->dim[j].lower_bound + 1))
+			  * src->dim[j]._stride;
+	  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+          stride = src->dim[j]._stride;
+	}
+      array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+
+      void *dst = (void *)((char *) TOKEN (token)[image_index-1] + offset
+			   + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest));
+      void *sr = (void *)((char *) src->base_addr
+			  + array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
+      if (image_index == caf_this_image)
+	memmove (dst, sr, GFC_DESCRIPTOR_SIZE (dest));
+      else if (!async)
+        ierr = ARMCI_Put (sr, dst, GFC_DESCRIPTOR_SIZE (dest), image_index - 1);
+      else
+        ierr = ARMCI_NbPut (sr, dst, GFC_DESCRIPTOR_SIZE (dest),
+			    image_index - 1, NULL);
+      if (ierr != 0)
+	{
+	  error_stop (ierr);
+	  return;
+	}
+    }
+}
+
+
+/* Send scalar data from src to array dest on a remote image.  */
+
+void
+PREFIX (send_desc_scalar) (caf_token_t token, size_t offset, int image_index,
+			   gfc_descriptor_t *dest, void *buffer, bool async)
+{
+  int ierr = 0;
+  size_t i, size;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < rank-1; j++)
+	{
+	  array_offset += ((i / (extent*stride))
+			   % (dest->dim[j]._ubound
+			      - dest->dim[j].lower_bound + 1))
+			  * dest->dim[j]._stride;
+	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	}
+      array_offset += (i / extent) * dest->dim[rank-1]._stride;
+      void *dst = (void *)((char *) TOKEN (token)[image_index-1] + offset
+			   + array_offset*GFC_DESCRIPTOR_SIZE (dest));
+      if (image_index == caf_this_image)
+	memmove (dst, buffer, GFC_DESCRIPTOR_SIZE (dest));
+      else if (!async)
+        ierr = ARMCI_Put (buffer, dst, GFC_DESCRIPTOR_SIZE (dest),
+			  image_index - 1);
+      else
+        ierr = ARMCI_NbPut (buffer, dst, GFC_DESCRIPTOR_SIZE (dest),
+			    image_index-1, NULL);
+      if (ierr != 0)
+	{
+	  error_stop (ierr);
+	  return;
+	}
+    }
+}
+
+
+void
+PREFIX (get) (caf_token_t token, size_t offset, int image_index, void *data,
+	      size_t size, bool async)
+{
+  int ierr = 0;
+
+  if (unlikely (size == 0))
+    return;  /* Zero-sized array.  */
+
+  if (image_index == caf_this_image)
+    memmove (data, TOKEN (token)[image_index-1] + offset, size);
+  else if (async == false)
+    ierr = ARMCI_Get (TOKEN (token)[image_index-1] + offset, data, size,
+		      image_index - 1);
+  else
+    ierr = ARMCI_NbGet (TOKEN (token)[image_index-1] + offset, data, size,
+			image_index - 1, NULL);
+
+  if (ierr != 0)
+    error_stop (ierr);
+}
+
+
+/* Get array data from a remote src to a local dest.  */
+
+void
+PREFIX (get_desc) (caf_token_t token, size_t offset, int image_index,
+		   gfc_descriptor_t *src, gfc_descriptor_t *dest,
+		   bool async __attribute__ ((unused)))
+{
+  size_t i, size;
+  int ierr = 0;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  if (PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src))
+    {
+      void *sr = (void *) ((char *) TOKEN(token)[image_index-1] + offset);
+      if (image_index == caf_this_image)
+	memmove (dest->base_addr, sr, GFC_DESCRIPTOR_SIZE (dest)*size);
+      else if (async == false)
+	ierr = ARMCI_Get (sr, dest->base_addr, GFC_DESCRIPTOR_SIZE (dest)*size,
+			  image_index - 1);
+      else
+	ierr = ARMCI_NbGet (sr, dest->base_addr,
+			    GFC_DESCRIPTOR_SIZE (dest)*size, image_index - 1,
+			    NULL);
+      if (ierr != 0)
+	error_stop (ierr);
+      return;
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_dst = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < rank-1; j++)
+	{
+	  array_offset_dst += ((i / (extent*stride))
+			       % (dest->dim[j]._ubound
+				  - dest->dim[j].lower_bound + 1))
+			      * dest->dim[j]._stride;
+	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	}
+      array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+
+      ptrdiff_t array_offset_sr = 0;
+      stride = 1;
+      extent = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+	{
+	  array_offset_sr += ((i / (extent*stride))
+			   % (src->dim[j]._ubound
+			      - src->dim[j].lower_bound + 1))
+			  * src->dim[j]._stride;
+	  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+          stride = src->dim[j]._stride;
+	}
+      array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+
+      void *sr = (void *)((char *) TOKEN (token)[image_index-1] + offset
+			   + array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
+      void *dst = (void *)((char *) dest->base_addr
+			  + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest));
+      if (image_index == caf_this_image)
+	memmove (dst, sr, GFC_DESCRIPTOR_SIZE (dest));
+      else if (async == false)
+	ierr = ARMCI_Get (sr, dst, GFC_DESCRIPTOR_SIZE (dest), image_index - 1);
+      else
+	ierr = ARMCI_NbGet (sr, dst, GFC_DESCRIPTOR_SIZE (dest),
+			    image_index - 1, NULL);
+      if (ierr != 0)
+	error_stop (ierr);
+    }
+}
+
+
+/* SYNC IMAGES. Note: SYNC IMAGES(*) is passed as count == -1 while
+   SYNC IMAGES([]) has count == 0. Note further that SYNC IMAGES(*)
+   is not equivalent to SYNC ALL. */
+
+void
+PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
+		      int errmsg_len)
+{
+  int i, ierr=0;
+  bool freeToGo = false;
+
+  if (count == 0 || (count == 1 && images[0] == caf_this_image))
+    {
+      if (stat)
+	*stat = 0;
+      return;
+    }
+
+#ifdef GFC_CAF_CHECK
+  {
+    for (i = 0; i < count; i++)
+      if (images[i] < 1 || images[i] > caf_num_images)
+	{
+	  fprintf (stderr, "COARRAY ERROR: Invalid image index %d to SYNC "
+		   "IMAGES", images[i]);
+	  error_stop (1);
+	}
+  }
+#endif
+
+  /* FIXME: SYNC IMAGES with a nontrivial argument cannot easily be
+     mapped to ARMCI communicators. Thus, exist early with an error message.  */
+
+  /* Handle SYNC IMAGES(*).  */
+  if (unlikely (caf_is_finalized))
+    ierr = STAT_STOPPED_IMAGE;
+  else
+    {
+      /* Insert orders.  */
+      if(count == -1)
+	{
+	  for (i = 0; i < caf_num_images - 1; i++)
+	    orders[images_full[i]-1]++;
+	  count = caf_num_images-1;
+	  images = images_full;
+	}
+      else
+	{
+	  for (i = 0; i < count; i++)
+	    orders[images[i]-1]++;
+	}
+
+      /* Sending ack.  */
+
+      int val;
+
+      for (i = 0; i < count; i++)
+	{
+	  ARMCI_Lock (0, images[i]-1);
+
+	  val = ARMCI_GetValueInt (
+			(void *) &arrived[images[i]-1][caf_this_image-1],
+			images[i]-1);
+	  val++;
+	  ierr = ARMCI_PutValueInt (
+			val, (void *) &arrived[images[i]-1][caf_this_image-1],
+			images[i]-1);
+	  ARMCI_Unlock (0, images[i]-1);
+	}
+
+      while (!freeToGo)
+	{
+	  ARMCI_Lock (0, caf_this_image-1);
+
+	  sizeOrders = 0;
+
+	  for (i = 0; i < caf_num_images; i++)
+	    {
+	      if (orders[i] != 0)
+		{
+		  sizeOrders++;
+		  val = ARMCI_GetValueInt (
+				(void *) &arrived[caf_this_image-1][i],
+				caf_this_image-1);
+		  /* val = arrived[caf_this_image-1][i]; */
+		  if (val != 0)
+		    {
+		      orders[i]--;
+		      sizeOrders--;
+		      val--;
+		      ierr = ARMCI_PutValueInt (
+				val, (void *) &arrived[caf_this_image-1][i],
+				caf_this_image-1);
+		    }
+		}
+	    }
+
+	  if (sizeOrders == 0)
+	    freeToGo=true;
+
+	  ARMCI_Unlock (0, caf_this_image-1);
+	  sched_yield ();
+	}
+
+      freeToGo = false;
+    }
+
+  if (stat)
+    *stat = ierr;
+
+  if (ierr)
+    {
+      char *msg;
+      if (caf_is_finalized)
+	msg = "SYNC IMAGES failed - there are stopped images";
+      else
+	msg = "SYNC IMAGES failed";
+
+      if (errmsg_len > 0)
+	{
+	  int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+						      : (int) strlen (msg);
+	  memcpy (errmsg, msg, len);
+	  if (errmsg_len > len)
+	    memset (&errmsg[len], ' ', errmsg_len-len);
+	}
+      else
+	caf_runtime_error (msg);
+    }
+}
+
+#if 0
+
+/* FIXME: The following needs to be fixed - in particular for result_image > 0;
+   It is unclear what's the difference between armci_msg_igop and
+   armci_msg_reduce; in particular, how to state that the result is only saved
+   on a certain image?  */
+static void
+co_reduce_2 (char *op, int result_image, gfc_descriptor_t *source,
+             gfc_descriptor_t *result, void *sr, void *dst,
+	     size_t size)
+{
+  void *arg;
+  if (result && GFC_DESCRIPTOR_TYPE (source) != BT_COMPLEX)
+    memmove (dst, sr, GFC_DESCRIPTOR_SIZE (source)*size);
+  else
+    arg = sr;
+
+  if (result_image == 0)
+    switch (GFC_DESCRIPTOR_TYPE (source))
+      {
+      BT_INTEGER:
+	if (GFC_DESCRIPTOR_SIZE (source) == sizeof (int))
+	  armci_msg_igop (arg, size, op);
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (long))
+	  armci_msg_lgop (arg, size, op);
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (long long))
+	  armci_msg_llgop (arg, size, op);
+        else
+	  goto error;
+	break;
+      BT_REAL:
+	if (GFC_DESCRIPTOR_SIZE (source) == sizeof (float))
+	  armci_msg_fgop (arg, size, op);
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (double))
+	  armci_msg_dgop (arg, size, op);
+        else
+	  goto error;
+	break;
+      BT_COMPLEX:
+	if (GFC_DESCRIPTOR_SIZE (source) == sizeof (float) && size == 1)
+	  {
+	    float re = __real__ *(_Complex float*) sr;
+	    float im = __imag__ *(_Complex float*) sr;
+	    armci_msg_fgop (&re, 1, op);
+	    armci_msg_fgop (&im, 1, op);
+	    if (result)
+	      *(_Complex float*) dst = re + im * _Complex_I;
+	    else
+	      *(_Complex float*) sr = re + im * _Complex_I;
+	  }
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (double) && size == 1)
+       	  {
+	    double re = __real__ *(_Complex double*) sr;
+	    double im = __imag__ *(_Complex double*) sr;
+	    armci_msg_dgop (&re, 1, op);
+	    armci_msg_dgop (&im, 1, op);
+	    if (result)
+	      *(_Complex double*) dst = re + im * _Complex_I;
+	    else
+	      *(_Complex double*) sr = re + im * _Complex_I;
+	  }
+	else
+	  goto error;
+	break;
+      default:
+	goto error;
+      }
+  else
+    switch (GFC_DESCRIPTOR_TYPE (source))
+      {
+      BT_INTEGER:
+	if (GFC_DESCRIPTOR_SIZE (source) == sizeof (int))
+	  armci_msg_reduce(arg, size, op, ARMCI_INT, result_image-1);
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (long))
+	  armci_msg_reduce(arg, size, op, ARMCI_LONG, result_image-1);
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (long long))
+	  armci_msg_reduce(arg, size, op, ARMCI_LONG_LONG, result_image-1);
+        else
+	  goto error;
+	break;
+      BT_REAL:
+	if (GFC_DESCRIPTOR_SIZE (source) == sizeof (float))
+	  armci_msg_reduce(arg, size, op, ARMCI_FLOAT, result_image-1);
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (double))
+	  armci_msg_reduce(arg, size, op, ARMCI_DOUBLE, result_image-1);
+        else
+	  goto error;
+	break;
+      BT_COMPLEX:
+	if (GFC_DESCRIPTOR_SIZE (source) == sizeof (float) && size == 1)
+	  {
+	    double re = __real__ *(_Complex double*) sr;
+	    double im = __imag__ *(_Complex double*) sr;
+	    armci_msg_reduce(&re, 1, op, ARMCI_FLOAT, result_image-1);
+	    armci_msg_reduce(&im, 1, op, ARMCI_FLOAT, result_image-1);
+	    if (result)
+	      *(_Complex double*) dst = re + im * _Complex_I;
+	    else
+	      *(_Complex double*) sr = re + im * _Complex_I;
+	  }
+	else if (GFC_DESCRIPTOR_SIZE (source) == sizeof (double) && size == 1)
+  	  {
+	    double re = __real__ *(_Complex double*) sr;
+	    double im = __imag__ *(_Complex double*) sr;
+	    armci_msg_reduce(&re, 1, op, ARMCI_DOUBLE, result_image-1);
+	    armci_msg_reduce(&im, 1, op, ARMCI_DOUBLE, result_image-1);
+	    if (result)
+	      *(_Complex double*) dst = re + im * _Complex_I;
+	    else
+	      *(_Complex double*) sr = re + im * _Complex_I;
+	  }
+	else
+	  goto error;
+	break;
+      default:
+	goto error;
+      }
+  return;
+error:
+    /* FIXME: Handle the other data types as well.  */
+    caf_runtime_error ("Unsupported data type in collective\n");
+}
+
+
+static void
+co_reduce_1 (char *op, gfc_descriptor_t *source,
+	     gfc_descriptor_t *result, int result_image, int *stat,
+	     char *errmsg, int errmsg_len)
+{
+  void *source2, *result2;
+  size_t i, size;
+  int j, ierr;
+  int rank = GFC_DESCRIPTOR_RANK (source);
+
+  if (stat)
+    *stat = 0;
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = source->dim[j]._ubound
+			    - source->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size = 1
+      || (GFC_DESCRIPTOR_TYPE (source) != BT_COMPLEX
+	  && PREFIX (is_contiguous) (source)
+	  && (!result || PREFIX (is_contiguous) (result))))
+    {
+      source2 = source->base_addr;
+      result2 = result ? result->base_addr : NULL;
+      co_reduce_2 (op, result_image, source, result, source2, result2, size);
+      return;
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_sr = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (source)-1; j++)
+	{
+	  array_offset_sr += ((i / (extent*stride))
+			   % (source->dim[j]._ubound
+			      - source->dim[j].lower_bound + 1))
+			  * source->dim[j]._stride;
+	  extent = (source->dim[j]._ubound - source->dim[j].lower_bound + 1);
+          stride = source->dim[j]._stride;
+	}
+      array_offset_sr += (i / extent) * source->dim[rank-1]._stride;
+      void *sr = (void *)((char *) source->base_addr
+			  + array_offset_sr*GFC_DESCRIPTOR_SIZE (source));
+      void *dst = NULL;
+      if (result)
+	{
+	  ptrdiff_t array_offset_dst = 0;
+	  stride = 1;
+	  extent = 1;
+	  for (j = 0; j < rank-1; j++)
+	    {
+	      array_offset_dst += ((i / (extent*stride))
+				   % (result->dim[j]._ubound
+				   - result->dim[j].lower_bound + 1))
+				  * result->dim[j]._stride;
+	      extent = (result->dim[j]._ubound - result->dim[j].lower_bound + 1);
+	      stride = result->dim[j]._stride;
+	    }
+	  array_offset_dst += (i / extent) * result->dim[rank-1]._stride;
+	  dst = (void *)((char *) result->base_addr
+			 + array_offset_dst*GFC_DESCRIPTOR_SIZE (source));
+	}
+
+      result2 = result ? dst : NULL;
+      co_reduce_2 (op, result_image, source, result, sr, result2, 1);
+    }
+  return;
+}
+
+
+void
+PREFIX (co_sum) (gfc_descriptor_t *source, gfc_descriptor_t *result,
+                 int result_image, int *stat, char *errmsg, int errmsg_len)
+{
+  co_reduce_1 ("+", source, result, result_image, stat, errmsg, errmsg_len);
+}
+
+
+void
+PREFIX (co_min) (gfc_descriptor_t *source, gfc_descriptor_t *result,
+                 int result_image, int *stat, char *errmsg, int errmsg_len)
+{
+  co_reduce_1 ("min", source, result, result_image, stat, errmsg, errmsg_len);
+}
+
+
+void
+PREFIX (co_max) (gfc_descriptor_t *source, gfc_descriptor_t *result,
+                 int result_image, int *stat, char *errmsg, int errmsg_len)
+{
+  co_reduce_1 ("max", source, result, result_image, stat, errmsg, errmsg_len);
+}
+#endif
+
+
+/* ERROR STOP the other images.  */
+
+static void
+error_stop (int error)
+{
+  /* FIXME: Shutdown the Fortran RTL to flush the buffer.  PR 43849.  */
+  /* FIXME: Do some more effort than just ARMCI_Error.  */
+//  ARMCI_Error ("Aborting calculation", error);
+  ARMCI_Error (NULL, error);
+
+  /* Should be unreachable, but to make sure also call exit.  */
+  exit (error);
+}
+
+
+/* ERROR STOP function for string arguments.  */
+
+void
+PREFIX (error_stop_str) (const char *string, int32_t len)
+{
+  fputs ("ERROR STOP ", stderr);
+  while (len--)
+    fputc (*(string++), stderr);
+  fputs ("\n", stderr);
+
+  error_stop (1);
+}
+
+
+/* ERROR STOP function for numerical arguments.  */
+
+void
+PREFIX (error_stop) (int32_t error)
+{
+  fprintf (stderr, "ERROR STOP %d\n", error);
+  error_stop (error);
+}
diff --git a/src/common/Makefile b/src/common/Makefile
new file mode 100644
index 0000000..c57aa20
--- /dev/null
+++ b/src/common/Makefile
@@ -0,0 +1,9 @@
+include ../make.inc
+
+caf_auxiliary.o: caf_auxiliary.c ../libcaf.h ../libcaf-gfortran-descriptor.h
+	$(CC) -I.. $(CFLAGS) -c $< -o $@
+
+clean:
+	rm -f caf_auxiliary.o
+
+distclean: clean
diff --git a/src/common/caf_auxiliary.c b/src/common/caf_auxiliary.c
new file mode 100644
index 0000000..a5e0c75
--- /dev/null
+++ b/src/common/caf_auxiliary.c
@@ -0,0 +1,60 @@
+/* Auxiliary functions for all of GNU Fortran libcaf implementations.
+
+Copyright (c) 2012-2014, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sourcery, Inc., nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+#include "libcaf.h"
+
+
+/* Check whether the array section is contiguous.  There are two possibilities
+   either the stride matches always the extent of that dimension - or if the
+   "noncontiguous" dimensions have all extent one (= element access for that
+   dimension); a mixture is possible if the left dimensions are contiguous
+   and the right ones are elements.  */
+
+bool
+PREFIX (is_contiguous) (gfc_descriptor_t *array)
+{
+  int i;
+  ptrdiff_t dim_extent;
+  ptrdiff_t extent = 1;
+  bool element = false;
+
+  for (i = 0; i < GFC_DESCRIPTOR_RANK(array); i++)
+    {
+      if (!element && array->dim[i]._stride != extent)
+	return false;
+
+      dim_extent = array->dim[i]._ubound - array->dim[i].lower_bound + 1;
+      if (dim_extent <= 0)
+	return true;  /* Zero-sized array.  */
+      else if (dim_extent == 1 && GFC_DESCRIPTOR_RANK(array) == 1)
+        element = true;
+      else if (element)
+	return false;
+      extent *= dim_extent;
+    }
+  return true;
+}
diff --git a/src/extensions/caf-foot b/src/extensions/caf-foot
new file mode 100755
index 0000000..552945a
--- /dev/null
+++ b/src/extensions/caf-foot
@@ -0,0 +1,117 @@
+cmd=$(basename "$0")
+
+usage()
+{
+    echo ""
+    echo " $cmd - Fortran compiler wrapper for OpenCoarrays"
+    echo ""
+    echo " Usage: $cmd <fortran-source-file> [options] ..."
+    echo ""
+    echo " Options:"
+    echo "   --help, -h               Show this help message"
+    echo "   --version, -v, -V        Report version and copyright information"
+    echo "   --wrapping, -w, --wraps  Report the name of the wrapped compiler"
+    echo ""
+    echo " Example usage:"
+    echo ""
+    echo "   $cmd foo.f90 -o foo"
+    echo "   $cmd -v"
+    echo "   $cmd --help"
+    echo ""
+    echo "OpenCoarrays $caf_version $cmd supports three categories of compilers"
+    echo "with the following restrictions for each use case:"
+    echo ""
+    echo " 1. With an OpenCoarrays-Aware (OCA) compiler (GNU 5.1.0 or later),"
+    echo "   a. If any of the options listed above appear, any remaining arguments are ignored."
+    echo "   b. If present, <fortran-source-file> must"
+    echo "      * be a Fortran source file,"
+    echo "      * appear before all other arguments,"
+    echo "      * be the only Fortran source file in the argument list,"
+    echo "      * have a name of the form *.f90, *.F90, *.f, or *.F. "
+    echo "   c. The environment variable 'CAFC' must be empty or point to a Fortran compiler/linker. "
+    echo "   d. If 'CAFC' is empty, a default value of 'mpif90' is used. "
+    echo ""
+    echo " 2. With non-OCA CAF compilers (Intel or Cray),"
+    echo "   a. Observe restrictions 1a-d above."
+    echo "   b. Access OpenCoarrays collective subroutines via use association with an only clause,"
+    echo "      e.g., 'use opencoarrays, only : co_sum,co_broadcast' "
+    echo ""
+    echo " 3. With non-CAF compilers (all compilers not named above),"
+    echo "   a. Observe restrictions 1a-d above."
+    echo "   b. Access OpenCoarrays capabilities via use association ('use opencoarrays')."
+    echo "   c. The only CAF statements or expressions allowed are the following:"
+    echo "      * 'num_images()' "
+    echo "      * 'this_image()' with or without arguments"
+    echo "      * 'sync all' with or without arguments."
+    echo "      * 'sync images' with or without arguments."
+    echo "      * 'error stop' without arguments."
+    echo "      * 'co_sum', 'co_broadcast', 'co_max', 'co_min', or 'co_reduce'"
+    echo ""
+    echo " The caf wrapper will append -L, -l, and other required arguments as necessary"
+    echo " using values that get set during the OpenCoarrays build and installation."
+    echo ""
+
+    exit 1
+}
+
+# Print useage information if caf is invoked without arguments
+if [ $# == 0 ]; then
+  usage | less
+  exit 1
+fi
+
+# Default to "mpif90" Fortran compiler if environment variable CAFC has zero length:
+if [ -z "$CAFC" ]; then
+   CAFC=mpif90
+fi
+
+# TODO -- improve the syntax of the "set" command below to accepted an unlimited number of arguments
+max_arguments=100
+link_args="-fcoarray=lib -lcaf_mpi"
+
+if [[ $1 == '-v' || $1 == '-V' || $1 == '--version' ]]; then
+    echo ""
+    echo "OpenCoarrays Coarray Fortran Compiler Wrapper (caf version $caf_version)"
+    echo "Copyright (C) 2015-2016 Sourcery, Inc."
+    echo ""
+    echo "OpenCoarrays comes with NO WARRANTY, to the extent permitted by law."
+    echo "You may redistribute copies of OpenCoarrays under the terms of the"
+    echo "BSD 3-Clause License.  For more information about these matters, see"
+    echo "the file named LICENSE."
+    echo ""
+elif [[ $1 == '-w' || $1 == '--wrapping' || $1 == '--wraps' ]]; then
+  echo "caf wraps CAFC=$CAFC"
+elif [[ $1 == '-h' || $1 == '--help' ]]; then
+  # Print usage information
+  usage | less
+  exit 1
+elif [ "$caf_compiler" = "true" ]; then
+  # Nothing to do other than invoke the compiler with all the command-line arguments:
+  $CAFC "$@" -L "$caf_lib_dir" $link_args
+else
+  # Verify that a file with the .f90, .F90, .f, or .F extension is the first argument:
+  src_extension=$(echo "$1" | cut -f2 -d'.')
+  if [[ $src_extension == 'f90' || $src_extension == 'F90' || $src_extension == 'f' || $src_extension == 'F' ]]; then
+    # copy the source file into a new file for pre-processing (preprending "caf-" to the new file name):
+    cp "$1" "caf-$1"
+    # Edit the copied source to replace CAF syntax with calls to public procedures in opencoarrays.f90:
+    if [ "$linux" = "true" ]; then
+      sed -i'' 's/sync all/call sync_all/g' "caf-$1"
+      sed -i'' 's/error stop/call error_stop/g' "caf-$1"
+      sed -i'' 's/sync images/call sync_images/g' "caf-$1"
+    else
+      # This works on OS X and other POSIX-compliant operating systems:
+      sed -i '' 's/sync all/call sync_all/g' "caf-$1"
+      sed -i '' 's/error stop/call error_stop/g' "caf-$1"
+      sed -i '' 's/sync images/call sync_images/g' "caf-$1"
+    fi
+    # Replace the file name in command-line argment 1 with the new name beofre invoking the compiler:
+    set -- "caf-$1" "${@:2:$max_arguments}"
+    # Invoke the compiler along with all command-line arguments:
+    $CAFC "$@" -L "$caf_lib_dir" -I "$caf_mod_dir" $link_args
+  else
+    # Print usage information upon encountering an unknowon CAF source file extension
+    usage | less
+    exit 1
+  fi
+fi
diff --git a/src/extensions/caf-head b/src/extensions/caf-head
new file mode 100755
index 0000000..0ac67e7
--- /dev/null
+++ b/src/extensions/caf-head
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Coarray Fortran (CAF) Compiler Wrapper
+#
+# Invokes the chosen Fortran compiler with the received command-line
+# arguments.
+#
+# Copyright (c) 2015-2016, Sourcery, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the Sourcery, Inc., nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
diff --git a/src/extensions/cafrun-foot b/src/extensions/cafrun-foot
new file mode 100644
index 0000000..8715308
--- /dev/null
+++ b/src/extensions/cafrun-foot
@@ -0,0 +1,47 @@
+Usage()
+{
+    cmd=$(basename "$0")
+    echo ""
+    echo " $cmd - Coarray Fortran executable launcher for OpenCoarrays"
+    echo ""
+    echo " Usage: $cmd [options] ..."
+    echo ""
+    echo " Options:"
+    echo "   --help, -h           Show this help message"
+    echo "   --version, -v, -V    Report version and copyright information"
+    echo "   --wraps, -w,         Report the name of the wrapped compiler"
+    echo ""
+    echo " Example usage:"
+    echo ""
+    echo "   $cmd -np 2 foo"
+    echo "   $cmd -v"
+    echo "   $cmd --help"
+    echo ""
+    echo " Notes:"
+    echo "   [options] must a CAF executable file, one of the above arguments,"
+    echo "   or an argument to the program name returned by caf --wraps"
+    echo ""
+    exit 1
+}
+
+# Print useage information if caf is invoked without arguments
+if [ $# == 0 ]; then
+  usage
+elif [[ $1 == '-v' || $1 == '-V' || $1 == '--version' ]]; then
+  echo ""
+  # shellcheck disable=SC2154
+  echo "OpenCoarrays Coarray Fortran Executable Launcher (caf version $caf_version)"
+  echo "Copyright (C) 2015-2016 Sourcery, Inc."
+  echo ""
+  echo "OpenCoarrays comes with NO WARRANTY, to the extent permitted by law."
+  echo "You may redistribute copies of OpenCoarrays under the terms of the"
+  echo "BSD 3-Clause License.  For more information about these matters, see"
+  echo "the file named LICENSE."
+  echo ""
+elif [[ $1 == '-w' || $1 == '--wraps' ]]; then
+  mpirun -v
+elif [[ $1 == '-h' || $1 == '--help' ]]; then
+  usage
+else
+  mpirun "$@"
+fi
diff --git a/src/extensions/cafrun-head b/src/extensions/cafrun-head
new file mode 100755
index 0000000..cb43b8d
--- /dev/null
+++ b/src/extensions/cafrun-head
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Coarray Fortran (CAF) Executable Launcher
+#
+# Copyright (c) 2015-2016, Sourcery, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the Sourcery, Inc., nor the
+#       names of its contributors may be used to endorse or promote products
+#       derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This script invokes the chosen Fortran compiler with the received command-line
+# arguments.  Current assumptions:
+#    1. The only argument is either an informational flag or a CAF executable
+#       file.
+#    2. The environment variable "FC" is used to determine the identity fo the Fortran compiler/linker.
+#    3. If "FC" is empty, a default value of "mpif90" is used.
diff --git a/src/extensions/opencoarrays.F90 b/src/extensions/opencoarrays.F90
new file mode 100644
index 0000000..e6d1e7f
--- /dev/null
+++ b/src/extensions/opencoarrays.F90
@@ -0,0 +1,744 @@
+! Fortran 2015 feature support for Fortran 2008 compilers
+!
+! Copyright (c) 2015-2016, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+module opencoarrays
+#ifdef COMPILER_SUPPORTS_ATOMICS
+  use iso_fortran_env, only : atomic_int_kind
+#endif
+  use iso_c_binding, only : c_int,c_char,c_ptr,c_loc,c_double,c_int32_t,c_ptrdiff_t,c_sizeof,c_bool,c_funloc
+  implicit none
+
+  private
+  public :: co_reduce
+  public :: co_broadcast
+  public :: co_sum
+  public :: co_min
+  public :: co_max
+  public :: this_image
+  public :: num_images
+  public :: error_stop
+  public :: sync_all
+#ifdef COMPILER_SUPPORTS_ATOMICS
+  public :: event_type
+  public :: event_post
+
+  type event_type
+    private
+    integer(atomic_int_kind), allocatable :: atom[:]
+  end type
+#endif
+
+  ! Generic interface to co_broadcast with implementations for various types, kinds, and ranks
+  interface co_reduce
+     module procedure co_reduce_c_int,co_reduce_c_double,co_reduce_logical
+  end interface
+
+  ! Generic interface to co_broadcast with implementations for various types, kinds, and ranks
+  interface co_broadcast
+     module procedure co_broadcast_c_int,co_broadcast_c_double,co_broadcast_c_char
+  end interface
+
+  ! Generic interface to co_sum with implementations for various types, kinds, and ranks
+  interface co_sum
+     module procedure co_sum_c_int,co_sum_c_double
+  end interface
+
+  ! Generic interface to co_sum with implementations for various types, kinds, and ranks
+  interface co_min
+     module procedure co_min_c_int,co_min_c_double
+  end interface
+
+  ! Generic interface to co_sum with implementations for various types, kinds, and ranks
+  interface co_max
+     module procedure co_max_c_int,co_max_c_double
+  end interface
+
+  abstract interface
+     pure function c_int_operator(lhs,rhs) result(lhs_op_rhs)
+       import c_int
+       integer(c_int), intent(in) :: lhs,rhs
+       integer(c_int) :: lhs_op_rhs
+     end function
+     pure function c_double_operator(lhs,rhs) result(lhs_op_rhs)
+       import c_double
+       real(c_double), intent(in) :: lhs,rhs
+       real(c_double) :: lhs_op_rhs
+     end function
+     pure function logical_operator(lhs,rhs) result(lhs_op_rhs)
+       logical, intent(in) :: lhs,rhs
+       logical :: lhs_op_rhs
+     end function
+  end interface
+
+  ! __________ End Public Interface _____________
+
+
+  ! __________ Begin Private Implementation _____
+
+  ! Enumeration from ../libcaf-gfortran-descriptor.h:
+  !  enum
+  !{ BT_UNKNOWN = 0, BT_INTEGER, BT_LOGICAL, BT_REAL, BT_COMPLEX,
+  !  BT_DERIVED, BT_CHARACTER, BT_CLASS, BT_PROCEDURE, BT_HOLLERITH, BT_VOID,
+  !  BT_ASSUMED
+  !};
+
+  enum ,bind(C)
+    enumerator :: &
+     BT_UNKNOWN = 0, BT_INTEGER, BT_LOGICAL, BT_REAL, BT_COMPLEX, &
+     BT_DERIVED, BT_CHARACTER, BT_CLASS, BT_PROCEDURE, BT_HOLLERITH, BT_VOID, &
+     BT_ASSUMED
+  end enum
+
+  ! Type definition from ../libcaf-gfortran-descriptor.h:
+  !typedef struct descriptor_dimension
+  !{
+  !  ptrdiff_t _stride;
+  !  ptrdiff_t lower_bound;
+  !  ptrdiff_t _ubound;
+  !}
+  !descriptor_dimension;
+
+  ! Fortran derived type interoperable with like-named C type:
+  type, bind(C) :: descriptor_dimension
+     integer(c_ptrdiff_t) :: stride
+     integer(c_ptrdiff_t) :: lower_bound
+     integer(c_ptrdiff_t) :: ubound_
+  end type
+
+  ! Type definition from ../libcaf-gfortran-descriptor.h:
+  !typedef struct gfc_descriptor_t {
+  !  void *base_addr;
+  !  size_t offset;
+  !  ptrdiff_t dtype;
+  !  descriptor_dimension dim[];
+  !} gfc_descriptor_t;
+
+  integer, parameter :: max_dimensions=15
+
+  ! Fortran derived type interoperable with like-named C type:
+  type, bind(C) :: gfc_descriptor_t
+    type(c_ptr) :: base_addr
+    integer(c_ptrdiff_t) :: offset
+    integer(c_ptrdiff_t) :: dtype
+    type(descriptor_dimension) :: dim_(max_dimensions)
+  end type
+
+  ! C comment and source from ../libcaf.h
+  ! /* When there is a vector subscript in this dimension, nvec == 0, otherwise,
+  ! lower_bound, upper_bound, stride contains the bounds relative to the declared
+  ! bounds; kind denotes the integer kind of the elements of vector[].  */
+  ! type, bind(C) :: caf_vector_t {
+  !   size_t nvec;
+  !   union {
+  !     struct {
+  !       void *vector;
+  !       int kind;
+  !     } v;
+  !     struct {
+  !       ptrdiff_t lower_bound, upper_bound, stride;
+  !     } triplet;
+  !   } u;
+  ! }
+  ! caf_vector_t;
+
+  type, bind(C) :: v_t
+    type(c_ptr) :: vector
+    integer(c_int) :: kind_
+  end type
+
+  type, bind(C) :: triplet_t
+    integer(c_ptrdiff_t) :: lower_bound, upper_bound, stride
+  end type
+
+  type, bind(C) :: u_t
+     type(v_t) :: v
+     type(triplet_t) :: triplet
+  end type
+
+  type, bind(C) :: caf_vector_t
+    integer(c_ptrdiff_t) :: nvec
+    type(u_t) :: u
+  end type
+
+  ! --------------------
+
+  integer(c_int), save, volatile, bind(C,name="CAF_COMM_WORLD") :: CAF_COMM_WORLD
+  integer(c_int32_t), parameter  :: bytes_per_word=4_c_int32_t
+
+  interface gfc_descriptor
+    module procedure gfc_descriptor_c_int,gfc_descriptor_c_double,gfc_descriptor_logical
+  end interface
+
+  ! Bindings for OpenCoarrays C procedures
+  interface
+
+    ! C function signature from ../mpi/mpi_caf.c:
+    ! void
+    ! PREFIX (co_min) (gfc_descriptor_t *a, int result_image, int *stat, char *errmsg,
+    !                  int src_len, int errmsg_len)
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+    subroutine opencoarrays_co_min(a,result_image, stat, errmsg, unused , errmsg_len) bind(C,name="_caf_extensions_co_min")
+#else
+    subroutine opencoarrays_co_min(a,result_image, stat, errmsg, unused, errmsg_len) bind(C,name="_gfortran_caf_co_min")
+#endif
+      import :: c_int,c_char,c_ptr
+      type(c_ptr), intent(in), value :: a
+      integer(c_int), intent(in),  value :: result_image,errmsg_len,unused
+      integer(c_int), intent(out), optional, volatile :: stat
+      character(kind=c_char), intent(out), optional, volatile :: errmsg(*)
+    end subroutine
+
+    ! C function signature from ../mpi/mpi_caf.c
+    ! void
+    ! PREFIX (co_reduce) (gfc_descriptor_t *a, void *(*opr) (void *, void *), int opr_flags,
+    !                     int result_image, int *stat, char *errmsg, int a_len, int errmsg_len)
+    subroutine opencoarrays_co_reduce(a, opr, opr_flags, result_image, stat, errmsg, a_len, errmsg_len) &
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+      bind(C,name="_caf_extensions_co_reduce")
+#else
+      bind(C,name="_gfortran_caf_co_reduce")
+#endif
+      use iso_c_binding, only : c_ptr,c_funptr,c_int,c_char
+      type(c_ptr), intent(in), value :: a
+      type(c_funptr), intent(in), value :: opr
+      integer(c_int), intent(in), value :: opr_flags,result_image
+      integer(c_int), intent(out) :: stat
+      character(kind=c_char), intent(out), optional, volatile :: errmsg(*)
+      integer(c_int), intent(in), value :: a_len
+      integer(c_int), intent(in), value :: errmsg_len
+    end subroutine
+
+    ! C function signature from ../mpi/mpi_caf.c:
+    ! void
+    ! PREFIX (get) (caf_token_t token, size_t offset,
+    !              int image_index,
+    !              gfc_descriptor_t *src ,
+    !              caf_vector_t *src_vector __attribute__ ((unused)),
+    !              gfc_descriptor_t *dest, int src_kind, int dst_kind,
+    !              bool mrt)
+    subroutine opencoarrays_get(token, offset, image_index_, src, src_vector_unused, dest, src_kind, dst_kind, mrt) &
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+      bind(C,name="_caf_extensions_get")
+#else
+      bind(C,name="_gfortran_caf_get")
+#endif
+      import c_ptrdiff_t,c_int,gfc_descriptor_t,c_bool,caf_vector_t,c_ptr
+      type(c_ptr), value :: token
+      integer(c_ptrdiff_t), value :: offset
+      integer(c_int), value :: image_index_
+      type(gfc_descriptor_t) :: src
+      type(caf_vector_t) :: src_vector_unused
+      type(gfc_descriptor_t) :: dest
+      integer(c_int), value :: src_kind
+      integer(c_int), value :: dst_kind
+      logical(c_bool), value :: mrt
+    end subroutine
+
+    ! C function signature from ../mpi/mpi_caf.c:
+    ! void
+    ! PREFIX (co_max) (gfc_descriptor_t *a, int result_image, int *stat,
+    !                  char *errmsg, int src_len, int errmsg_len)
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+    subroutine opencoarrays_co_max(a,result_image, stat, errmsg, unused, errmsg_len) bind(C,name="_caf_extensions_co_max")
+#else
+    subroutine opencoarrays_co_max(a,result_image, stat, errmsg, unused, errmsg_len) bind(C,name="_gfortran_caf_co_max")
+#endif
+      import :: c_int,c_char,c_ptr
+      type(c_ptr), intent(in), value :: a
+      integer(c_int), intent(in),  value :: result_image,errmsg_len,unused
+      integer(c_int), intent(out), optional :: stat
+      character(kind=c_char), intent(out), optional :: errmsg(*)
+    end subroutine
+
+    ! C function signature from ../mpi/mpi_caf.c:
+    ! void
+    ! PREFIX (co_sum) (gfc_descriptor_t *a, int result_image, int *stat, char *errmsg,
+    !                  int errmsg_len)
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+    subroutine opencoarrays_co_sum(a, result_image, stat, errmsg, errmsg_len) bind(C,name="_caf_extensions_co_sum")
+#else
+    subroutine opencoarrays_co_sum(a, result_image, stat, errmsg, errmsg_len) bind(C,name="_gfortran_caf_co_sum")
+#endif
+      import :: c_int,c_char,c_ptr
+      type(c_ptr), intent(in), value :: a
+      integer(c_int), intent(in),  value :: result_image,errmsg_len
+      integer(c_int), intent(out), optional :: stat
+      character(kind=c_char), intent(out), optional :: errmsg(*)
+    end subroutine
+
+    ! C function signature from ../mpi/mpi_caf.c
+    ! void
+    ! PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *errmsg,
+    !                  int errmsg_len)
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+    subroutine opencoarrays_co_broadcast(a,source_image, stat, errmsg, errmsg_len) bind(C,name="_caf_extensions_co_broadcast")
+#else
+    subroutine opencoarrays_co_broadcast(a,source_image, stat, errmsg, errmsg_len) bind(C,name="_gfortran_caf_co_broadcast")
+#endif
+      import :: c_int,c_char,c_ptr
+      type(c_ptr), intent(in), value :: a
+      integer(c_int), intent(in),  value :: source_image,errmsg_len
+      integer(c_int), intent(out), optional :: stat
+      character(kind=c_char), intent(out), optional :: errmsg(*)
+    end subroutine
+
+    ! C function signature from ../mpi/mpi_caf.c:
+    ! int PREFIX (this_image) (int);
+    function opencoarrays_this_image(coarray) bind(C,name="_gfortran_caf_this_image") result(image_num)
+      import :: c_int
+      integer(c_int), value, intent(in) :: coarray
+      integer(c_int)  :: image_num
+    end function
+
+    ! C function signature from ../mpi/mpi_caf.c:
+    ! int PREFIX (num_images) (int, int);
+    function opencoarrays_num_images(coarray,dim_) bind(C,name="_gfortran_caf_num_images") result(num_images_)
+      import :: c_int
+      integer(c_int), value, intent(in) :: coarray,dim_
+      integer(c_int) :: num_images_
+    end function
+
+    ! C function signature from ../mpi_caf.c
+    ! void PREFIX (error_stop) (int32_t) __attribute__ ((noreturn));
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+    subroutine opencoarrays_error_stop(stop_code) bind(C,name="_caf_extensions_error_stop")
+#else
+    subroutine opencoarrays_error_stop(stop_code) bind(C,name="_gfortran_caf_error_stop")
+#endif
+      import :: c_int32_t
+      integer(c_int32_t), value, intent(in) :: stop_code
+    end subroutine
+
+    ! C function signature from ../mpi_caf.c
+    ! void PREFIX (sync_all) (int *, char *, int);
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+    subroutine opencoarrays_sync_all(stat,errmsg,unused) bind(C,name="_caf_extensions_sync_all")
+#else
+    subroutine opencoarrays_sync_all(stat,errmsg,unused) bind(C,name="_gfortran_caf_sync_all")
+#endif
+      import :: c_int,c_char
+      integer(c_int), intent(out) :: stat,unused
+      character(c_char), intent(out) :: errmsg(*)
+    end subroutine
+
+  end interface
+
+
+contains
+
+  ! __________ Descriptor constructors for for each supported type and kind ____________
+  ! ____________________________________________________________________________________
+
+  function my_dtype(type_,kind_,rank_) result(dtype_)
+    integer,parameter :: GFC_DTYPE_SIZE_SHIFT = 8, GFC_DTYPE_TYPE_SHIFT=3
+    integer(c_int32_t), intent(in) :: type_,kind_,rank_
+    integer(c_int32_t) :: dtype_
+
+    ! SIZE Type Rank
+    ! 0000  000  000
+
+    ! Rank is represented in the 3 least significant bits
+    dtype_ = ior(0_c_int32_t,rank_)
+    ! The next three bits represent the type id as expressed in libcaf-gfortran-descriptor.h
+    dtype_ = ior(dtype_,ishft(type_,GFC_DTYPE_TYPE_SHIFT))
+    ! The most significant bits represent the size of a the type (single or double precision).
+    ! We can express the precision in terms of 32-bit words: 1 for single, 2 for double.
+    dtype_ = ior(dtype_,ishft(kind_,GFC_DTYPE_SIZE_SHIFT))
+
+  end function
+
+  function gfc_descriptor_c_int(a) result(a_descriptor)
+    integer(c_int), intent(in), target, contiguous :: a(..)
+    type(gfc_descriptor_t) :: a_descriptor
+    integer(c_int), parameter :: unit_stride=1,scalar_offset=-1
+    integer(c_int) :: i
+
+    a_descriptor%dtype = my_dtype(type_=BT_INTEGER,kind_=int(c_sizeof(a)/bytes_per_word,c_int32_t),rank_=rank(a))
+    a_descriptor%offset = scalar_offset
+    a_descriptor%base_addr = c_loc(a) ! data
+    do concurrent(i=1:rank(a))
+      a_descriptor%dim_(i)%stride  = unit_stride
+      a_descriptor%dim_(i)%lower_bound = lbound(a,i)
+      a_descriptor%dim_(i)%ubound_ = ubound(a,i)
+    end do
+
+  end function
+
+  function gfc_descriptor_logical(a) result(a_descriptor)
+    logical, intent(in), target, contiguous :: a(..)
+    type(gfc_descriptor_t) :: a_descriptor
+    integer(c_int), parameter :: unit_stride=1,scalar_offset=-1,words=1
+    integer(c_int) :: i
+
+    a_descriptor%dtype = my_dtype(type_=BT_LOGICAL,kind_=words,rank_=rank(a))
+    a_descriptor%offset = scalar_offset
+    a_descriptor%base_addr = c_loc(a) ! data
+    do concurrent(i=1:rank(a))
+      a_descriptor%dim_(i)%stride  = unit_stride
+      a_descriptor%dim_(i)%lower_bound = lbound(a,i)
+      a_descriptor%dim_(i)%ubound_ = ubound(a,i)
+    end do
+
+  end function
+
+  function gfc_descriptor_c_double(a) result(a_descriptor)
+    real(c_double), intent(in), target, contiguous :: a(..)
+    type(gfc_descriptor_t) :: a_descriptor
+    integer(c_int), parameter :: unit_stride=1,scalar_offset=-1
+    integer(c_int) :: i
+
+    a_descriptor%dtype = my_dtype(type_=BT_REAL,kind_=int(c_sizeof(a)/bytes_per_word,c_int32_t),rank_=rank(a))
+    a_descriptor%offset = scalar_offset
+    a_descriptor%base_addr = c_loc(a) ! data
+    do concurrent(i=1:rank(a))
+      a_descriptor%dim_(i)%stride  = unit_stride
+      a_descriptor%dim_(i)%lower_bound = lbound(a,i)
+      a_descriptor%dim_(i)%ubound_ = ubound(a,i)
+    end do
+
+  end function
+
+ ! This version should work for any rank but causes an ICE with gfortran 4.9.2
+ !
+ !function gfc_descriptor_c_char(a) result(a_descriptor)
+ !  character(c_char), intent(in), target, contiguous :: a(..)
+ !  type(gfc_descriptor_t) :: a_descriptor
+ !  integer(c_int), parameter :: unit_stride=1,scalar_offset=-1
+ !  integer(c_int) :: i
+
+ !  a_descriptor%dtype = my_dtype(type_=BT_CHARACTER,kind_=int(c_sizeof(a)/bytes_per_word,c_int32_t),rank_=rank(a))
+ !  a_descriptor%offset = scalar_offset
+ !  a_descriptor%base_addr = c_loc(a) ! data
+ !  do concurrent(i=1:rank(a))
+ !    a_descriptor%dim_(i)%stride  = unit_stride
+ !    a_descriptor%dim_(i)%lower_bound = lbound(a,i)
+ !    a_descriptor%dim_(i)%ubound_ = ubound(a,i)
+ !  end do
+
+ !end function
+
+  ! ______ Assumed-rank co_reduce wrappers for each supported type and kind _________
+  ! _________________________________________________________________________________
+
+    subroutine co_reduce_c_int(a, opr, result_image, stat, errmsg)
+      ! Dummy variables
+      integer(c_int), intent(inout), volatile, contiguous :: a(..)
+      procedure(c_int_operator) :: opr
+      integer(c_int), intent(in), optional :: result_image
+      integer(c_int), intent(out), optional, volatile :: stat
+      character(kind=c_char), intent(out), optional, volatile :: errmsg(*)
+      ! Local variables
+      integer(c_int), volatile :: opr_flags_unused,a_len_unused,errmsg_len
+      type(gfc_descriptor_t), target :: a_descriptor
+      integer(c_int), parameter :: default_result_image=0
+      integer(c_int) :: result_image_
+
+      result_image_ = merge(result_image,default_result_image,present(result_image))
+      a_descriptor = gfc_descriptor(a)
+      call opencoarrays_co_reduce( &
+        c_loc(a_descriptor), c_funloc(opr), opr_flags_unused, result_image_, stat, errmsg, a_len_unused, errmsg_len &
+      )
+    end subroutine
+
+    subroutine co_reduce_logical(a, opr, result_image, stat, errmsg)
+      ! Dummy variables
+      logical, intent(inout), volatile, contiguous :: a(..)
+      procedure(logical_operator) :: opr
+      integer(c_int), intent(in), optional :: result_image
+      integer(c_int), intent(out),optional , volatile :: stat
+      character(kind=c_char), intent(out), optional, volatile :: errmsg(*)
+      ! Local variables
+      integer(c_int), volatile :: opr_flags_unused,a_len_unused,errmsg_len
+      type(gfc_descriptor_t), target :: a_descriptor
+      integer(c_int), parameter :: default_result_image=0
+      integer(c_int) :: result_image_
+
+      result_image_ = merge(result_image,default_result_image,present(result_image))
+      a_descriptor = gfc_descriptor(a)
+      call opencoarrays_co_reduce( &
+        c_loc(a_descriptor), c_funloc(opr), opr_flags_unused, result_image_, stat, errmsg, a_len_unused, errmsg_len &
+      )
+    end subroutine
+
+    subroutine co_reduce_c_double(a, opr, result_image, stat, errmsg)
+      ! Dummy variables
+      real(c_double), intent(inout), volatile, contiguous :: a(..)
+      procedure(c_double_operator) :: opr
+      integer(c_int), intent(in), optional :: result_image
+      integer(c_int), intent(out), optional, volatile :: stat
+      character(kind=c_char), intent(out), optional, volatile :: errmsg(*)
+      ! Local variables
+      integer(c_int), volatile :: opr_flags_unused,a_len_unused,errmsg_len
+      type(gfc_descriptor_t), target :: a_descriptor
+      integer(c_int), parameter :: default_result_image=0
+      integer(c_int) :: result_image_
+
+      result_image_ = merge(result_image,default_result_image,present(result_image))
+      a_descriptor = gfc_descriptor(a)
+      call opencoarrays_co_reduce( &
+        c_loc(a_descriptor), c_funloc(opr), opr_flags_unused, result_image_, stat, errmsg, a_len_unused, errmsg_len &
+      )
+    end subroutine
+
+  ! ______ Assumed-rank co_broadcast wrappers for each supported type and kind _________
+  ! ____________________________________________________________________________________
+
+  ! This provisional implementation incurs some overhead by converting the character argument
+  ! to an integer(c_int) array, invoking co_broadcast_c_int and then convering the received
+  ! message back from the integer(c_int) array to a character variable.
+  !
+  ! Replace this implementation with one that avoids the conversions and the associated copies
+  ! once the compiler provides support for co_broadcast with scalar arguments.
+  !
+  subroutine co_broadcast_c_char(a,source_image,stat,errmsg)
+    character(kind=c_char,len=*), intent(inout), volatile, target :: a
+    integer(c_int), intent(in), optional :: source_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    integer(c_int), allocatable :: a_cast_to_integer_array(:)
+
+    ! Convert "a" to an integer(c_int) array where each 32-bit integer element holds four 1-byte characters
+    a_cast_to_integer_array = transfer(a,[0_c_int])
+    ! Broadcast the integer(c_int) array
+    call co_broadcast_c_int(a_cast_to_integer_array,source_image, stat, errmsg)
+    ! Recover the characters from the broadcasted integer(c_int) array
+    a = transfer(a_cast_to_integer_array,repeat(' ',len(a)))
+
+  end subroutine
+
+  subroutine co_broadcast_c_double(a,source_image,stat,errmsg)
+    real(c_double), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: source_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants
+    integer(c_int), parameter :: default_source_image=0
+    integer(c_int) :: source_image_ ! Local replacement for the corresponding intent(in) dummy argument
+    type(gfc_descriptor_t), target :: a_descriptor
+
+    source_image_ = merge(source_image,default_source_image,present(source_image))
+    a_descriptor = gfc_descriptor(a)
+    call opencoarrays_co_broadcast(c_loc(a_descriptor),source_image_, stat, errmsg, len(errmsg))
+
+  end subroutine
+
+  subroutine co_broadcast_c_int(a,source_image,stat,errmsg)
+    integer(c_int), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: source_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    integer(c_int), parameter :: default_source_image=0
+    integer(c_int) :: source_image_ ! Local replacement for the corresponding intent(in) dummy argument
+    type(gfc_descriptor_t), target :: a_descriptor
+
+    source_image_ = merge(source_image,default_source_image,present(source_image))
+    a_descriptor = gfc_descriptor(a)
+    call opencoarrays_co_broadcast(c_loc(a_descriptor),source_image_, stat, errmsg, len(errmsg))
+
+  end subroutine
+
+  ! ________ Assumed-rank get wrappers for each supported type and kind ________________
+  ! ________ (Incomplete, private and unsupported) _____________________________________
+  ! ____________________________________________________________________________________
+
+  subroutine get_c_int(src , dest, image_index_, offset, mrt)
+    use iso_fortran_env, only : error_unit
+    ! Dummy arguments:
+    integer(c_int), intent(in), target, contiguous :: src(..)
+    integer(c_int), intent(out), target, contiguous, volatile :: dest(..)
+    integer(c_int), intent(in) :: image_index_
+    integer(c_ptrdiff_t), intent(in) :: offset
+    logical(c_bool), intent(in) :: mrt
+    ! Local variables:
+    type(gfc_descriptor_t), target, volatile ::  dest_descriptor
+    type(gfc_descriptor_t), target :: src_descriptor
+    type(caf_vector_t) :: src_vector_unused
+    type(c_ptr) :: token
+
+    write(error_unit,*) "Remote access of coarrays not yet supported"
+    call error_stop
+
+    src_descriptor = gfc_descriptor(src)
+    dest_descriptor = gfc_descriptor(dest)
+    call opencoarrays_get( &
+     token, offset, image_index_, src_descriptor, src_vector_unused, dest_descriptor, kind(src), kind(dest), mrt &
+    )
+  end subroutine
+
+  ! ________ Assumed-rank co_min wrappers for each supported type and kind _____________
+  ! ____________________________________________________________________________________
+
+  subroutine co_min_c_int(a,result_image,stat,errmsg)
+    integer(c_int), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: result_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    type(gfc_descriptor_t), target :: a_descriptor
+    integer(c_int), parameter :: default_result_image=0
+    integer(c_int) :: unused, result_image_ ! Local replacement for the corresponding intent(in) dummy argument
+
+    a_descriptor = gfc_descriptor(a)
+    result_image_ = merge(result_image,default_result_image,present(result_image))
+    call opencoarrays_co_min(c_loc(a_descriptor),result_image_, stat, errmsg, unused, len(errmsg))
+
+  end subroutine
+
+  subroutine co_min_c_double(a,result_image,stat,errmsg)
+    real(c_double), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: result_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    type(gfc_descriptor_t), target :: a_descriptor
+    integer(c_int), parameter :: default_result_image=0
+    integer(c_int) :: unused, result_image_ ! Local replacement for the corresponding intent(in) dummy argument
+
+    a_descriptor = gfc_descriptor(a)
+    result_image_ = merge(result_image,default_result_image,present(result_image))
+    call opencoarrays_co_min(c_loc(a_descriptor),result_image_, stat, errmsg, unused, len(errmsg))
+
+  end subroutine
+
+
+  ! ________ Assumed-rank co_max wrappers for each supported type and kind _____________
+  ! ____________________________________________________________________________________
+
+  subroutine co_max_c_int(a,result_image,stat,errmsg)
+    integer(c_int), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: result_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    type(gfc_descriptor_t), target :: a_descriptor
+    integer(c_int), parameter :: default_result_image=0
+    integer(c_int) :: unused, result_image_ ! Local replacement for the corresponding intent(in) dummy argument
+
+    a_descriptor = gfc_descriptor(a)
+    result_image_ = merge(result_image,default_result_image,present(result_image))
+    call opencoarrays_co_max(c_loc(a_descriptor),result_image_, stat, errmsg, unused, len(errmsg))
+
+  end subroutine
+
+  subroutine co_max_c_double(a,result_image,stat,errmsg)
+    real(c_double), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: result_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    type(gfc_descriptor_t), target :: a_descriptor
+    integer(c_int), parameter :: default_result_image=0
+    integer(c_int) :: unused, result_image_ ! Local replacement for the corresponding intent(in) dummy argument
+
+    a_descriptor = gfc_descriptor(a)
+    result_image_ = merge(result_image,default_result_image,present(result_image))
+    call opencoarrays_co_max(c_loc(a_descriptor),result_image_, stat, errmsg, unused, len(errmsg))
+
+  end subroutine
+
+  ! ________ Assumed-rank co_sum wrappers for each supported type and kind _____________
+  ! ____________________________________________________________________________________
+
+  subroutine co_sum_c_double(a,result_image,stat,errmsg)
+    real(c_double), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: result_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    type(gfc_descriptor_t), target :: a_descriptor
+    integer(c_int), parameter :: default_result_image=0
+    integer(c_int) :: result_image_ ! Local replacement for the corresponding intent(in) dummy argument
+
+    a_descriptor = gfc_descriptor(a)
+    result_image_ = merge(result_image,default_result_image,present(result_image))
+    call opencoarrays_co_sum(c_loc(a_descriptor),result_image_, stat, errmsg, len(errmsg))
+
+  end subroutine
+
+  subroutine co_sum_c_int(a,result_image,stat,errmsg)
+    integer(c_int), intent(inout), volatile, target, contiguous :: a(..)
+    integer(c_int), intent(in), optional :: result_image
+    integer(c_int), intent(out), optional:: stat
+    character(kind=1,len=*), intent(out), optional :: errmsg
+    ! Local variables and constants:
+    integer(c_int), parameter :: default_result_image=0
+    type(gfc_descriptor_t), target :: a_descriptor
+    integer(c_int) :: result_image_ ! Local replacement for the corresponding intent(in) dummy argument
+
+    a_descriptor = gfc_descriptor(a)
+    result_image_ = merge(result_image,default_result_image,present(result_image))
+    call opencoarrays_co_sum(c_loc(a_descriptor),result_image_, stat, errmsg, len(errmsg))
+
+  end subroutine
+
+  ! Return the image number (MPI rank + 1)
+  function this_image()  result(image_num)
+    use mpi, only : MPI_Comm_rank
+    integer(c_int) :: image_num,ierr
+   !image_num = opencoarrays_this_image(unused)
+    call MPI_Comm_rank(CAF_COMM_WORLD,image_num,ierr)
+    if (ierr/=0) call error_stop
+    image_num = image_num + 1
+  end function
+
+  ! Return the total number of images
+  function num_images()  result(num_images_)
+    use mpi, only : MPI_Comm_size
+    integer(c_int) :: num_images_,ierr
+   !num_images_ = opencoarrays_num_images(unused_coarray,unused_scalar)
+    call MPI_Comm_size(CAF_COMM_WORLD,num_images_,ierr)
+    if (ierr/=0) call error_stop
+  end function
+
+  ! Halt the execution of all images
+  subroutine error_stop(stop_code)
+    integer(c_int32_t), intent(in), optional :: stop_code
+    integer(c_int32_t), parameter :: default_code=-1_c_int32_t
+    integer(c_int32_t) :: code
+    code = merge(stop_code,default_code,present(stop_code))
+    call opencoarrays_error_stop(code)
+  end subroutine
+
+  ! Impose a global execution barrier
+  subroutine sync_all(stat,errmsg,unused)
+    integer(c_int), intent(out), optional :: stat,unused
+    character(c_char), intent(out), optional :: errmsg
+    call opencoarrays_sync_all(stat,errmsg,unused)
+  end subroutine
+
+#ifdef COMPILER_SUPPORTS_ATOMICS
+   ! Proposed Fortran 2015 event_post procedure
+   subroutine event_post(this)
+     class(event_type), intent(inout) ::  this
+     if (.not.allocated(this%atom)) this%atom=0
+     call atomic_define ( this%atom, this%atom + 1_atomic_int_kind )
+   end subroutine
+#endif
+
+end module
diff --git a/src/gasnet/CMakeLists.txt b/src/gasnet/CMakeLists.txt
new file mode 100644
index 0000000..f5ead80
--- /dev/null
+++ b/src/gasnet/CMakeLists.txt
@@ -0,0 +1 @@
+add_library(caf_gasnet gasnet.c ../common/caf_auxiliary.c)
diff --git a/src/gasnet/Makefile b/src/gasnet/Makefile
new file mode 100644
index 0000000..b8ed666
--- /dev/null
+++ b/src/gasnet/Makefile
@@ -0,0 +1,44 @@
+include ../make.inc
+
+MY_GASNET_CFLAGS := $(GASNET_CFLAGS)
+MY_GASNET_CC := $(GASNET_CC)
+MY_GASNET_LD := $(GASNET_LD)
+MY_GASNET_LDFLAGS := $(GASNET_LDFLAGS)
+
+include $(GASNET_MAK)
+
+
+GASNET_CFLAGS += $(CFLAGS) $(MY_GASNET_CFLAGS)
+
+ifneq ($(MY_GASNET_CC),)
+  GASNET_CC = $(MY_GASNET_CC)
+endif
+
+ifneq ($(MY_GASNET_LD),)
+  GASNET_LD = $(MY_GASNET_LD)
+endif
+
+ifneq ($(MY_GASNET_LDFLAGS),)
+  GASNET_LDFLAGS = $(MY_GASNET_LDFLAGS)
+endif
+
+
+all: libcaf_gasnet.a
+
+libcaf_gasnet.a: gasnet.o ../common/caf_auxiliary.o
+	ar rcv $@ gasnet.o ../common/caf_auxiliary.o
+	ranlib $@
+
+.c.o:
+	$(GASNET_CC) $(GASNET_CPPFLAGS) $(GASNET_CFLAGS) -I.. -c -o $@ $<
+
+gasnet.o: gasnet.c ../libcaf.h ../libcaf-gfortran-descriptor.h
+
+../common/caf_auxiliary.o:
+	$(MAKE) -C ../common
+
+clean:
+	rm -f gasnet.o
+
+distclean: clean
+	rm -f libcaf_armci.a
diff --git a/src/gasnet/gasnet.c b/src/gasnet/gasnet.c
new file mode 100644
index 0000000..38402a6
--- /dev/null
+++ b/src/gasnet/gasnet.c
@@ -0,0 +1,1201 @@
+/* GASNet implementation of Libcaf
+
+Copyright (c) 2012-2014, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sourcery, Inc., nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+/****l* gasnet/gasnet_caf.c
+ * NAME
+ *   gasnet_caf
+ * SYNOPSIS
+ *   This program implements the LIBCAF_GASNET transport layer.
+******
+*/
+
+
+#include "libcaf.h"
+#include "gasnet.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	/* For memcpy.  */
+#include <stdarg.h>	/* For variadic arguments.  */
+#include <alloca.h>
+#ifdef STRIDED
+#include "gasnet_vis.h"
+#endif
+
+#ifndef MALLOC_ALIGNMENT
+#  define MALLOC_ALIGNMENT   (2 *sizeof(size_t) < __alignof__ (long double)   \
+                              ? __alignof__ (long double) : 2*sizeof(size_t))
+#endif
+
+/* Define GFC_CAF_CHECK to enable run-time checking.  */
+/* #define GFC_CAF_CHECK  1  */
+
+typedef gasnet_seginfo_t * gasnet_caf_token_t;
+#define TOKEN(X) ((gasnet_caf_token_t) (X))
+
+#define send_notify1_handler   201
+
+static void error_stop (int error) __attribute__ ((noreturn));
+
+/* Global variables.  */
+static int caf_this_image;
+static int caf_num_images;
+static int caf_is_finalized;
+
+/*Sync image part*/
+static int sizeOrders = 0;
+static int *orders;
+static int *arrived;
+static int *images_full;
+
+caf_static_t *caf_static_list = NULL;
+static size_t r_pointer;
+static void *remote_memory = NULL;
+static long remoteMemorySize = 0;
+
+
+static bool
+freeToGo ()
+{
+  int i = 0;
+  bool ret = false;
+
+  gasnet_hold_interrupts ();
+
+  sizeOrders = 0;
+
+  for (i = 0; i < caf_num_images; i++)
+    {
+      if(orders[i] != 0)
+	{
+	  sizeOrders++;
+	  if(arrived[i]!=0)
+	    {
+	      orders[i]--;
+	      sizeOrders--;
+	      arrived[i]--;
+	    }
+	}
+    }
+
+  if (sizeOrders == 0)
+    ret = true;
+
+  gasnet_resume_interrupts ();
+
+  return ret;
+}
+
+
+static void
+initImageSync()
+{
+  int i=0,j=0;
+  orders = (int *)calloc(caf_num_images,sizeof(int));
+  arrived = (int *)calloc(caf_num_images,sizeof(int));
+  images_full = (int *)calloc(caf_num_images-1,sizeof(int));
+
+  for(i=0;i<caf_num_images;i++)
+    {
+      if(i+1 != caf_this_image)
+	{
+	  images_full[j]=i+1;
+	  j++;
+	}
+    }
+}
+
+static void
+insOrders (int *images, int n)
+{
+  int i = 0;
+
+  for (i = 0; i < n; i++)
+    {
+      orders[images[i]-1]++;
+      /* printf("Process: %d order: %d\n",caf_this_image,images[i]); */
+      gasnet_AMRequestShort1 (images[i]-1, send_notify1_handler, caf_this_image);
+    }
+}
+
+static void
+insArrived (int image)
+{
+  arrived[image-1]++;
+  /* printf("Process: %d arrived: %d\n",caf_this_image,image); */
+}
+
+
+static void
+req_notify1_handler (gasnet_token_t token, int proc)
+{
+  insArrived (proc);
+  /* gasnet_AMReplyShort1 (token, recv_notify1_handler, caf_this_image-1);  */
+}
+
+
+#if 0
+static void
+rec_notify1_handler (gasnet_token_t token, int proc)
+ {
+ }
+#endif
+
+
+/* Keep in sync with single.c.  */
+static void
+caf_runtime_error (const char *message, ...)
+{
+  va_list ap;
+  fprintf (stderr, "Fortran runtime error on image %d: ", caf_this_image);
+  va_start (ap, message);
+  vfprintf (stderr, message, ap);
+  va_end (ap);
+  fprintf (stderr, "\n");
+
+  /* FIXME: Shutdown the Fortran RTL to flush the buffer.  PR 43849.  */
+  /* FIXME: Do some more effort than just to abort.  */
+  gasnet_exit(EXIT_FAILURE);
+
+  /* Should be unreachable, but to make sure also call exit.  */
+  exit (EXIT_FAILURE);
+}
+
+
+/* Initialize coarray program.  This routine assumes that no other
+   GASNet initialization happened before. */
+
+void
+PREFIX (init) (int *argc, char ***argv)
+{
+  if (caf_num_images == 0)
+    {
+      int ierr=0;
+
+      if (argc == NULL)
+	{
+	  /* GASNet hat the bug that it expects that argv[0] is always
+	     available.  Provide a summy argument.  */
+	  int i = 0;
+	  char *tmpstr = "CAF";
+          char **strarray = alloca (sizeof (char *));
+          strarray[0] = tmpstr;
+	  ierr = gasnet_init (&i, &strarray);
+	}
+      else
+	ierr = gasnet_init (argc, argv);
+
+      if (unlikely ((ierr != GASNET_OK)))
+	caf_runtime_error ("Failure when initializing GASNet: %d", ierr);
+
+      caf_num_images = gasnet_nodes ();
+      caf_this_image = gasnet_mynode ();
+
+      caf_this_image++;
+      caf_is_finalized = 0;
+
+      initImageSync ();
+
+      char *envvar = NULL;
+
+      envvar = getenv ("GASNET_NPAGES");
+
+      if(!envvar)
+        {
+          remoteMemorySize = gasnet_getMaxLocalSegmentSize();
+        }
+      else
+        {
+          long n_pages = 4096;
+          sscanf(envvar,"%ld",&n_pages);
+#ifdef DEBUG
+          printf("n_pages %ld\n",n_pages);
+#endif
+          remoteMemorySize = n_pages*GASNET_PAGESIZE;
+        }
+
+      /* It creates the remote memory on each image */
+      if (remote_memory==NULL)
+	{
+	  gasnet_handlerentry_t htable[] = {
+	    { send_notify1_handler,  req_notify1_handler },
+	  };
+
+	  if (gasnet_attach (htable, sizeof (htable)/sizeof (gasnet_handlerentry_t),
+			     remoteMemorySize, GASNET_PAGESIZE))
+	    goto error;
+
+	  r_pointer = 0;
+
+	  remote_memory = malloc (sizeof (gasnet_seginfo_t) * caf_num_images);
+
+	  if (remote_memory == NULL)
+	    goto error;
+
+	  /* gasnet_seginfo_t *tt = (gasnet_seginfo_t*)*token;  */
+
+	  ierr = gasnet_getSegmentInfo (TOKEN (remote_memory), caf_num_images);
+
+	  if (unlikely (ierr))
+	    {
+	      free (remote_memory);
+	      goto error;
+	    }
+
+	}
+
+    }
+
+  return;
+
+error:
+  {
+    char *msg;
+
+    if (caf_is_finalized)
+      msg = "Failed to create remote memory space - there are stopped images";
+    else
+      msg = "Failed during initialization and memory allocation";
+
+    caf_runtime_error (msg);
+  }
+
+}
+
+
+/* Finalize coarray program.   */
+
+void
+PREFIX (finalize) (void)
+{
+  gasnet_barrier_notify (0, GASNET_BARRIERFLAG_ANONYMOUS);
+  gasnet_barrier_wait (0, GASNET_BARRIERFLAG_ANONYMOUS);
+
+  while (caf_static_list != NULL)
+    {
+      caf_static_t *tmp = caf_static_list->prev;
+
+      /* (void) ARMCI_Free (caf_static_list->token[caf_this_image-1]);  */
+      /* free (caf_static_list->token);  */
+      free (caf_static_list);
+      caf_static_list = tmp;
+    }
+
+  /* (void) ARMCI_Finalize (); */
+  /* armci_msg_finalize (); */
+  gasnet_exit (0);
+
+  caf_is_finalized = 1;
+}
+
+
+int
+PREFIX (this_image) (int distance __attribute__ ((unused)))
+{
+  return caf_this_image;
+}
+
+
+int
+PREFIX (num_images) (int distance __attribute__ ((unused)),
+		     int failed __attribute__ ((unused)))
+{
+  return caf_num_images;
+}
+
+
+void *
+PREFIX (register) (size_t size, caf_register_t type, caf_token_t *token,
+		   int *stat, char *errmsg, int errmsg_len)
+{
+  int i;
+
+  if (unlikely (caf_is_finalized))
+    goto error;
+
+  /* Start GASNET if not already started.  */
+  if (caf_num_images == 0)
+    PREFIX (init) (NULL, NULL);
+
+  /* Here there was the if statement for remote allocation */
+  /* Now it is included in init */
+
+#ifdef DEBUG
+  printf("image: %d memorysize: %ld Requested: %ld, status: %ld\n",caf_this_image,remoteMemorySize,size,r_pointer);
+#endif
+
+  /* Allocation check */
+  if ((size+r_pointer) > remoteMemorySize)
+    goto error;
+
+  /* New variable registration.  */
+
+  /* Token contains only a list of pointers.  */
+  *token = malloc (caf_num_images * sizeof (void *));
+
+  for (i = 0; i < caf_num_images; i++)
+  {
+    gasnet_seginfo_t *rm = TOKEN (remote_memory);
+    char * tm = (char *) rm[i].addr;
+    tm += r_pointer;
+    void ** t = *token;
+    t[i] = (void *) tm;
+  }
+
+  r_pointer += size;
+  size_t align = r_pointer % MALLOC_ALIGNMENT;
+  if (align)
+    r_pointer += MALLOC_ALIGNMENT - align;
+
+  if (type == CAF_REGTYPE_COARRAY_STATIC)
+    {
+      caf_static_t *tmp = malloc (sizeof (caf_static_t));
+      tmp->prev  = caf_static_list;
+      tmp->token = *token;
+      caf_static_list = tmp;
+    }
+
+  if (stat)
+    *stat = 0;
+
+  void **tm = *token;
+  return tm[caf_this_image-1];
+
+error:
+  {
+    char *msg;
+
+    if (caf_is_finalized)
+      msg = "Failed to allocate coarray - there are stopped images";
+    else
+      msg = "Failed to allocate coarray";
+
+    if (stat)
+      {
+	*stat = caf_is_finalized ? STAT_STOPPED_IMAGE : 1;
+	if (errmsg_len > 0)
+	  {
+	    int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+							: (int) strlen (msg);
+	    memcpy (errmsg, msg, len);
+	    if (errmsg_len > len)
+	      memset (&errmsg[len], ' ', errmsg_len-len);
+	  }
+      }
+    else
+      caf_runtime_error (msg);
+  }
+
+  return NULL;
+}
+
+
+void
+PREFIX (deregister) (caf_token_t *token, int *stat, char *errmsg,
+		     int errmsg_len)
+{
+  if (unlikely (caf_is_finalized))
+    {
+      const char msg[] = "Failed to deallocate coarray - "
+			  "there are stopped images";
+      if (stat)
+	{
+	  *stat = STAT_STOPPED_IMAGE;
+
+	  if (errmsg_len > 0)
+	    {
+	      int len = ((int) sizeof (msg) - 1 > errmsg_len)
+			? errmsg_len : (int) sizeof (msg) - 1;
+	      memcpy (errmsg, msg, len);
+	      if (errmsg_len > len)
+		memset (&errmsg[len], ' ', errmsg_len-len);
+	    }
+	  return;
+	}
+      caf_runtime_error (msg);
+    }
+
+  PREFIX(sync_all) (NULL, NULL, 0);
+
+  if (stat)
+    *stat = 0;
+
+  /* if (unlikely (ierr = ARMCI_Free ((*token)[caf_this_image-1]))) */
+  /*   caf_runtime_error ("ARMCI memory freeing failed: Error code %d", ierr); */
+  //gasnet_exit(0);
+
+  free (*token);
+}
+
+
+void
+PREFIX (sync_all) (int *stat, char *errmsg, int errmsg_len)
+{
+  int ierr;
+
+  if (unlikely (caf_is_finalized))
+    ierr = STAT_STOPPED_IMAGE;
+  else
+    {
+      gasnet_barrier_notify (0, GASNET_BARRIERFLAG_ANONYMOUS);
+      gasnet_barrier_wait (0, GASNET_BARRIERFLAG_ANONYMOUS);
+      ierr = 0;
+    }
+
+  if (stat)
+    *stat = ierr;
+
+  if (ierr)
+    {
+      char *msg;
+      if (caf_is_finalized)
+	msg = "SYNC ALL failed - there are stopped images";
+      else
+	msg = "SYNC ALL failed";
+
+      if (errmsg_len > 0)
+	{
+	  int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+						      : (int) strlen (msg);
+	  memcpy (errmsg, msg, len);
+	  if (errmsg_len > len)
+	    memset (&errmsg[len], ' ', errmsg_len-len);
+	}
+      else
+	caf_runtime_error (msg);
+    }
+}
+
+
+/* Send scalar (or contiguous) data from buffer to a remote image.  */
+
+/* token: The token of the array to be written to. */
+/* offset: Difference between the coarray base address and the actual data, used for caf(3)[2] = 8 or caf[4]%a(4)%b = 7. */
+/* image_index: Index of the coarray (typically remote, though it can also be on this_image). */
+/* data: Pointer to the to-be-transferred data. */
+/* size: The number of bytes to be transferred. */
+/* asynchronous: Return before the data transfer has been complete  */
+
+void
+PREFIX (send) (caf_token_t token, size_t offset, int image_index,
+	       gfc_descriptor_t *dest,
+	       caf_vector_t *dst_vector __attribute__ ((unused)),
+	       gfc_descriptor_t *src, int dst_kind, int src_kind)
+{
+  int ierr = 0, j=0;
+  size_t i, size;
+  void **tm = token;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  ptrdiff_t dst_offset = 0;
+  void *pad_str = NULL;
+  size_t src_size = GFC_DESCRIPTOR_SIZE (src);
+  size_t dst_size = GFC_DESCRIPTOR_SIZE (dest);
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+
+  if (unlikely (size == 0))
+    return;  /* Zero-sized array.  */
+
+  /* It works only if contiguous */
+  /*if (image_index == caf_this_image)
+    {
+       void *dest = (void *) ((char *) tm[image_index-1] + offset);
+       memmove (dest, src->base_addr, size*dst_size);
+       return;
+    }*/
+
+  if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+    {
+      pad_str = alloca (dst_size - src_size);
+      if (dst_kind == 1)
+	memset (pad_str, ' ', dst_size-src_size);
+      else /* dst_kind == 4.  */
+	for (i = 0; i < (dst_size-src_size)/4; i++)
+	      ((int32_t*) pad_str)[i] = (int32_t) ' ';
+    }
+
+  if (rank == 0
+      || (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+	  && dst_kind == src_kind && GFC_DESCRIPTOR_RANK (src) != 0
+          && (GFC_DESCRIPTOR_TYPE (dest) != BT_CHARACTER || dst_size == src_size)
+	  && PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src)))
+    {
+      /* MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, *p); */
+      if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+	  && dst_kind == src_kind)
+	gasnet_put_bulk (image_index-1, tm[image_index-1]+offset, src->base_addr, (dst_size > src_size ? src_size : dst_size)*size);
+	/* ierr = MPI_Put (src->base_addr, dst_size*size, MPI_BYTE, */
+	/* 		image_index-1, offset, */
+	/* 		(dst_size > src_size ? src_size : dst_size) * size, */
+	/* 		MPI_BYTE, *p); */
+      if (pad_str)
+	gasnet_put_bulk (image_index-1, tm[image_index-1]+offset, pad_str, dst_size-src_size);
+	/* ierr = MPI_Put (pad_str, dst_size-src_size, MPI_BYTE, image_index-1, */
+	/* 		offset, dst_size - src_size, MPI_BYTE, *p); */
+      /* MPI_Win_unlock (image_index-1, *p); */
+      if (ierr != 0)
+	error_stop (ierr);
+      return;
+    }
+  else
+    {
+#ifdef STRIDED
+      gasnet_memvec_t * arr_dsp_s, *arr_dsp_d;
+
+      void *sr = src->base_addr;
+
+      arr_dsp_s = malloc (size * sizeof (gasnet_memvec_t));
+      arr_dsp_d = malloc (size * sizeof (gasnet_memvec_t));
+
+      for (i = 0; i < size; i++)
+	{
+	  ptrdiff_t array_offset_dst = 0;
+	  ptrdiff_t stride = 1;
+	  ptrdiff_t extent = 1;
+	  for (j = 0; j < rank-1; j++)
+	    {
+	      array_offset_dst += ((i / (extent*stride))
+				   % (dest->dim[j]._ubound
+				      - dest->dim[j].lower_bound + 1))
+		* dest->dim[j]._stride;
+	      extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+	      stride = dest->dim[j]._stride;
+	    }
+
+	  array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+
+	  //arr_dsp_d[i] = calloc(1,sizeof(struct gasnet_memvec_t));
+
+	  arr_dsp_d[i].addr = tm[image_index-1]+(offset+array_offset_dst)*GFC_DESCRIPTOR_SIZE (dest);
+	  arr_dsp_d[i].len = GFC_DESCRIPTOR_SIZE (dest);
+
+	  //arr_dsp_s[i] = calloc(1,sizeof(struct gasnet_memvec_t));
+
+	  if (GFC_DESCRIPTOR_RANK (src) != 0)
+	    {
+	      ptrdiff_t array_offset_sr = 0;
+	      stride = 1;
+	      extent = 1;
+	      for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+		{
+		  array_offset_sr += ((i / (extent*stride))
+				      % (src->dim[j]._ubound
+					 - src->dim[j].lower_bound + 1))
+		    * src->dim[j]._stride;
+		  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+		  stride = src->dim[j]._stride;
+		}
+
+	      array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+
+	      /* arr_dsp_s[i] = array_offset_sr; */
+
+	      arr_dsp_s[i].addr = sr+array_offset_sr*GFC_DESCRIPTOR_SIZE (src);
+	      arr_dsp_s[i].len = GFC_DESCRIPTOR_SIZE (src);
+	    }
+	  else
+	    {
+	      arr_dsp_s[i].addr = sr;
+	      arr_dsp_s[i].len = GFC_DESCRIPTOR_SIZE (src);
+	    }
+	  //dst_offset = offset + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
+	  /* void *sr = (void *)((char *) src->base_addr */
+	  /* 			  + array_offset_sr*GFC_DESCRIPTOR_SIZE (src)); */
+
+	}
+
+      /* gasnet_puts_bulk(image_index-1, tm[image_index-1]+offset, arr_dsp_d, */
+      /* 		 sr, arr_dsp_s, arr_count, size); */
+      gasnet_putv_bulk(image_index-1,size,arr_dsp_d,size,arr_dsp_s);
+
+      free(arr_dsp_s);
+      free(arr_dsp_d);
+#else
+      for (i = 0; i < size; i++)
+	{
+	  ptrdiff_t array_offset_dst = 0;
+	  ptrdiff_t stride = 1;
+	  ptrdiff_t extent = 1;
+	  for (j = 0; j < rank-1; j++)
+	    {
+	      array_offset_dst += ((i / (extent*stride))
+				   % (dest->dim[j]._ubound
+				      - dest->dim[j].lower_bound + 1))
+		* dest->dim[j]._stride;
+	      extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+	      stride = dest->dim[j]._stride;
+	    }
+	  array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+	  dst_offset = offset + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
+
+	  void *sr;
+	  if (GFC_DESCRIPTOR_RANK (src) != 0)
+	    {
+	      ptrdiff_t array_offset_sr = 0;
+	      stride = 1;
+	      extent = 1;
+	      for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+		{
+		  array_offset_sr += ((i / (extent*stride))
+				      % (src->dim[j]._ubound
+					 - src->dim[j].lower_bound + 1))
+		    * src->dim[j]._stride;
+		  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+		  stride = src->dim[j]._stride;
+		}
+	      array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+	      sr = (void *)((char *) src->base_addr
+			    + array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
+	    }
+	  else
+	    sr = src->base_addr;
+
+	  gasnet_put_bulk (image_index-1, tm[image_index-1]+dst_offset, sr, dst_size);
+
+	  /* ierr = MPI_Put (sr, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, image_index-1, */
+	  /* 		  dst_offset, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, *p); */
+	  if (pad_str)
+	    gasnet_put_bulk (image_index-1, tm[image_index-1]+offset, pad_str, dst_size-src_size);
+	    /* ierr = MPI_Put (pad_str, dst_size - src_size, MPI_BYTE, image_index-1, */
+	    /* 		    dst_offset, dst_size - src_size, MPI_BYTE, *p); */
+	  if (ierr != 0)
+	    {
+	      error_stop (ierr);
+	      return;
+	    }
+	}
+#endif
+    }
+
+  /* if (async == false) */
+    /* gasnet_put_bulk (image_index-1, tm[image_index-1]+offset, data, size); */
+  /* else */
+  /*   ierr = ARMCI_NbPut(data,t.addr+offset,size,image_index-1,NULL); */
+  if(ierr != 0)
+    error_stop (ierr);
+}
+
+
+/* Send array data from src to dest on a remote image.  */
+
+/* void */
+/* PREFIX (send_desc) (caf_token_t token, size_t offset, int image_index, */
+/* 		    gfc_descriptor_t *dest, gfc_descriptor_t *src, bool async) */
+/* { */
+/*   int ierr = 0; */
+/*   size_t i, size; */
+/*   int j; */
+/*   int rank = GFC_DESCRIPTOR_RANK (dest); */
+/*   void **tm = token; */
+
+/*   size = 1; */
+/*   for (j = 0; j < rank; j++) */
+/*     { */
+/*       ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1; */
+/*       if (dimextent < 0) */
+/* 	dimextent = 0; */
+/*       size *= dimextent; */
+/*     } */
+
+/*   if (size == 0) */
+/*     return; */
+
+/*   if (PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src)) */
+/*     { */
+/*       void *dst = (void *)((char *) tm[image_index-1] + offset); */
+/*       if (image_index == caf_this_image) */
+/* 	  memmove (dst, src->base_addr, GFC_DESCRIPTOR_SIZE (dest)*size); */
+/*       else /\* if (!async) *\/ */
+/*         gasnet_put_bulk (image_index-1, dst, src->base_addr, GFC_DESCRIPTOR_SIZE (dest)*size); */
+/*       /\* else *\/ */
+
+/*       if (ierr != 0) */
+/* 	error_stop (ierr); */
+/*       return; */
+/*     } */
+
+/*   for (i = 0; i < size; i++) */
+/*     { */
+/*       ptrdiff_t array_offset_dst = 0; */
+/*       ptrdiff_t stride = 1; */
+/*       ptrdiff_t extent = 1; */
+/*       for (j = 0; j < rank-1; j++) */
+/* 	{ */
+/* 	  array_offset_dst += ((i / (extent*stride)) */
+/* 			       % (dest->dim[j]._ubound */
+/* 				  - dest->dim[j].lower_bound + 1)) */
+/* 			      * dest->dim[j]._stride; */
+/* 	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1); */
+/*           stride = dest->dim[j]._stride; */
+/* 	} */
+/*       array_offset_dst += (i / extent) * dest->dim[rank-1]._stride; */
+
+/*       ptrdiff_t array_offset_sr = 0; */
+/*       stride = 1; */
+/*       extent = 1; */
+/*       for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++) */
+/* 	{ */
+/* 	  array_offset_sr += ((i / (extent*stride)) */
+/* 			   % (src->dim[j]._ubound */
+/* 			      - src->dim[j].lower_bound + 1)) */
+/* 			  * src->dim[j]._stride; */
+/* 	  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1); */
+/*           stride = src->dim[j]._stride; */
+/* 	} */
+/*       array_offset_sr += (i / extent) * src->dim[rank-1]._stride; */
+
+/*       void *dst = (void *)((char *) tm[image_index-1] + offset */
+/* 			   + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest)); */
+/*       void *sr = (void *)((char *) src->base_addr */
+/* 			  + array_offset_sr*GFC_DESCRIPTOR_SIZE (src)); */
+/*       if (image_index == caf_this_image) */
+/* 	memmove (dst, sr, GFC_DESCRIPTOR_SIZE (dest)); */
+/*       else /\* if (!async) *\/ */
+/*         gasnet_put_bulk (image_index-1, dst, sr, GFC_DESCRIPTOR_SIZE (dest)); */
+/*       /\* else *\/ */
+
+/*       if (ierr != 0) */
+/* 	{ */
+/* 	  error_stop (ierr); */
+/* 	  return; */
+/* 	} */
+/*     } */
+/* } */
+
+
+/* Send scalar data from src to array dest on a remote image.  */
+
+void
+PREFIX (send_desc_scalar) (caf_token_t token, size_t offset, int image_index,
+			   gfc_descriptor_t *dest, void *buffer, bool async)
+{
+  int ierr = 0;
+  size_t i, size;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  void **tm = token;
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < rank-1; j++)
+	{
+	  array_offset += ((i / (extent*stride))
+			   % (dest->dim[j]._ubound
+			      - dest->dim[j].lower_bound + 1))
+			  * dest->dim[j]._stride;
+	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	}
+      array_offset += (i / extent) * dest->dim[rank-1]._stride;
+      void *dst = (void *)((char *) tm[image_index-1] + offset
+			   + array_offset*GFC_DESCRIPTOR_SIZE (dest));
+      if (image_index == caf_this_image)
+	memmove (dst, buffer, GFC_DESCRIPTOR_SIZE (dest));
+      else /* if (!async) */
+        gasnet_put_bulk (image_index-1, dst, buffer,
+			 GFC_DESCRIPTOR_SIZE (dest));
+      /* else */
+
+      if (ierr != 0)
+	{
+	  error_stop (ierr);
+	  return;
+	}
+    }
+}
+
+
+void
+PREFIX (get) (caf_token_t token, size_t offset,
+	      int image_index __attribute__ ((unused)),
+	      gfc_descriptor_t *src ,
+	      caf_vector_t *src_vector __attribute__ ((unused)),
+	      gfc_descriptor_t *dest, int src_kind, int dst_kind)
+
+{
+  size_t i, size;
+  int ierr = 0, j = 0;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  size_t src_size = GFC_DESCRIPTOR_SIZE (src);
+  size_t dst_size = GFC_DESCRIPTOR_SIZE (dest);
+  void *pad_str = NULL;
+
+  void **tm = token;
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (unlikely (size == 0))
+    return;  /* Zero-sized array.  */
+
+  /* if (image_index == caf_this_image) */
+  /*   { */
+  /*      void *src = (void *) ((char *) tm[image_index-1] + offset); */
+  /*      memmove (data, src, size); */
+  /*      return; */
+  /*   } */
+
+  if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+    {
+      pad_str = alloca (dst_size - src_size);
+      if (dst_kind == 1)
+	memset (pad_str, ' ', dst_size-src_size);
+      else /* dst_kind == 4.  */
+	for (i = 0; i < (dst_size-src_size)/4; i++)
+	      ((int32_t*) pad_str)[i] = (int32_t) ' ';
+    }
+
+  if (rank == 0
+      || (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+	  && dst_kind == src_kind
+          && (GFC_DESCRIPTOR_TYPE (dest) != BT_CHARACTER || dst_size == src_size)
+	  && PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src)))
+    {
+      gasnet_get_bulk (dest->base_addr, image_index-1, tm[image_index-1]+offset, size*dst_size);
+
+      if (pad_str)
+	memcpy ((char *) dest->base_addr + src_size, pad_str,
+		dst_size-src_size);
+    }
+  else
+    {
+#ifdef STRIDED
+      gasnet_memvec_t * arr_dsp_s, *arr_dsp_d;
+
+      void *dst = dest->base_addr;
+
+      arr_dsp_s = malloc (size * sizeof (gasnet_memvec_t));
+      arr_dsp_d = malloc (size * sizeof (gasnet_memvec_t));
+
+      for (i = 0; i < size; i++)
+	{
+	  ptrdiff_t array_offset_dst = 0;
+	  ptrdiff_t stride = 1;
+	  ptrdiff_t extent = 1;
+	  for (j = 0; j < rank-1; j++)
+	    {
+	      array_offset_dst += ((i / (extent*stride))
+				   % (dest->dim[j]._ubound
+				      - dest->dim[j].lower_bound + 1))
+		* dest->dim[j]._stride;
+	      extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+	      stride = dest->dim[j]._stride;
+	    }
+	  array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+
+	  arr_dsp_d[i].addr = dst+array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
+	  arr_dsp_d[i].len = dst_size;
+
+	  ptrdiff_t array_offset_sr = 0;
+	  stride = 1;
+	  extent = 1;
+	  for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+	    {
+	      array_offset_sr += ((i / (extent*stride))
+				  % (src->dim[j]._ubound
+				     - src->dim[j].lower_bound + 1))
+		* src->dim[j]._stride;
+	      extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+	      stride = src->dim[j]._stride;
+	    }
+
+	  array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+
+	  arr_dsp_s[i].addr = tm[image_index-1]+(array_offset_sr+offset)*GFC_DESCRIPTOR_SIZE (src);
+	  arr_dsp_s[i].len = src_size;
+
+	}
+
+      gasnet_getv_bulk(size,arr_dsp_d,image_index-1,size,arr_dsp_s);
+
+      free(arr_dsp_s);
+      free(arr_dsp_d);
+#else
+      for (i = 0; i < size; i++)
+	{
+	  ptrdiff_t array_offset_dst = 0;
+	  ptrdiff_t stride = 1;
+	  ptrdiff_t extent = 1;
+	  for (j = 0; j < rank-1; j++)
+	    {
+	      array_offset_dst += ((i / (extent*stride))
+				   % (dest->dim[j]._ubound
+				      - dest->dim[j].lower_bound + 1))
+		* dest->dim[j]._stride;
+	      extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+	      stride = dest->dim[j]._stride;
+	    }
+	  array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+
+	  ptrdiff_t array_offset_sr = 0;
+	  stride = 1;
+	  extent = 1;
+	  for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+	    {
+	      array_offset_sr += ((i / (extent*stride))
+				  % (src->dim[j]._ubound
+				     - src->dim[j].lower_bound + 1))
+		* src->dim[j]._stride;
+	      extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+	      stride = src->dim[j]._stride;
+	    }
+
+	  array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+
+	  size_t sr_off = offset + array_offset_sr*GFC_DESCRIPTOR_SIZE (src);
+	  void *dst = (void *) ((char *) dest->base_addr
+				+ array_offset_dst*GFC_DESCRIPTOR_SIZE (dest));
+	  /* FIXME: Handle image_index == this_image().  */
+	  /*  if (async == false) */
+	  gasnet_get_bulk (dst, image_index-1, tm[image_index-1]+sr_off, GFC_DESCRIPTOR_SIZE (dest));
+	  /* ierr = MPI_Get (dst, GFC_DESCRIPTOR_SIZE (dest), */
+	  /* 		    MPI_BYTE, image_index-1, sr_off, */
+	  /* 		    GFC_DESCRIPTOR_SIZE (src), MPI_BYTE, *p); */
+	  if (pad_str)
+	    memcpy ((char *) dst + src_size, pad_str, dst_size-src_size);
+
+	  if (ierr != 0)
+	    error_stop (ierr);
+	}
+#endif
+    }
+
+  /* if (async == false) */
+    /* gasnet_get_bulk (data, image_index-1, tm[image_index-1]+offset, size); */
+  /* else */
+  /*   ierr = ARMCI_NbPut(data,t.addr+offset,size,image_index-1,NULL); */
+  if (ierr != 0)
+    error_stop (ierr);
+
+}
+
+
+/* Get array data from a remote src to a local dest.  */
+
+void
+PREFIX (get_desc) (caf_token_t token, size_t offset, int image_index,
+		   gfc_descriptor_t *src, gfc_descriptor_t *dest,
+		   bool async __attribute__ ((unused)))
+{
+  size_t i, size;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  void **tm = token;
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  if (PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src))
+    {
+      void *sr = (void *) ((char *) tm[image_index-1] + offset);
+      if (image_index == caf_this_image)
+	memmove (dest->base_addr, sr, GFC_DESCRIPTOR_SIZE (dest)*size);
+      else
+	gasnet_get_bulk (dest->base_addr, image_index-1, sr,
+			 GFC_DESCRIPTOR_SIZE (dest)*size);
+      return;
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_dst = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < rank-1; j++)
+	{
+	  array_offset_dst += ((i / (extent*stride))
+			       % (dest->dim[j]._ubound
+				  - dest->dim[j].lower_bound + 1))
+			      * dest->dim[j]._stride;
+	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	}
+      array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+
+      ptrdiff_t array_offset_sr = 0;
+      stride = 1;
+      extent = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+	{
+	  array_offset_sr += ((i / (extent*stride))
+			   % (src->dim[j]._ubound
+			      - src->dim[j].lower_bound + 1))
+			  * src->dim[j]._stride;
+	  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+          stride = src->dim[j]._stride;
+	}
+      array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+
+      void *sr = (void *)((char *) tm[image_index-1] + offset
+			   + array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
+      void *dst = (void *)((char *) dest->base_addr
+			  + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest));
+      if (image_index == caf_this_image)
+	memmove (dst, sr, GFC_DESCRIPTOR_SIZE (dest));
+      else
+	gasnet_get_bulk (dst, image_index-1, sr, GFC_DESCRIPTOR_SIZE (dest));
+    }
+}
+
+
+/* SYNC IMAGES. Note: SYNC IMAGES(*) is passed as count == -1 while
+   SYNC IMAGES([]) has count == 0. Note further that SYNC IMAGES(*)
+   is not equivalent to SYNC ALL. */
+void
+PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
+		      int errmsg_len)
+{
+  int ierr;
+  if (count == 0 || (count == 1 && images[0] == caf_this_image))
+    {
+      if (stat)
+	*stat = 0;
+      return;
+    }
+
+#ifdef GFC_CAF_CHECK
+  {
+    int i;
+    for (i = 0; i < count; i++)
+      if (images[i] < 1 || images[i] > caf_num_images)
+	{
+	  fprintf (stderr, "COARRAY ERROR: Invalid image index %d to SYNC "
+		   "IMAGES", images[i]);
+	  error_stop (1);
+	}
+  }
+#endif
+
+  if (unlikely (caf_is_finalized))
+    ierr = STAT_STOPPED_IMAGE;
+  else
+    {
+      if(count == -1)
+	insOrders (images_full, caf_num_images-1);
+      else
+	insOrders (images, count);
+
+      GASNET_BLOCKUNTIL(freeToGo() == true);
+
+      ierr = 0;
+    }
+
+  if (stat)
+    *stat = ierr;
+
+  if (ierr)
+    {
+      char *msg;
+      if (caf_is_finalized)
+	msg = "SYNC IMAGES failed - there are stopped images";
+      else
+	msg = "SYNC IMAGES failed";
+
+      if (errmsg_len > 0)
+	{
+	  int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+						      : (int) strlen (msg);
+	  memcpy (errmsg, msg, len);
+	  if (errmsg_len > len)
+	    memset (&errmsg[len], ' ', errmsg_len-len);
+	}
+      else
+	caf_runtime_error (msg);
+    }
+}
+
+
+/* ERROR STOP the other images.  */
+
+static void
+error_stop (int error)
+{
+  /* FIXME: Shutdown the Fortran RTL to flush the buffer.  PR 43849.  */
+  /* FIXME: Do some more effort than just gasnet_exit().  */
+  gasnet_exit(error);
+
+  /* Should be unreachable, but to make sure also call exit.  */
+  exit (error);
+}
+
+
+/* ERROR STOP function for string arguments.  */
+
+void
+PREFIX (error_stop_str) (const char *string, int32_t len)
+{
+  fputs ("ERROR STOP ", stderr);
+  while (len--)
+    fputc (*(string++), stderr);
+  fputs ("\n", stderr);
+
+  error_stop (1);
+}
+
+
+/* ERROR STOP function for numerical arguments.  */
+
+void
+PREFIX (error_stop) (int32_t error)
+{
+  fprintf (stderr, "ERROR STOP %d\n", error);
+  error_stop (error);
+}
diff --git a/src/libcaf-gfortran-descriptor.h b/src/libcaf-gfortran-descriptor.h
new file mode 100644
index 0000000..c9b93bc
--- /dev/null
+++ b/src/libcaf-gfortran-descriptor.h
@@ -0,0 +1,144 @@
+/* One-sided MPI implementation of Libcaf
+
+Copyright (c) 2012-2016, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sourcery, Inc., nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+#ifndef LIBCAF_GFORTRAN_DESCRIPTOR_H
+#define LIBCAF_GFORTRAN_DESCRIPTOR_H
+
+#include <stdint.h>	/* For int32_t.  */
+
+/* GNU Fortran's array descriptor.  Keep in sync with libgfortran.h.  To be
+   replaced by TS29113's ISO_Fortran_binding.h with CFI_cdesc_t.  */
+
+enum
+{ BT_UNKNOWN = 0, BT_INTEGER, BT_LOGICAL, BT_REAL, BT_COMPLEX,
+  BT_DERIVED, BT_CHARACTER, BT_CLASS, BT_PROCEDURE, BT_HOLLERITH, BT_VOID,
+  BT_ASSUMED
+};
+
+typedef struct descriptor_dimension
+{
+  ptrdiff_t _stride;
+  ptrdiff_t lower_bound;
+  ptrdiff_t _ubound;
+}
+descriptor_dimension;
+
+typedef struct gfc_descriptor_t {
+  void *base_addr;
+  size_t offset;
+  ptrdiff_t dtype;
+  descriptor_dimension dim[];
+} gfc_descriptor_t;
+
+
+#define GFC_MAX_DIMENSIONS 7
+
+#define GFC_DTYPE_RANK_MASK 0x07
+#define GFC_DTYPE_TYPE_SHIFT 3
+#define GFC_DTYPE_TYPE_MASK 0x38
+#define GFC_DTYPE_SIZE_SHIFT 6
+#define GFC_DESCRIPTOR_RANK(desc) ((desc)->dtype & GFC_DTYPE_RANK_MASK)
+#define GFC_DESCRIPTOR_TYPE(desc) (((desc)->dtype & GFC_DTYPE_TYPE_MASK) \
+                                   >> GFC_DTYPE_TYPE_SHIFT)
+#define GFC_DESCRIPTOR_SIZE(desc) ((desc)->dtype >> GFC_DTYPE_SIZE_SHIFT)
+
+#define GFC_DTYPE_SIZE_MASK \
+  ((~((ptrdiff_t) 0) >> GFC_DTYPE_SIZE_SHIFT) << GFC_DTYPE_SIZE_SHIFT)
+#define GFC_DTYPE_TYPE_SIZE_MASK (GFC_DTYPE_SIZE_MASK | GFC_DTYPE_TYPE_MASK)
+
+#define GFC_DTYPE_TYPE_SIZE(desc) ((desc)->dtype & GFC_DTYPE_TYPE_SIZE_MASK)
+
+#define GFC_DTYPE_INTEGER_1 ((BT_INTEGER << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(int8_t) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_INTEGER_2 ((BT_INTEGER << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(int16_t) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_INTEGER_4 ((BT_INTEGER << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(int32_t) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_INTEGER_8 ((BT_INTEGER << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(int64_t) << GFC_DTYPE_SIZE_SHIFT))
+#if HAVE_INT128_T
+#define GFC_DTYPE_INTEGER_16 ((BT_INTEGER << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(__int128_t) << GFC_DTYPE_SIZE_SHIFT))
+#endif
+
+#define GFC_DTYPE_LOGICAL_4 ((BT_LOGICAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(int) << GFC_DTYPE_SIZE_SHIFT))
+
+#if 0
+#define GFC_DTYPE_LOGICAL_1 ((BT_LOGICAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(GFC_LOGICAL_1) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_LOGICAL_2 ((BT_LOGICAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(GFC_LOGICAL_2) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_LOGICAL_8 ((BT_LOGICAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(double) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_LOGICAL_16 ((BT_LOGICAL << GFC_DTYPE_TYPE_SHIFT)\
+   | (sizeof(GFC_LOGICAL_16) << GFC_DTYPE_SIZE_SHIFT))
+#endif
+
+#define GFC_DTYPE_REAL_4 ((BT_REAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(float) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_REAL_8 ((BT_REAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(double) << GFC_DTYPE_SIZE_SHIFT))
+#if 0
+#ifdef HAVE_GFC_REAL_10
+#define GFC_DTYPE_REAL_10  ((BT_REAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(GFC_REAL_10) << GFC_DTYPE_SIZE_SHIFT))
+#endif
+#ifdef HAVE_GFC_REAL_16
+#define GFC_DTYPE_REAL_16 ((BT_REAL << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(GFC_REAL_16) << GFC_DTYPE_SIZE_SHIFT))
+#endif
+#endif
+
+#define GFC_DTYPE_COMPLEX_4 ((BT_COMPLEX << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(_Complex float) << GFC_DTYPE_SIZE_SHIFT))
+#define GFC_DTYPE_COMPLEX_8 ((BT_COMPLEX << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(_Complex double) << GFC_DTYPE_SIZE_SHIFT))
+#if 0
+#ifdef HAVE_GFC_COMPLEX_10
+#define GFC_DTYPE_COMPLEX_10 ((BT_COMPLEX << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(GFC_COMPLEX_10) << GFC_DTYPE_SIZE_SHIFT))
+#endif
+#ifdef HAVE_GFC_COMPLEX_16
+#define GFC_DTYPE_COMPLEX_16 ((BT_COMPLEX << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(GFC_COMPLEX_16) << GFC_DTYPE_SIZE_SHIFT))
+#endif
+#endif
+
+/* FIXME: Hardwiring these values to what the mpi_caf.c macro GFC_DTYPE_TYPE_SIZE(desc)
+    receives in the dtype component its gf_descriptor_t argument for character(kind=c_char)
+    and logical(kind=c_bool) data:
+*/
+#define GFC_DTYPE_CHARACTER 48
+
+#if 0
+#define GFC_DTYPE_CHARACTER ((BT_CHARACTER << GFC_DTYPE_TYPE_SHIFT) \
+   | (sizeof(char) << GFC_DTYPE_SIZE_SHIFT))
+#endif
+
+
+#endif  /* LIBCAF_GFORTRAN_DESCRIPTOR_H.  */
diff --git a/src/libcaf.h b/src/libcaf.h
new file mode 100644
index 0000000..1ea065c
--- /dev/null
+++ b/src/libcaf.h
@@ -0,0 +1,154 @@
+/* One-sided MPI implementation of Libcaf
+
+Copyright (c) 2012-2016, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sourcery, Inc., nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+#ifndef LIBCAF_H
+#define LIBCAF_H
+
+#include <stdint.h>	/* For int32_t.  */
+#include <stddef.h>	/* For size_t.  */
+#include <stdbool.h>
+
+#include "libcaf-gfortran-descriptor.h"
+
+#ifndef __GNUC__
+#define __attribute__(x)
+#define likely(x)       (x)
+#define unlikely(x)     (x)
+#else
+#define likely(x)       __builtin_expect(!!(x), 1)
+#define unlikely(x)     __builtin_expect(!!(x), 0)
+#endif
+
+#ifdef PREFIX_NAME
+#define PREFIX3(X,Y) X ## Y
+#define PREFIX2(X,Y) PREFIX3(X,Y)
+#define PREFIX(X) PREFIX2(PREFIX_NAME,X)
+#else
+#define PREFIX(X) X
+#endif
+
+
+/* Definitions of the Fortran 2008 standard; need to kept in sync with
+   ISO_FORTRAN_ENV, cf. libgfortran.h.  */
+#define STAT_UNLOCKED		0
+#define STAT_LOCKED		1
+#define STAT_LOCKED_OTHER_IMAGE	2
+#define STAT_DUP_SYNC_IMAGES    3
+#define STAT_STOPPED_IMAGE 	6000
+
+/* Describes what type of array we are registerring. Keep in sync with
+   gcc/fortran/trans.h.  */
+typedef enum caf_register_t {
+  CAF_REGTYPE_COARRAY_STATIC,
+  CAF_REGTYPE_COARRAY_ALLOC,
+  CAF_REGTYPE_LOCK_STATIC,
+  CAF_REGTYPE_LOCK_ALLOC,
+  CAF_REGTYPE_CRITICAL,
+  CAF_REGTYPE_EVENT_STATIC,
+  CAF_REGTYPE_EVENT_ALLOC
+}
+caf_register_t;
+
+typedef void* caf_token_t;
+
+
+/* Linked list of static coarrays registered.  */
+typedef struct caf_static_t {
+  caf_token_t token;
+  struct caf_static_t *prev;
+}
+caf_static_t;
+
+/* When there is a vector subscript in this dimension, nvec == 0, otherwise,
+   lower_bound, upper_bound, stride contains the bounds relative to the declared
+   bounds; kind denotes the integer kind of the elements of vector[].  */
+typedef struct caf_vector_t {
+  size_t nvec;
+  union {
+    struct {
+      void *vector;
+      int kind;
+    } v;
+    struct {
+      ptrdiff_t lower_bound, upper_bound, stride;
+    } triplet;
+  } u;
+}
+caf_vector_t;
+
+
+
+/* Common auxiliary functions: caf_auxiliary.c.  */
+
+bool PREFIX (is_contiguous) (gfc_descriptor_t *);
+
+
+/* Header for the specific implementation.  */
+
+void PREFIX (init) (int *, char ***);
+void PREFIX (finalize) (void);
+
+int PREFIX (this_image) (int);
+int PREFIX (num_images) (int, int);
+
+void *PREFIX (register) (size_t, caf_register_t, caf_token_t *, int *, char *,
+			int);
+void PREFIX (deregister) (caf_token_t *, int *, char *, int);
+
+void PREFIX (caf_get) (caf_token_t, size_t, int, gfc_descriptor_t *,
+		       caf_vector_t *, gfc_descriptor_t *, int, int);
+void PREFIX (caf_send) (caf_token_t, size_t, int, gfc_descriptor_t *,
+                        caf_vector_t *, gfc_descriptor_t *, int, int);
+
+void PREFIX (caf_sendget) (caf_token_t, size_t, int, gfc_descriptor_t *,
+			   caf_vector_t *, caf_token_t, size_t, int,
+			   gfc_descriptor_t *, caf_vector_t *, int, int);
+
+void PREFIX (co_max) (gfc_descriptor_t *, int, int *, char *, int, int);
+void PREFIX (co_min) (gfc_descriptor_t *, int, int *, char *, int, int);
+void PREFIX (co_sum) (gfc_descriptor_t *, int, int *, char *, int);
+
+void PREFIX (sync_all) (int *, char *, int);
+void PREFIX (sync_images) (int, int[], int *, char *, int);
+
+void PREFIX (error_stop_str) (const char *, int32_t)
+     __attribute__ ((noreturn));
+void PREFIX (error_stop) (int32_t) __attribute__ ((noreturn));
+
+void PREFIX (atomic_define) (caf_token_t, size_t, int, void *, int *, int, int);
+void PREFIX (atomic_ref) (caf_token_t, size_t, int, void *, int *, int, int);
+void PREFIX (atomic_cas) (caf_token_t, size_t, int, void *, void *,
+			  void *, int *, int, int);
+void PREFIX (atomic_op) (int, caf_token_t, size_t, int, void *, void *,
+			 int *, int, int);
+
+void PREFIX (lock) (caf_token_t, size_t, int, int *, int *, char *, int);
+void PREFIX (unlock) (caf_token_t, size_t, int, int *, char *, int);
+void PREFIX (event_post) (caf_token_t, size_t, int, int *, char *, int);
+void PREFIX (event_wait) (caf_token_t, size_t, int, int *, char *, int);
+void PREFIX (event_query) (caf_token_t, size_t, int, int *, int *);
+#endif  /* LIBCAF_H  */
diff --git a/src/make.inc b/src/make.inc
new file mode 100644
index 0000000..1546113
--- /dev/null
+++ b/src/make.inc
@@ -0,0 +1,30 @@
+PREFIX_NAME=_gfortran_caf_
+FC=gfortran
+CC=gcc
+MPFC=mpifort
+
+FFLAGS = -O2 -g
+CFLAGS = -O2 -g
+FFLAGS_EXTRA = -fcoarray=lib
+CFLAGS_EXTRA = -DPREFIX_NAME=$(PREFIX_NAME) -DHAVE_INT128_T
+FFLAGS += $(FFLAGS_EXTRA)
+CFLAGS += $(CFLAGS_EXTRA)
+LDFLAGS +=
+
+SINGLE_CFLAGS += -Wall -Wextra
+
+ARMCI_DIR =
+ARMCI_CFLAGS  += -I$(ARMCI_DIR)/include/ -Werror -Wall -Wextra
+ARMCI_LDFLAGS += -L$(ARMCI_DIR)/lib -L$(ARMCI_DIR)/lib64 -larmci
+ARMCI_CC = mpicc
+ARMCI_RUN = mpirun -np 2
+
+MPI_EXTRA_FLAGS = -Wall -Wextra -Wno-error=cpp -Wno-error=unused-parameter -DSTRIDED#-DNONBLOCKING_PUT -DCAF_MPI_LOCK_UNLOCK
+MPI_CFLAGS += $(MPI_EXTRA_FLAGS)
+#MPICC =
+MPI_RUN = mpirun -np 2
+
+GASNET_CFLAGS += #-DSTRIDED
+GASNET_MAK = /home/rouson/Downloads/GASNet-1.22.4/smp-conduit/smp-par.mak
+GASNET_LDFLAGS +=$(GASNET_LIBS)
+GASNET_RUN = mpirun -np 2
diff --git a/src/make.inc.Cray-XE b/src/make.inc.Cray-XE
new file mode 100644
index 0000000..084ef30
--- /dev/null
+++ b/src/make.inc.Cray-XE
@@ -0,0 +1,35 @@
+#
+# Makefile for a Cray XE
+#
+# Before compiling the library, load the following modules:
+#   module swap PrgEnv-cray PrgEnv-gnu
+#   module load gcc/4.8.1  # für GASNet
+#   module load cray-ga  # für ARMCI
+#
+
+PREFIX_NAME=_gfortran_caf_
+FC=gfortran
+MPFC=mpifort
+
+FFLAGS += -O2 -g -fcoarray=lib
+CFLAGS += -DPREFIX_NAME=$(PREFIX_NAME) -g -O2 -DHAVE_INT128_T
+LDFLAGS +=
+
+SINGLE_CFLAGS += -Werror -Wall -Wextra
+
+ARMCI_DIR =
+ARMCI_CFLAGS += -Werror -Wall -Wextra
+ARMCI_LDFLAGS +=
+ARMCI_CC = cc
+ARMCI_RUN = aprun -n 2
+
+MPI_CFLAGS += -Werror -Wall -Wextra
+MPICC = cc
+MPI_RUN = aprun -n 2
+
+GASNET_CFLAGS += -I/opt/chapel/1.8.0/cray-xk/third-party/gasnet/install/cray-xk-cray-prgenv-gnu/seg-fast/nodbg/include/gemini-conduit/ \
+                 -I/opt/chapel/1.8.0/cray-xk/third-party/gasnet/install/cray-xk-cray-prgenv-gnu/seg-fast/nodbg/include/ \
+                 -Wno-all
+GASNET_MAK = /opt/chapel/1.8.0/cray-xk/third-party/gasnet/install/cray-xk-cray-prgenv-gnu/seg-fast/nodbg/include/gemini-conduit/gemini-par.mak
+GASNET_CC = cc
+GASNET_RUN = mpirun -np 2
diff --git a/src/mpi/CMakeLists.txt b/src/mpi/CMakeLists.txt
new file mode 100644
index 0000000..9c4fb5a
--- /dev/null
+++ b/src/mpi/CMakeLists.txt
@@ -0,0 +1,79 @@
+set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/mod)
+
+find_package(MPI REQUIRED)
+
+if("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "GNU")
+  set(gfortran_compiler true)
+elseif("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "Cray")
+  set(cray_compiler true)
+elseif("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "PGI")
+  set(portland_group_compiler true)
+endif()
+
+if(gfortran_compiler AND (NOT opencoarrays_aware_compiler))
+  # This applied to gfortran 4.9 and some earlier versions (FIX ME: find out which)
+  add_definitions(-DCOMPILER_SUPPORTS_CAF_INTRINSICS)
+endif()
+
+add_library(caf_mpi mpi_caf.c ../common/caf_auxiliary.c ../extensions/opencoarrays.F90)
+
+target_include_directories(caf_mpi PRIVATE ${MPI_C_INCLUDE_PATH})
+if (gfortran_compiler)
+  target_compile_options(caf_mpi INTERFACE -fcoarray=lib)
+endif()
+
+include_directories(${CMAKE_BINARY_DIR}/mod)
+
+install(TARGETS caf_mpi EXPORT OpenCoarraysTargets
+  ARCHIVE DESTINATION lib
+)
+install(DIRECTORY  ${CMAKE_BINARY_DIR}/mod DESTINATION .)
+
+# Now we write the script that passes CAF source to the compiler with the necessary arguments
+# and, if necessary, performs some code transformations prior to invoking the compiler.
+set(exe_dir ${CMAKE_BINARY_DIR}/bin_staging)
+set(compiler_wrapper ${exe_dir}/caf)
+install(
+    FILES "${compiler_wrapper}"
+    PERMISSIONS WORLD_EXECUTE WORLD_READ WORLD_WRITE OWNER_EXECUTE OWNER_READ OWNER_WRITE GROUP_EXECUTE GROUP_READ GROUP_WRITE
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/
+)
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/caf-head CAF_HEADER)
+file(WRITE  "${compiler_wrapper}" "${CAF_HEADER}\n")
+file(APPEND "${compiler_wrapper}"  "caf_mod_dir=${CMAKE_INSTALL_PREFIX}/mod\n")
+file(APPEND "${compiler_wrapper}"  "caf_lib_dir=${CMAKE_INSTALL_PREFIX}/lib\n")
+file(APPEND "${compiler_wrapper}"  "caf_version=${PROJECT_VERSION}\n")
+if(gfortran_compiler)
+  file(APPEND "${compiler_wrapper}"  "link_args='-fcoarray=lib -lcaf_mpi'\n")
+elseif(portland_group_compiler)
+  file(APPEND "${compiler_wrapper}"  "link_args=-lcaf_mpi\n")
+endif()
+if(opencoarrays_aware_compiler)
+  file(APPEND "${compiler_wrapper}"  "caf_compiler=true\n")
+else()
+  file(APPEND "${compiler_wrapper}"  "caf_compiler=false\n")
+endif()
+if ("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
+  file(APPEND "${compiler_wrapper}"  "linux=true\n")
+endif()
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/caf-foot FOOTER)
+file(APPEND "${compiler_wrapper}" "${FOOTER}")
+
+# Now we write the script that launches executable files produced from CAF codes
+set(caf_launcher ${exe_dir}/cafrun)
+install(
+    FILES "${caf_launcher}"
+    PERMISSIONS WORLD_EXECUTE WORLD_READ WORLD_WRITE OWNER_EXECUTE OWNER_READ OWNER_WRITE GROUP_EXECUTE GROUP_READ GROUP_WRITE
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
+)
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/cafrun-head CAFRUN_HEADER)
+file(WRITE  "${caf_launcher}" "${CAFRUN_HEADER}\n")
+file(APPEND "${caf_launcher}"  "caf_version=${PROJECT_VERSION}\n")
+
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/cafrun-foot FOOTER)
+file(APPEND "${caf_launcher}" "${FOOTER}")
+
+# This could be needed to produce shared libraries:
+#target_link_libraries(caf_mpi PRIVATE ${MPI_C_LIBRARIES})
+
+#set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${exe_dir}/cafrun;${exe_dir}/caf;${exe_dir}/test-caf-tally.sh")
diff --git a/src/mpi/Makefile b/src/mpi/Makefile
new file mode 100644
index 0000000..d997295
--- /dev/null
+++ b/src/mpi/Makefile
@@ -0,0 +1,21 @@
+include ../make.inc
+
+MPICC ?= mpicc
+
+libcaf_mpi.a: mpi_caf.o ../common/caf_auxiliary.o
+	ar rcv $@ mpi_caf.o ../common/caf_auxiliary.o
+	ranlib $@
+
+.c.o:
+	$(MPICC) $(CFLAGS) $(MPI_CFLAGS) -I.. -c $< -o $@
+
+mpi_caf.o: mpi_caf.c ../libcaf.h ../libcaf-gfortran-descriptor.h
+
+../common/caf_auxiliary.o:
+	$(MAKE) -C ../common
+
+clean:
+	rm -f mpi_caf.o
+
+distclean: clean
+	rm -f libcaf_mpi.a
diff --git a/src/mpi/mpi_caf.c b/src/mpi/mpi_caf.c
new file mode 100644
index 0000000..55b288d
--- /dev/null
+++ b/src/mpi/mpi_caf.c
@@ -0,0 +1,2528 @@
+/* One-sided MPI implementation of Libcaf
+
+Copyright (c) 2012-2016, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sourcery, Inc., nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+/****l* mpi/mpi_caf.c
+ * NAME
+ *   mpi_caf
+ * SYNOPSIS
+ *   This program implements the LIBCAF_MPI transport layer.
+******
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>        /* For memcpy.  */
+#include <stdarg.h>        /* For variadic arguments.  */
+#include <alloca.h>
+#include <unistd.h>
+#include <mpi.h>
+#include <pthread.h>
+
+#include "libcaf.h"
+
+/* Define GFC_CAF_CHECK to enable run-time checking.  */
+/* #define GFC_CAF_CHECK  1  */
+
+typedef MPI_Win *mpi_caf_token_t;
+#define TOKEN(X) ((mpi_caf_token_t) (X))
+
+static void error_stop (int error) __attribute__ ((noreturn));
+
+/* Global variables.  */
+static int caf_this_image;
+static int caf_num_images;
+static int caf_is_finalized;
+
+#if MPI_VERSION >= 3
+  MPI_Info mpi_info_same_size;
+#endif // MPI_VERSION
+
+/*Sync image part*/
+
+static int *orders;
+MPI_Request *handlers;
+static int *images_full;
+static int *arrived;
+
+/* Pending puts */
+#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
+typedef struct win_sync {
+  MPI_Win *win;
+  int img;
+  struct win_sync *next;
+} win_sync;
+
+static win_sync *last_elem = NULL;
+static win_sync *pending_puts = NULL;
+#endif
+
+caf_static_t *caf_static_list = NULL;
+caf_static_t *caf_tot = NULL;
+
+/* Image status variable */
+
+static int *img_status = NULL;
+MPI_Win *stat_tok;
+
+/* Active messages variables */
+
+char **buff_am;
+MPI_Status *s_am;
+MPI_Request *req_am;
+MPI_Datatype *dts;
+char *msgbody;
+pthread_mutex_t lock_am;
+int done_am=0;
+
+char err_buffer[MPI_MAX_ERROR_STRING];
+
+/* All CAF runtime calls should use this comm instead of
+   MPI_COMM_WORLD for interoperability purposes. */
+MPI_Comm CAF_COMM_WORLD;
+
+/* For MPI interoperability, allow external initialization
+   (and thus finalization) of MPI. */
+bool caf_owns_mpi = false;
+
+/* Foo function pointers for coreduce */
+int (*foo_int32_t)(void *, void *);
+float (*foo_float)(void *, void *);
+double (*foo_double)(void *, void *);
+
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
+#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
+void explicit_flush()
+{
+  win_sync *w=pending_puts, *t;
+  MPI_Win *p;
+  while(w != NULL)
+    {
+      p = w->win;
+      MPI_Win_flush(w->img,*p);
+      t = w;
+      w = w->next;
+      free(t);
+    }
+  last_elem = NULL;
+  pending_puts = NULL;
+}
+#endif
+
+#ifdef HELPER
+void helperFunction()
+{
+  int i = 0, flag = 0, msgid = 0;
+  int ndim = 0, position = 0;
+
+  s_am = calloc(caf_num_images, sizeof(MPI_Status));
+  req_am = calloc(caf_num_images, sizeof(MPI_Request));
+  dts = calloc(caf_num_images, sizeof(MPI_Datatype));
+
+  for(i=0;i<caf_num_images;i++)
+    MPI_Irecv(buff_am[i], 1000, MPI_PACKED, i, 1, CAF_COMM_WORLD, &req_am[i]);
+
+  while(1)
+    {
+      pthread_mutex_lock(&lock_am);
+      for(i=0;i<caf_num_images;i++)
+        {
+          if(!caf_is_finalized)
+            {
+              MPI_Test(&req_am[i], &flag, &s_am[i]);
+              if(flag==1)
+                {
+                  position = 0;
+                  MPI_Unpack(buff_am[i], 1000, &position, &msgid, 1, MPI_INT, CAF_COMM_WORLD);
+                  /* msgid=2 was initially assigned to strided transfers, it can be reused */
+                  /* Strided transfers Msgid=2 */
+
+                  /* You can add you own function */
+
+                  if(msgid==2)
+                    {
+                      msgid=0; position=0;
+                    }
+                  MPI_Irecv(buff_am[i], 1000, MPI_PACKED, i, 1, CAF_COMM_WORLD, &req_am[i]);
+                  flag=0;
+                }
+            }
+          else
+            {
+              done_am=1;
+              pthread_mutex_unlock(&lock_am);
+              return;
+            }
+        }
+        pthread_mutex_unlock(&lock_am);
+    }
+}
+#endif
+/* Keep in sync with single.c.  */
+static void
+caf_runtime_error (const char *message, ...)
+{
+  va_list ap;
+  fprintf (stderr, "Fortran runtime error on image %d: ", caf_this_image);
+  va_start (ap, message);
+  vfprintf (stderr, message, ap);
+  va_end (ap);
+  fprintf (stderr, "\n");
+
+  /* FIXME: Shutdown the Fortran RTL to flush the buffer.  PR 43849.  */
+  /* FIXME: Do some more effort than just to abort.  */
+  //  MPI_Finalize();
+
+  /* Should be unreachable, but to make sure also call exit.  */
+  exit (EXIT_FAILURE);
+}
+
+/* FIXME: CMake chokes on the "inline" keyword below.  If we can detect that CMake is  */
+/*        being used, we could add something of the form "#ifdef _CMAKE" to remove the */
+/*        keyword only when building with CMake */
+/* inline */ void locking_atomic_op(MPI_Win win, int *value, int newval,
+			      int compare, int image_index, int index)
+{
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, win);
+# endif // CAF_MPI_LOCK_UNLOCK
+      MPI_Compare_and_swap (&newval,&compare,value, MPI_INT,image_index-1,
+                            index*sizeof(int), win);
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock (image_index-1, win);
+# else // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_flush (image_index-1, win);
+# endif // CAF_MPI_LOCK_UNLOCK
+}
+
+void mutex_lock(MPI_Win win, int image_index, int index, int *stat,
+		int *acquired_lock, char *errmsg, int errmsg_len)
+{
+  const char msg[] = "Already locked";
+#if MPI_VERSION >= 3
+  int value=1, compare = 0, newval = caf_this_image, i = 1;
+
+  if(stat != NULL)
+    *stat = 0;
+
+  locking_atomic_op(win, &value, newval, compare, image_index, index);
+
+  if(value == caf_this_image && image_index == caf_this_image)
+    goto stat_error;
+
+  if(acquired_lock != NULL)
+    {
+      if(value == 0)
+        *acquired_lock = 1;
+      else
+	*acquired_lock = 0;
+      return;
+    }
+
+  while(value != 0)
+    {
+      locking_atomic_op(win, &value, newval, compare, image_index, index);
+      usleep(caf_this_image*i);
+      i++;
+    }
+
+  return;
+
+stat_error:
+  if(errmsg != NULL)
+    {
+      memset(errmsg,' ',errmsg_len);
+      memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
+    }
+  if(stat != NULL)
+    *stat = 99;
+  else
+    error_stop(99);
+#else // MPI_VERSION
+#warning Locking for MPI-2 is not implemented
+  printf ("Locking for MPI-2 is not supported, please update your MPI implementation\n");
+#endif // MPI_VERSION
+}
+
+void mutex_unlock(MPI_Win win, int image_index, int index, int *stat,
+		  char* errmsg, int errmsg_len)
+{
+  const char msg[] = "Variable is not locked";
+  if(stat != NULL)
+    *stat = 0;
+#if MPI_VERSION >= 3
+  int value=1, compare = 1, newval = 0;
+
+  /* locking_atomic_op(win, &value, newval, compare, image_index, index); */
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, win);
+# endif // CAF_MPI_LOCK_UNLOCK
+  MPI_Fetch_and_op(&newval, &value, MPI_INT, image_index-1, index*sizeof(int), MPI_REPLACE, win);
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image_index-1, win);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image_index-1, win);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+  if(value == 0)
+    goto stat_error;
+
+  return;
+
+stat_error:
+  if(errmsg != NULL)
+    {
+      memset(errmsg,' ',errmsg_len);
+      memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
+    }
+  if(stat != NULL)
+    *stat = 99;
+  else
+    error_stop(99);
+#else // MPI_VERSION
+#warning Locking for MPI-2 is not implemented
+  printf ("Locking for MPI-2 is not supported, please update your MPI implementation\n");
+#endif // MPI_VERSION
+}
+
+/* Initialize coarray program.  This routine assumes that no other
+   GASNet initialization happened before. */
+
+void
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+_gfortran_caf_init (int *argc, char ***argv)
+#else
+PREFIX (init) (int *argc, char ***argv)
+#endif
+{
+  if (caf_num_images == 0)
+    {
+      int ierr = 0, i = 0, j = 0;
+
+      int is_init = 0, prior_thread_level = MPI_THREAD_SINGLE;
+      MPI_Initialized(&is_init);
+
+      if (is_init) {
+          MPI_Query_thread(&prior_thread_level);
+      }
+#ifdef HELPER
+      int prov_lev=0;
+      if (is_init) {
+          prov_lev = prior_thread_level;
+          caf_owns_mpi = false;
+      } else {
+          MPI_Init_thread(argc, argv, MPI_THREAD_MULTIPLE, &prov_lev);
+          caf_owns_mpi = true;
+      }
+
+      if(caf_this_image == 0 && MPI_THREAD_MULTIPLE != prov_lev)
+        caf_runtime_error ("MPI_THREAD_MULTIPLE is not supported: %d", prov_lev);
+#else
+      if (is_init) {
+          caf_owns_mpi = false;
+      } else {
+          MPI_Init(argc, argv);
+          caf_owns_mpi = true;
+      }
+#endif
+      if (unlikely ((ierr != MPI_SUCCESS)))
+        caf_runtime_error ("Failure when initializing MPI: %d", ierr);
+
+      /* Duplicate MPI_COMM_WORLD so that no CAF internal functions
+         use it - this is critical for MPI-interoperability. */
+      MPI_Comm_dup(MPI_COMM_WORLD, &CAF_COMM_WORLD);
+
+      MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images);
+      MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image);
+
+      caf_this_image++;
+      caf_is_finalized = 0;
+
+      images_full = (int *) calloc (caf_num_images-1, sizeof (int));
+
+      for (i = 0; i < caf_num_images; i++)
+        if (i + 1 != caf_this_image)
+          {
+            images_full[j] = i + 1;
+            j++;
+          }
+
+      orders = calloc (caf_num_images, sizeof (int));
+      arrived = calloc (caf_num_images, sizeof (int));
+
+      handlers = malloc(caf_num_images * sizeof(MPI_Request));
+
+      stat_tok = malloc (sizeof(MPI_Win));
+
+#if MPI_VERSION >= 3
+      MPI_Info_create (&mpi_info_same_size);
+      MPI_Info_set (mpi_info_same_size, "same_size", "true");
+      /* Setting img_status */
+      MPI_Win_allocate(sizeof(int), 1, mpi_info_same_size, CAF_COMM_WORLD, &img_status, stat_tok);
+# ifndef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock_all(MPI_MODE_NOCHECK, *stat_tok);
+# endif // CAF_MPI_LOCK_UNLOCK
+#else
+      MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &img_status, stat_tok);
+      MPI_Win_create(img_status, sizeof(int), 1, MPI_INFO_NULL, CAF_COMM_WORLD, stat_tok);
+#endif // MPI_VERSION
+      *img_status = 0;
+    }
+  /* MPI_Barrier(CAF_COMM_WORLD); */
+}
+
+
+/* Finalize coarray program.   */
+
+void
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+_gfortran_caf_finalize (void)
+#else
+PREFIX (finalize) (void)
+#endif
+{
+  *img_status = STAT_STOPPED_IMAGE; /* GFC_STAT_STOPPED_IMAGE = 6000 */
+  MPI_Win_sync(*stat_tok);
+
+  MPI_Barrier(CAF_COMM_WORLD);
+
+  while (caf_static_list != NULL)
+    {
+      caf_static_t *tmp = caf_static_list->prev;
+
+      free (caf_static_list);
+      caf_static_list = tmp;
+    }
+
+  caf_static_t *tmp_tot = caf_tot, *prev = caf_tot;
+  MPI_Win *p;
+
+  while(tmp_tot)
+    {
+      prev = tmp_tot->prev;
+      p = tmp_tot->token;
+# ifndef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock_all(*p);
+# endif // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_free(p);
+      free(tmp_tot);
+      tmp_tot = prev;
+    }
+#if MPI_VERSION >= 3
+  MPI_Info_free (&mpi_info_same_size);
+#endif // MPI_VERSION
+
+  MPI_Comm_free(&CAF_COMM_WORLD);
+
+  /* Only call Finalize if CAF runtime Initialized MPI. */
+  if (caf_owns_mpi) {
+      MPI_Finalize();
+  }
+  pthread_mutex_lock(&lock_am);
+  caf_is_finalized = 1;
+  pthread_mutex_unlock(&lock_am);
+  exit(0);
+}
+
+
+int
+PREFIX (this_image)(int distance __attribute__ ((unused)))
+{
+  return caf_this_image;
+}
+
+
+int
+PREFIX (num_images)(int distance __attribute__ ((unused)),
+                         int failed __attribute__ ((unused)))
+{
+  return caf_num_images;
+}
+
+
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+void *
+  _gfortran_caf_register (size_t size, caf_register_t type, caf_token_t *token,
+                  int *stat, char *errmsg, int errmsg_len)
+#else
+void *
+  PREFIX (register) (size_t size, caf_register_t type, caf_token_t *token,
+                  int *stat, char *errmsg, int errmsg_len)
+#endif
+{
+  /* int ierr; */
+  void *mem;
+  size_t actual_size;
+  int l_var=0, *init_array=NULL;
+
+  if (unlikely (caf_is_finalized))
+    goto error;
+
+  /* Start GASNET if not already started.  */
+  if (caf_num_images == 0)
+#ifdef COMPILER_SUPPORTS_CAF_INTRINSICS
+    _gfortran_caf_init (NULL, NULL);
+#else
+    PREFIX (init) (NULL, NULL);
+#endif
+
+  /* Token contains only a list of pointers.  */
+
+  *token = malloc (sizeof(MPI_Win));
+
+  MPI_Win *p = *token;
+
+  if(type == CAF_REGTYPE_LOCK_STATIC || type == CAF_REGTYPE_LOCK_ALLOC ||
+     type == CAF_REGTYPE_CRITICAL || type == CAF_REGTYPE_EVENT_STATIC ||
+     type == CAF_REGTYPE_EVENT_ALLOC)
+    {
+      actual_size = size*sizeof(int);
+      l_var = 1;
+    }
+  else
+    actual_size = size;
+
+#if MPI_VERSION >= 3
+  MPI_Win_allocate(actual_size, 1, mpi_info_same_size, CAF_COMM_WORLD, &mem, *token);
+# ifndef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock_all(MPI_MODE_NOCHECK, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#else // MPI_VERSION
+  MPI_Alloc_mem(actual_size, MPI_INFO_NULL, &mem);
+  MPI_Win_create(mem, actual_size, 1, MPI_INFO_NULL, CAF_COMM_WORLD, *token);
+#endif // MPI_VERSION
+
+  p = *token;
+
+  if(l_var)
+    {
+      init_array = (int *)calloc(size, sizeof(int));
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image-1, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+      MPI_Put (init_array, size, MPI_INT, caf_this_image-1,
+                      0, size, MPI_INT, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock(caf_this_image-1, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_flush(caf_this_image-1, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+      free(init_array);
+      PREFIX(sync_all) (NULL,NULL,0);
+    }
+
+  caf_static_t *tmp = malloc (sizeof (caf_static_t));
+  tmp->prev  = caf_tot;
+  tmp->token = *token;
+  caf_tot = tmp;
+
+  if (type == CAF_REGTYPE_COARRAY_STATIC)
+    {
+      tmp = malloc (sizeof (caf_static_t));
+      tmp->prev  = caf_static_list;
+      tmp->token = *token;
+      caf_static_list = tmp;
+    }
+
+  if (stat)
+    *stat = 0;
+
+  return mem;
+
+error:
+  {
+    char *msg;
+
+    if (caf_is_finalized)
+      msg = "Failed to allocate coarray - there are stopped images";
+    else
+      msg = "Failed to allocate coarray";
+
+    if (stat)
+      {
+        *stat = caf_is_finalized ? STAT_STOPPED_IMAGE : 1;
+        if (errmsg_len > 0)
+          {
+            int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+                                                        : (int) strlen (msg);
+            memcpy (errmsg, msg, len);
+            if (errmsg_len > len)
+              memset (&errmsg[len], ' ', errmsg_len-len);
+          }
+      }
+    else
+      caf_runtime_error (msg);
+  }
+
+  return NULL;
+}
+
+
+void
+PREFIX (deregister) (caf_token_t *token, int *stat, char *errmsg, int errmsg_len)
+{
+  /* int ierr; */
+
+  if (unlikely (caf_is_finalized))
+    {
+      const char msg[] = "Failed to deallocate coarray - "
+                          "there are stopped images";
+      if (stat)
+        {
+          *stat = STAT_STOPPED_IMAGE;
+
+          if (errmsg_len > 0)
+            {
+              int len = ((int) sizeof (msg) - 1 > errmsg_len)
+                        ? errmsg_len : (int) sizeof (msg) - 1;
+              memcpy (errmsg, msg, len);
+              if (errmsg_len > len)
+                memset (&errmsg[len], ' ', errmsg_len-len);
+            }
+          return;
+        }
+      caf_runtime_error (msg);
+    }
+
+  PREFIX (sync_all) (NULL, NULL, 0);
+
+  caf_static_t *tmp = caf_tot, *prev = caf_tot, *next=caf_tot;
+  MPI_Win *p = *token;
+
+  while(tmp)
+    {
+      prev = tmp->prev;
+
+      if(tmp->token == *token)
+        {
+          p = *token;
+# ifndef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_unlock_all(*p);
+# endif // CAF_MPI_LOCK_UNLOCK
+          MPI_Win_free(p);
+
+          if(prev)
+            next->prev = prev->prev;
+          else
+            next->prev = NULL;
+
+          if(tmp == caf_tot)
+            caf_tot = prev;
+
+          free(tmp);
+          break;
+        }
+
+      next = tmp;
+      tmp = prev;
+    }
+
+  if (stat)
+    *stat = 0;
+
+  /* if (unlikely (ierr = ARMCI_Free ((*token)[caf_this_image-1]))) */
+  /*   caf_runtime_error ("ARMCI memory freeing failed: Error code %d", ierr); */
+  //gasnet_exit(0);
+
+  free (*token);
+}
+
+void
+PREFIX (sync_memory) (int *stat, char *errmsg, int errmsg_len)
+{
+#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
+  explicit_flush();
+#endif
+}
+
+
+void
+PREFIX (sync_all) (int *stat, char *errmsg, int errmsg_len)
+{
+  int ierr=0;
+
+  if (unlikely (caf_is_finalized))
+    ierr = STAT_STOPPED_IMAGE;
+  else
+    {
+#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
+      explicit_flush();
+#endif
+      MPI_Barrier(CAF_COMM_WORLD);
+      ierr = 0;
+    }
+
+  if (stat)
+    *stat = ierr;
+
+  if (ierr)
+    {
+      char *msg;
+      if (caf_is_finalized)
+        msg = "SYNC ALL failed - there are stopped images";
+      else
+        msg = "SYNC ALL failed";
+
+      if (errmsg_len > 0)
+        {
+          int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+                                                      : (int) strlen (msg);
+          memcpy (errmsg, msg, len);
+          if (errmsg_len > len)
+            memset (&errmsg[len], ' ', errmsg_len-len);
+        }
+      else
+        caf_runtime_error (msg);
+    }
+}
+
+/* token: The token of the array to be written to. */
+/* offset: Difference between the coarray base address and the actual data, used for caf(3)[2] = 8 or caf[4]%a(4)%b = 7. */
+/* image_index: Index of the coarray (typically remote, though it can also be on this_image). */
+/* data: Pointer to the to-be-transferred data. */
+/* size: The number of bytes to be transferred. */
+/* asynchronous: Return before the data transfer has been complete  */
+
+void selectType(int size, MPI_Datatype *dt)
+{
+  int t_s;
+
+  MPI_Type_size(MPI_INT, &t_s);
+
+  if(t_s==size)
+    {
+      *dt=MPI_INT;
+      return;
+    }
+
+  MPI_Type_size(MPI_DOUBLE, &t_s);
+
+  if(t_s==size)
+    {
+      *dt=MPI_DOUBLE;
+      return;
+    }
+
+  MPI_Type_size(MPI_COMPLEX, &t_s);
+
+  if(t_s==size)
+    {
+      *dt=MPI_COMPLEX;
+      return;
+    }
+
+  MPI_Type_size(MPI_DOUBLE_COMPLEX, &t_s);
+
+  if(t_s==size)
+    {
+      *dt=MPI_DOUBLE_COMPLEX;
+      return;
+    }
+
+}
+
+void
+PREFIX (sendget) (caf_token_t token_s, size_t offset_s, int image_index_s,
+                  gfc_descriptor_t *dest,
+                  caf_vector_t *dst_vector __attribute__ ((unused)),
+                  caf_token_t token_g, size_t offset_g,
+                  int image_index_g, gfc_descriptor_t *src ,
+                  caf_vector_t *src_vector __attribute__ ((unused)),
+                  int src_kind, int dst_kind, bool mrt)
+{
+  int ierr = 0;
+  size_t i, size;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  MPI_Win *p_s = token_s, *p_g = token_g;
+  ptrdiff_t dst_offset = 0;
+  ptrdiff_t src_offset = 0;
+  void *pad_str = NULL;
+  size_t src_size = GFC_DESCRIPTOR_SIZE (src);
+  size_t dst_size = GFC_DESCRIPTOR_SIZE (dest);
+  char *tmp;
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+        dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  if (rank == 0
+      || (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+          && dst_kind == src_kind && GFC_DESCRIPTOR_RANK (src) != 0
+          && (GFC_DESCRIPTOR_TYPE (dest) != BT_CHARACTER || dst_size == src_size)
+          && PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src)))
+    {
+      tmp = (char *) calloc (size, dst_size);
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock (MPI_LOCK_SHARED, image_index_g-1, 0, *p_g);
+# endif // CAF_MPI_LOCK_UNLOCK
+      ierr = MPI_Get (tmp, dst_size*size, MPI_BYTE,
+                      image_index_g-1, offset_g, dst_size*size, MPI_BYTE, *p_g);
+      if (pad_str)
+        memcpy ((char *) tmp + src_size, pad_str,
+                dst_size-src_size);
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock (image_index_g-1, *p_g);
+# else // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_flush (image_index_g-1, *p_g);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index_s-1, 0, *p_s);
+# endif // CAF_MPI_LOCK_UNLOCK
+      if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+          && dst_kind == src_kind)
+        ierr = MPI_Put (tmp, dst_size*size, MPI_BYTE,
+                        image_index_s-1, offset_s,
+                        (dst_size > src_size ? src_size : dst_size) * size,
+                        MPI_BYTE, *p_s);
+      if (pad_str)
+        ierr = MPI_Put (pad_str, dst_size-src_size, MPI_BYTE, image_index_s-1,
+                        offset_s, dst_size - src_size, MPI_BYTE, *p_s);
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock (image_index_s-1, *p_s);
+# else // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_flush (image_index_s-1, *p_s);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+      if (ierr != 0)
+        error_stop (ierr);
+      return;
+
+      free(tmp);
+    }
+  else
+    {
+      tmp = calloc(1, dst_size);
+
+      for (i = 0; i < size; i++)
+        {
+          ptrdiff_t array_offset_dst = 0;
+          ptrdiff_t stride = 1;
+          ptrdiff_t extent = 1;
+          for (j = 0; j < rank-1; j++)
+            {
+              array_offset_dst += ((i / (extent*stride))
+                                   % (dest->dim[j]._ubound
+                                      - dest->dim[j].lower_bound + 1))
+                * dest->dim[j]._stride;
+              extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+              stride = dest->dim[j]._stride;
+            }
+
+	  extent = (dest->dim[rank-1]._ubound - dest->dim[rank-1].lower_bound + 1);
+          array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+          dst_offset = offset_s + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
+
+          ptrdiff_t array_offset_sr = 0;
+          if (GFC_DESCRIPTOR_RANK (src) != 0)
+            {
+              stride = 1;
+              extent = 1;
+              for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+                {
+                  array_offset_sr += ((i / (extent*stride))
+                                      % (src->dim[j]._ubound
+                                         - src->dim[j].lower_bound + 1))
+                    * src->dim[j]._stride;
+                  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+                  stride = src->dim[j]._stride;
+                }
+
+	      extent = (src->dim[rank-1]._ubound - src->dim[rank-1].lower_bound + 1);
+              array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+              array_offset_sr *= GFC_DESCRIPTOR_SIZE (src);
+            }
+          src_offset = offset_g + array_offset_sr;
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_lock (MPI_LOCK_SHARED, image_index_g-1, 0, *p_g);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+          ierr = MPI_Get (tmp, dst_size, MPI_BYTE,
+                          image_index_g-1, src_offset, src_size, MPI_BYTE, *p_g);
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_unlock (image_index_g-1, *p_g);
+# else // CAF_MPI_LOCK_UNLOCK
+          MPI_Win_flush (image_index_g-1, *p_g);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index_s-1, 0, *p_s);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+          ierr = MPI_Put (tmp, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, image_index_s-1,
+                          dst_offset, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, *p_s);
+          if (pad_str)
+            ierr = MPI_Put (pad_str, dst_size - src_size, MPI_BYTE, image_index_s-1,
+                            dst_offset, dst_size - src_size, MPI_BYTE, *p_s);
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_unlock (image_index_s-1, *p_s);
+# else // CAF_MPI_LOCK_UNLOCK
+          MPI_Win_flush (image_index_s-1, *p_s);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+          if (ierr != 0)
+            {
+              error_stop (ierr);
+              return;
+            }
+        }
+      free(tmp);
+    }
+
+}
+
+/* Send array data from src to dest on a remote image.  */
+/* The last argument means may_require_temporary */
+
+void
+PREFIX (send) (caf_token_t token, size_t offset, int image_index,
+               gfc_descriptor_t *dest,
+               caf_vector_t *dst_vector __attribute__ ((unused)),
+               gfc_descriptor_t *src, int dst_kind, int src_kind,
+               bool mrt)
+{
+  /* FIXME: Implement vector subscripts, type conversion and check whether
+     string-kind conversions are permitted.
+     FIXME: Implement sendget as well.  */
+  int ierr = 0;
+  size_t i, size;
+  int j;
+  /* int position, msg = 0;  */
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  MPI_Win *p = token;
+  ptrdiff_t dst_offset = 0;
+  void *pad_str = NULL;
+  void *t_buff = NULL;
+  bool *buff_map = NULL;
+  size_t src_size = GFC_DESCRIPTOR_SIZE (src);
+  size_t dst_size = GFC_DESCRIPTOR_SIZE (dest);
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+        dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+    {
+      pad_str = alloca (dst_size - src_size);
+      if (dst_kind == 1)
+        memset (pad_str, ' ', dst_size-src_size);
+      else /* dst_kind == 4.  */
+        for (i = 0; i < (dst_size-src_size)/4; i++)
+              ((int32_t*) pad_str)[i] = (int32_t) ' ';
+    }
+  if (rank == 0
+      || (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+          && dst_kind == src_kind && GFC_DESCRIPTOR_RANK (src) != 0
+          && (GFC_DESCRIPTOR_TYPE (dest) != BT_CHARACTER || dst_size == src_size)
+          && PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src)))
+    {
+      if(caf_this_image == image_index)
+        {
+          /* The address of source passed by the compiler points on the right
+           * memory location. No offset summation is needed.  */
+          void *dest_tmp = (void *) ((char *) dest->base_addr);// + offset);
+          memmove (dest_tmp,src->base_addr,size*dst_size);
+          return;
+        }
+      else
+        {
+#ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, *p);
+#endif // CAF_MPI_LOCK_UNLOCK
+          if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+              && dst_kind == src_kind)
+            ierr = MPI_Put (src->base_addr, (dst_size > src_size ? src_size : dst_size)*size, MPI_BYTE,
+                            image_index-1, offset,
+                            (dst_size > src_size ? src_size : dst_size) * size,
+                            MPI_BYTE, *p);
+          if (pad_str)
+	    {
+	      size_t newoff = offset + (dst_size > src_size ? src_size : dst_size) * size;
+	      ierr = MPI_Put (pad_str, dst_size-src_size, MPI_BYTE, image_index-1,
+			      newoff, dst_size - src_size, MPI_BYTE, *p);
+	    }
+#ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_unlock (image_index-1, *p);
+#elif NONBLOCKING_PUT
+	  /* Pending puts init */
+	  if(pending_puts == NULL)
+	    {
+	      pending_puts = calloc(1,sizeof(win_sync));
+	      pending_puts->next=NULL;
+	      pending_puts->win = token;
+	      pending_puts->img = image_index-1;
+	      last_elem = pending_puts;
+	      last_elem->next = NULL;
+	    }
+	  else
+	    {
+	      last_elem->next = calloc(1,sizeof(win_sync));
+	      last_elem = last_elem->next;
+	      last_elem->win = token;
+	      last_elem->img = image_index-1;
+	      last_elem->next = NULL;
+	    }
+#else
+	  MPI_Win_flush (image_index-1, *p);
+#endif // CAF_MPI_LOCK_UNLOCK
+        }
+
+      if (ierr != 0)
+        error_stop (ierr);
+      return;
+    }
+  else
+    {
+#ifdef STRIDED
+      MPI_Datatype dt_s, dt_d, base_type_src, base_type_dst;
+      int *arr_bl;
+      int *arr_dsp_s, *arr_dsp_d;
+
+      void *sr = src->base_addr;
+
+      selectType (GFC_DESCRIPTOR_SIZE (src), &base_type_src);
+      selectType (GFC_DESCRIPTOR_SIZE (dest), &base_type_dst);
+
+      if(rank == 1)
+        {
+          MPI_Type_vector(size, 1, src->dim[0]._stride, base_type_src, &dt_s);
+          MPI_Type_vector(size, 1, dest->dim[0]._stride, base_type_dst, &dt_d);
+        }
+      /* else if(rank == 2) */
+      /*   { */
+      /*     MPI_Type_vector(size/src->dim[0]._ubound, src->dim[0]._ubound, src->dim[1]._stride, base_type_src, &dt_s); */
+      /*     MPI_Type_vector(size/dest->dim[0]._ubound, dest->dim[0]._ubound, dest->dim[1]._stride, base_type_dst, &dt_d); */
+      /*   } */
+      else
+        {
+          arr_bl = calloc (size, sizeof (int));
+          arr_dsp_s = calloc (size, sizeof (int));
+          arr_dsp_d = calloc (size, sizeof (int));
+
+          for (i = 0; i < size; i++)
+            arr_bl[i] = 1;
+
+          for (i = 0; i < size; i++)
+            {
+              ptrdiff_t array_offset_dst = 0;
+              ptrdiff_t stride = 1;
+              ptrdiff_t extent = 1;
+	      ptrdiff_t tot_ext = 1;
+              for (j = 0; j < rank-1; j++)
+                {
+                  array_offset_dst += ((i / tot_ext)
+                                       % (dest->dim[j]._ubound
+                                          - dest->dim[j].lower_bound + 1))
+                    * dest->dim[j]._stride;
+                  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+                  stride = dest->dim[j]._stride;
+		  tot_ext *= extent;
+                }
+
+              array_offset_dst += (i / tot_ext) * dest->dim[rank-1]._stride;
+              arr_dsp_d[i] = array_offset_dst;
+
+              if (GFC_DESCRIPTOR_RANK (src) != 0)
+                {
+                  ptrdiff_t array_offset_sr = 0;
+                  stride = 1;
+                  extent = 1;
+		  tot_ext = 1;
+                  for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+                    {
+                      array_offset_sr += ((i / tot_ext)
+                                          % (src->dim[j]._ubound
+                                             - src->dim[j].lower_bound + 1))
+                        * src->dim[j]._stride;
+                      extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+                      stride = src->dim[j]._stride;
+		      tot_ext *= extent;
+                    }
+
+                  array_offset_sr += (i / tot_ext) * src->dim[rank-1]._stride;
+                  arr_dsp_s[i] = array_offset_sr;
+                }
+              else
+                arr_dsp_s[i] = 0;
+            }
+
+          MPI_Type_indexed(size, arr_bl, arr_dsp_s, base_type_src, &dt_s);
+          MPI_Type_indexed(size, arr_bl, arr_dsp_d, base_type_dst, &dt_d);
+
+          free (arr_bl);
+          free (arr_dsp_s);
+          free (arr_dsp_d);
+        }
+
+      MPI_Type_commit(&dt_s);
+      MPI_Type_commit(&dt_d);
+
+      dst_offset = offset;
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+      ierr = MPI_Put (sr, 1, dt_s, image_index-1, dst_offset, 1, dt_d, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock (image_index-1, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_flush (image_index-1, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+      if (ierr != 0)
+        {
+          error_stop (ierr);
+          return;
+        }
+
+      MPI_Type_free (&dt_s);
+      MPI_Type_free (&dt_d);
+
+      /* msg = 2; */
+      /* MPI_Pack(&msg, 1, MPI_INT, buff_am[caf_this_image], 1000, &position, CAF_COMM_WORLD); */
+      /* MPI_Pack(&rank, 1, MPI_INT, buff_am[caf_this_image], 1000, &position, CAF_COMM_WORLD); */
+
+      /* for(j=0;j<rank;j++) */
+      /*   { */
+      /*     MPI_Pack(&(dest->dim[j]._stride), 1, MPI_INT, buff_am[caf_this_image], 1000, &position, CAF_COMM_WORLD); */
+      /*     MPI_Pack(&(dest->dim[j].lower_bound), 1, MPI_INT, buff_am[caf_this_image], 1000, &position, CAF_COMM_WORLD); */
+      /*     MPI_Pack(&(dest->dim[j]._ubound), 1, MPI_INT, buff_am[caf_this_image], 1000, &position, CAF_COMM_WORLD); */
+      /*   } */
+
+      /* MPI_Pack(&size, 1, MPI_INT, buff_am[caf_this_image], 1000, &position, CAF_COMM_WORLD); */
+
+      /* /\* non-blocking send *\/ */
+
+      /* MPI_Issend(buff_am[caf_this_image], position, MPI_PACKED, image_index-1, 1, CAF_COMM_WORLD, &reqdt); */
+
+      /* msgbody = calloc(size, sizeof(char)); */
+
+      /* ptrdiff_t array_offset_sr = 0; */
+      /* ptrdiff_t stride = 1; */
+      /* ptrdiff_t extent = 1; */
+
+      /* for(i = 0; i < size; i++) */
+      /*   { */
+      /*     for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++) */
+      /*         { */
+      /*           array_offset_sr += ((i / (extent*stride)) */
+      /*                               % (src->dim[j]._ubound */
+      /*                                  - src->dim[j].lower_bound + 1)) */
+      /*             * src->dim[j]._stride; */
+      /*           extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1); */
+      /*         stride = src->dim[j]._stride; */
+      /*         } */
+
+      /*     array_offset_sr += (i / extent) * src->dim[rank-1]._stride; */
+
+      /*     void *sr = (void *)((char *) src->base_addr */
+      /*                           + array_offset_sr*GFC_DESCRIPTOR_SIZE (src)); */
+
+      /*     memmove (msgbody+p_mb, sr, GFC_DESCRIPTOR_SIZE (src)); */
+
+      /*     p_mb += GFC_DESCRIPTOR_SIZE (src); */
+      /*   } */
+
+      /* MPI_Wait(&reqdt, &stadt); */
+
+      /* MPI_Ssend(msgbody, size, MPI_BYTE, image_index-1, 1, CAF_COMM_WORLD); */
+
+      /* free(msgbody); */
+
+#else
+      if(caf_this_image == image_index && mrt)
+        {
+          t_buff = calloc(size,GFC_DESCRIPTOR_SIZE (dest));
+          buff_map = calloc(size,sizeof(bool));
+        }
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image_index-1, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+      for (i = 0; i < size; i++)
+        {
+          ptrdiff_t array_offset_dst = 0;
+          ptrdiff_t stride = 1;
+          ptrdiff_t extent = 1;
+	  ptrdiff_t tot_ext = 1;
+          for (j = 0; j < rank-1; j++)
+            {
+              array_offset_dst += ((i / tot_ext)
+                                   % (dest->dim[j]._ubound
+                                      - dest->dim[j].lower_bound + 1))
+                * dest->dim[j]._stride;
+              extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+              stride = dest->dim[j]._stride;
+	      tot_ext *= extent;
+            }
+
+          array_offset_dst += (i / tot_ext) * dest->dim[rank-1]._stride;
+          dst_offset = offset + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
+
+          void *sr;
+          if (GFC_DESCRIPTOR_RANK (src) != 0)
+            {
+              ptrdiff_t array_offset_sr = 0;
+              stride = 1;
+              extent = 1;
+	      tot_ext = 1;
+              for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+                {
+                  array_offset_sr += ((i / tot_ext)
+                                      % (src->dim[j]._ubound
+                                         - src->dim[j].lower_bound + 1))
+                    * src->dim[j]._stride;
+                  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+                  stride = src->dim[j]._stride;
+		  tot_ext *= extent;
+                }
+
+              array_offset_sr += (i / tot_ext) * src->dim[rank-1]._stride;
+              sr = (void *)((char *) src->base_addr
+                            + array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
+            }
+          else
+            sr = src->base_addr;
+
+          if(caf_this_image == image_index)
+            {
+              if(!mrt)
+                memmove(dest->base_addr+dst_offset,sr,GFC_DESCRIPTOR_SIZE (src));
+              else
+                {
+                  memmove(t_buff+i*GFC_DESCRIPTOR_SIZE (src),sr,GFC_DESCRIPTOR_SIZE (src));
+                  buff_map[i] = true;
+                }
+            }
+          else
+            {
+              ierr = MPI_Put (sr, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, image_index-1,
+                              dst_offset, GFC_DESCRIPTOR_SIZE (dest), MPI_BYTE, *p);
+              if (pad_str)
+                ierr = MPI_Put (pad_str, dst_size - src_size, MPI_BYTE, image_index-1,
+                                dst_offset, dst_size - src_size, MPI_BYTE, *p);
+            }
+
+          if (ierr != 0)
+            {
+              error_stop (ierr);
+              return;
+            }
+        }
+
+      if(caf_this_image == image_index && mrt)
+        {
+          for(i=0;i<size;i++)
+            {
+              if(buff_map[i])
+                {
+                  ptrdiff_t array_offset_dst = 0;
+                  ptrdiff_t stride = 1;
+                  ptrdiff_t extent = 1;
+		  ptrdiff_t tot_ext = 1;
+                  for (j = 0; j < rank-1; j++)
+                    {
+                      array_offset_dst += ((i / tot_ext)
+                                           % (dest->dim[j]._ubound
+                                              - dest->dim[j].lower_bound + 1))
+                        * dest->dim[j]._stride;
+                      extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+                      stride = dest->dim[j]._stride;
+		      tot_ext *= extent;
+                    }
+
+		  //extent = (dest->dim[rank-1]._ubound - dest->dim[rank-1].lower_bound + 1);
+                  array_offset_dst += (i / tot_ext) * dest->dim[rank-1]._stride;
+                  dst_offset = offset + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
+                  memmove(src->base_addr+dst_offset,t_buff+i*GFC_DESCRIPTOR_SIZE (src),GFC_DESCRIPTOR_SIZE (src));
+                }
+            }
+          free(t_buff);
+          free(buff_map);
+        }
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock (image_index-1, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_flush (image_index-1, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#endif
+    }
+}
+
+
+/* Get array data from a remote src to a local dest.  */
+
+void
+PREFIX (get) (caf_token_t token, size_t offset,
+              int image_index,
+              gfc_descriptor_t *src ,
+              caf_vector_t *src_vector __attribute__ ((unused)),
+              gfc_descriptor_t *dest, int src_kind, int dst_kind,
+              bool mrt)
+{
+  size_t i, size;
+  int ierr = 0;
+  int j;
+  MPI_Win *p = token;
+  int rank = GFC_DESCRIPTOR_RANK (src);
+  size_t src_size = GFC_DESCRIPTOR_SIZE (src);
+  size_t dst_size = GFC_DESCRIPTOR_SIZE (dest);
+  void *t_buff = NULL;
+  bool *buff_map = NULL;
+  void *pad_str = NULL;
+  /* size_t sr_off = 0;  */
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+        dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+    {
+      pad_str = alloca (dst_size - src_size);
+      if (dst_kind == 1)
+        memset (pad_str, ' ', dst_size-src_size);
+      else /* dst_kind == 4.  */
+        for (i = 0; i < (dst_size-src_size)/4; i++)
+              ((int32_t*) pad_str)[i] = (int32_t) ' ';
+    }
+
+  if (rank == 0
+      || (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+          && dst_kind == src_kind
+          && (GFC_DESCRIPTOR_TYPE (dest) != BT_CHARACTER || dst_size == src_size)
+          && PREFIX (is_contiguous) (dest) && PREFIX (is_contiguous) (src)))
+    {
+      /* FIXME: Handle image_index == this_image().  */
+      /*  if (async == false) */
+      if(caf_this_image == image_index)
+        {
+          /* The address of source passed by the compiler points on the right
+           * memory location. No offset summation is needed.  */
+          void *src_tmp = (void *) ((char *) src->base_addr);// + offset);
+          memmove(dest->base_addr,src_tmp,size*src_size);
+          return;
+        }
+      else
+        {
+# ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_lock (MPI_LOCK_SHARED, image_index-1, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+          ierr = MPI_Get (dest->base_addr, dst_size*size, MPI_BYTE,
+                          image_index-1, offset, dst_size*size, MPI_BYTE, *p);
+          if (pad_str)
+            memcpy ((char *) dest->base_addr + src_size, pad_str,
+                    dst_size-src_size);
+# ifdef CAF_MPI_LOCK_UNLOCK
+          MPI_Win_unlock (image_index-1, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+          MPI_Win_flush (image_index-1, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+        }
+      if (ierr != 0)
+        error_stop (ierr);
+      return;
+    }
+
+#ifdef STRIDED
+
+  MPI_Datatype dt_s, dt_d, base_type_src, base_type_dst;
+  int *arr_bl;
+  int *arr_dsp_s, *arr_dsp_d;
+
+  void *dst = dest->base_addr;
+
+  selectType(GFC_DESCRIPTOR_SIZE (src), &base_type_src);
+  selectType(GFC_DESCRIPTOR_SIZE (dest), &base_type_dst);
+
+  if(rank == 1)
+    {
+      MPI_Type_vector(size, 1, src->dim[0]._stride, base_type_src, &dt_s);
+      MPI_Type_vector(size, 1, dest->dim[0]._stride, base_type_dst, &dt_d);
+    }
+  /* else if(rank == 2) */
+  /*   { */
+  /*     MPI_Type_vector(size/src->dim[0]._ubound, src->dim[0]._ubound, src->dim[1]._stride, base_type_src, &dt_s); */
+  /*     MPI_Type_vector(size/dest->dim[0]._ubound, dest->dim[0]._ubound, dest->dim[1]._stride, base_type_dst, &dt_d); */
+  /*   } */
+  else
+    {
+      arr_bl = calloc(size, sizeof(int));
+      arr_dsp_s = calloc(size, sizeof(int));
+      arr_dsp_d = calloc(size, sizeof(int));
+
+      for(i=0;i<size;i++)
+        arr_bl[i]=1;
+
+      for (i = 0; i < size; i++)
+        {
+          ptrdiff_t array_offset_dst = 0;
+          ptrdiff_t stride = 1;
+          ptrdiff_t extent = 1;
+	  ptrdiff_t tot_ext = 1;
+          for (j = 0; j < rank-1; j++)
+            {
+              array_offset_dst += ((i / tot_ext)
+                                   % (dest->dim[j]._ubound
+                                      - dest->dim[j].lower_bound + 1))
+                * dest->dim[j]._stride;
+              extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+              stride = dest->dim[j]._stride;
+	      tot_ext *= extent;
+            }
+
+	  //extent = (dest->dim[rank-1]._ubound - dest->dim[rank-1].lower_bound + 1);
+          array_offset_dst += (i / tot_ext) * dest->dim[rank-1]._stride;
+          arr_dsp_d[i] = array_offset_dst;
+
+          ptrdiff_t array_offset_sr = 0;
+          stride = 1;
+          extent = 1;
+	  tot_ext = 1;
+          for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+            {
+              array_offset_sr += ((i / tot_ext)
+                                  % (src->dim[j]._ubound
+                                     - src->dim[j].lower_bound + 1))
+                * src->dim[j]._stride;
+              extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+              stride = src->dim[j]._stride;
+	      tot_ext *= extent;
+            }
+
+	  //extent = (src->dim[rank-1]._ubound - src->dim[rank-1].lower_bound + 1);
+          array_offset_sr += (i / tot_ext) * src->dim[rank-1]._stride;
+          arr_dsp_s[i] = array_offset_sr;
+
+        }
+
+      MPI_Type_indexed(size, arr_bl, arr_dsp_s, base_type_src, &dt_s);
+      MPI_Type_indexed(size, arr_bl, arr_dsp_d, base_type_dst, &dt_d);
+
+      free(arr_bl);
+      free(arr_dsp_s);
+      free(arr_dsp_d);
+    }
+
+  MPI_Type_commit(&dt_s);
+  MPI_Type_commit(&dt_d);
+
+  //sr_off = offset;
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_SHARED, image_index-1, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+  ierr = MPI_Get (dst, 1, dt_d, image_index-1, offset, 1, dt_s, *p);
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image_index-1, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image_index-1, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+  if (ierr != 0)
+    error_stop (ierr);
+
+  MPI_Type_free(&dt_s);
+  MPI_Type_free(&dt_d);
+
+#else
+  if(caf_this_image == image_index && mrt)
+    {
+      t_buff = calloc(size,GFC_DESCRIPTOR_SIZE (dest));
+      buff_map = calloc(size,sizeof(bool));
+    }
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_SHARED, image_index-1, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_dst = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      ptrdiff_t tot_ext = 1;
+      for (j = 0; j < rank-1; j++)
+        {
+          array_offset_dst += ((i / tot_ext)
+                               % (dest->dim[j]._ubound
+                                  - dest->dim[j].lower_bound + 1))
+                              * dest->dim[j]._stride;
+          extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	  tot_ext *= extent;
+        }
+
+      array_offset_dst += (i / tot_ext) * dest->dim[rank-1]._stride;
+
+      ptrdiff_t array_offset_sr = 0;
+      stride = 1;
+      extent = 1;
+      tot_ext = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+        {
+          array_offset_sr += ((i / tot_ext)
+                           % (src->dim[j]._ubound
+                              - src->dim[j].lower_bound + 1))
+                          * src->dim[j]._stride;
+          extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+          stride = src->dim[j]._stride;
+	  tot_ext *= extent;
+        }
+
+      array_offset_sr += (i / tot_ext) * src->dim[rank-1]._stride;
+
+      size_t sr_off = offset + array_offset_sr*GFC_DESCRIPTOR_SIZE (src);
+      void *dst = (void *) ((char *) dest->base_addr
+                            + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest));
+      /* FIXME: Handle image_index == this_image().  */
+      /*  if (async == false) */
+      if(caf_this_image == image_index)
+        {
+          /* Is this needed? */
+          if(!mrt)
+            memmove(dst,src->base_addr+array_offset_sr*GFC_DESCRIPTOR_SIZE(src),GFC_DESCRIPTOR_SIZE (src));
+          else
+            {
+              memmove(t_buff+i*GFC_DESCRIPTOR_SIZE (dest),dst,GFC_DESCRIPTOR_SIZE (dest));
+              buff_map[i] = true;
+            }
+        }
+      else
+        {
+          ierr = MPI_Get (dst, GFC_DESCRIPTOR_SIZE (dest),
+                          MPI_BYTE, image_index-1, sr_off,
+                          GFC_DESCRIPTOR_SIZE (src), MPI_BYTE, *p);
+          if (pad_str)
+            memcpy ((char *) dst + src_size, pad_str, dst_size-src_size);
+        }
+      if (ierr != 0)
+        error_stop (ierr);
+    }
+
+  if(caf_this_image == image_index && mrt)
+    {
+      for(i=0;i<size;i++)
+        {
+          if(buff_map[i])
+            {
+              ptrdiff_t array_offset_sr = 0;
+              ptrdiff_t stride = 1;
+              ptrdiff_t extent = 1;
+	      ptrdiff_t tot_ext = 1;
+              for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+                {
+                  array_offset_sr += ((i / tot_ext)
+                                      % (src->dim[j]._ubound
+                                         - src->dim[j].lower_bound + 1))
+                    * src->dim[j]._stride;
+                  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+                  stride = src->dim[j]._stride;
+		  tot_ext *= extent;
+                }
+
+	      //extent = (src->dim[rank-1]._ubound - src->dim[rank-1].lower_bound + 1);
+              array_offset_sr += (i / tot_ext) * src->dim[rank-1]._stride;
+
+              size_t sr_off = offset + array_offset_sr*GFC_DESCRIPTOR_SIZE (src);
+
+              memmove(dest->base_addr+sr_off,t_buff+i*GFC_DESCRIPTOR_SIZE (src),GFC_DESCRIPTOR_SIZE (src));
+            }
+        }
+      free(t_buff);
+      free(buff_map);
+    }
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image_index-1, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image_index-1, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#endif
+}
+
+
+/* SYNC IMAGES. Note: SYNC IMAGES(*) is passed as count == -1 while
+   SYNC IMAGES([]) has count == 0. Note further that SYNC IMAGES(*)
+   is not equivalent to SYNC ALL. */
+void
+PREFIX (sync_images) (int count, int images[], int *stat, char *errmsg,
+                     int errmsg_len)
+{
+  int ierr = 0, i=0, remote_stat = 0;
+  int dup = 0, j = 0;
+  MPI_Status s;
+
+  if (count == 0 || (count == 1 && images[0] == caf_this_image))
+    {
+      if (stat)
+        *stat = 0;
+      return;
+    }
+
+  /* halt execution if sync images contains duplicate image numbers */
+  for(i=0;i<count;i++)
+    for(j=0;j<i;j++)
+      if(images[i] == images[j])
+	{
+	  ierr = STAT_DUP_SYNC_IMAGES;
+	  if(stat)
+	    *stat = ierr;
+	  goto sync_images_err_chk;
+	}
+
+#ifdef GFC_CAF_CHECK
+  {
+    for (i = 0; i < count; i++)
+      if (images[i] < 1 || images[i] > caf_num_images)
+        {
+          fprintf (stderr, "COARRAY ERROR: Invalid image index %d to SYNC "
+                   "IMAGES", images[i]);
+          error_stop (1);
+        }
+  }
+#endif
+
+  if (unlikely (caf_is_finalized))
+    ierr = STAT_STOPPED_IMAGE;
+  else
+    {
+       if(count == -1)
+        {
+          for (i = 0; i < caf_num_images - 1; i++)
+            orders[images_full[i]-1]++;
+          count = caf_num_images-1;
+          images = images_full;
+        }
+      else
+        {
+          for (i = 0; i < count; i++)
+            orders[images[i]-1]++;
+        }
+
+#if defined(NONBLOCKING_PUT) && !defined(CAF_MPI_LOCK_UNLOCK)
+       explicit_flush();
+#endif
+
+       for(i = 0; i < count; i++)
+           ierr = MPI_Irecv(&arrived[images[i]-1], 1, MPI_INT, images[i]-1, 0, CAF_COMM_WORLD, &handlers[images[i]-1]);
+
+       for(i=0;i<count;i++)
+	 {
+# ifdef CAF_MPI_LOCK_UNLOCK
+	   MPI_Win_lock (MPI_LOCK_SHARED, images[i]-1, 0, *stat_tok);
+# endif // CAF_MPI_LOCK_UNLOCK
+	   ierr = MPI_Get (&remote_stat, 1, MPI_INT,
+			   images[i]-1, 0, 1, MPI_INT, *stat_tok);
+# ifdef CAF_MPI_LOCK_UNLOCK
+	   MPI_Win_unlock (images[i]-1, *stat_tok);
+# else // CAF_MPI_LOCK_UNLOCK
+	   MPI_Win_flush (images[i]-1, *stat_tok);
+# endif // CAF_MPI_LOCK_UNLOCK
+	   if(remote_stat != 0)
+	     {
+	       ierr = STAT_STOPPED_IMAGE;
+	       if(stat != NULL)
+		 *stat = ierr;
+	       goto sync_images_err_chk;
+	     }
+	 }
+
+       for(i=0; i < count; i++)
+         ierr = MPI_Send(&caf_this_image, 1, MPI_INT, images[i]-1, 0, CAF_COMM_WORLD);
+
+       for(i=0; i < count; i++)
+         ierr = MPI_Wait(&handlers[images[i]-1], &s);
+
+       memset(arrived, 0, sizeof(int)*caf_num_images);
+
+    }
+
+  if (stat)
+    *stat = ierr;
+
+ sync_images_err_chk:
+
+  if (ierr && stat == NULL)
+    {
+      char *msg;
+      if (caf_is_finalized)
+        msg = "SYNC IMAGES failed - there are stopped images";
+      else
+        msg = "SYNC IMAGES failed";
+
+      if (errmsg_len > 0)
+        {
+          int len = ((int) strlen (msg) > errmsg_len) ? errmsg_len
+                                                      : (int) strlen (msg);
+          memcpy (errmsg, msg, len);
+          if (errmsg_len > len)
+            memset (&errmsg[len], ' ', errmsg_len-len);
+        }
+      else
+        caf_runtime_error (msg);
+    }
+}
+
+
+#define GEN_REDUCTION(name, datatype, operator) \
+static void \
+name (datatype *invec, datatype *inoutvec, int *len, \
+               MPI_Datatype *datatype __attribute__ ((unused))) \
+{ \
+  int i; \
+  for (i = 0; i < len; i++) \
+    operator; \
+}
+
+#define FOOFUNC(TYPE) foo_ ## TYPE
+
+#define GEN_COREDUCE(name, dt)			\
+static void \
+name (void *invec, void *inoutvec, int *len, \
+      MPI_Datatype *datatype)		     \
+{ \
+  int i;	     \
+  for(i=0;i<*len;i++)				\
+    {								\
+      *((dt*)inoutvec) = (dt)(FOOFUNC(dt)((dt *)invec,(dt *)inoutvec));	\
+     invec+=sizeof(dt); inoutvec+=sizeof(dt);	\
+   } \
+}
+
+GEN_COREDUCE (redux_int32, int32_t)
+GEN_COREDUCE (redux_real32, float)
+GEN_COREDUCE (redux_real64, double)
+
+#ifndef MPI_INTEGER1
+GEN_REDUCTION (do_sum_int1, int8_t, inoutvec[i] += invec[i])
+GEN_REDUCTION (do_min_int1, int8_t,
+               inoutvec[i] = invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_max_int1, int8_t,
+               inoutvec[i] = invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i])
+#endif
+
+#ifndef MPI_INTEGER2
+GEN_REDUCTION (do_sum_int1, int16_t, inoutvec[i] += invec[i])
+GEN_REDUCTION (do_min_int1, int16_t,
+               inoutvec[i] = invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_max_int1, int16_t,
+               inoutvec[i] = invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i])
+#endif
+
+#if defined(MPI_INTEGER16) && defined(GFC_INTEGER_16)
+GEN_REDUCTION (do_sum_int1, GFC_INTEGER_16, inoutvec[i] += invec[i])
+GEN_REDUCTION (do_min_int1, GFC_INTEGER_16,
+               inoutvec[i] = invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_max_int1, GFC_INTEGER_16,
+               inoutvec[i] = invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i])
+#endif
+
+#if defined(GFC_DTYPE_REAL_10) \
+    || (!defined(GFC_DTYPE_REAL_10)  && defined(GFC_DTYPE_REAL_16))
+GEN_REDUCTION (do_sum_real10, long double, inoutvec[i] += invec[i])
+GEN_REDUCTION (do_min_real10, long double,
+               inoutvec[i] = invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_max_real10, long double,
+               inoutvec[i] = invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_sum_complex10, _Complex long double, inoutvec[i] += invec[i])
+GEN_REDUCTION (do_min_complex10, _Complex long double,
+               inoutvec[i] = invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_max_complex10, _Complex long double,
+               inoutvec[i] = invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i])
+#endif
+
+#if defined(GFC_DTYPE_REAL_10) && defined(GFC_DTYPE_REAL_16)
+GEN_REDUCTION (do_sum_real10, __float128, inoutvec[i] += invec[i])
+GEN_REDUCTION (do_min_real10, __float128,
+               inoutvec[i] = invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_max_real10, __float128,
+               inoutvec[i] = invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_sum_complex10, _Complex __float128, inoutvec[i] += invec[i])
+GEN_REDUCTION (do_mincomplexl10, _Complex __float128,
+               inoutvec[i] = invec[i] >= inoutvec[i] ? inoutvec[i] : invec[i])
+GEN_REDUCTION (do_max_complex10, _Complex __float128,
+               inoutvec[i] = invec[i] <= inoutvec[i] ? inoutvec[i] : invec[i])
+#endif
+#undef GEN_REDUCTION
+
+
+static MPI_Datatype
+get_MPI_datatype (gfc_descriptor_t *desc)
+{
+  /* FIXME: Better check whether the sizes are okay and supported;
+     MPI3 adds more types, e.g. MPI_INTEGER1.  */
+  switch (GFC_DTYPE_TYPE_SIZE (desc))
+    {
+#ifdef MPI_INTEGER1
+    case GFC_DTYPE_INTEGER_1:
+      return MPI_INTEGER1;
+#endif
+#ifdef MPI_INTEGER2
+    case GFC_DTYPE_INTEGER_2:
+      return MPI_INTEGER2;
+#endif
+    case GFC_DTYPE_INTEGER_4:
+#ifdef MPI_INTEGER4
+      return MPI_INTEGER4;
+#else
+      return MPI_INTEGER;
+#endif
+#ifdef MPI_INTEGER8
+    case GFC_DTYPE_INTEGER_8:
+      return MPI_INTEGER8;
+#endif
+#if defined(MPI_INTEGER16) && defined(GFC_DTYPE_INTEGER_16)
+    case GFC_DTYPE_INTEGER_16:
+      return MPI_INTEGER16;
+#endif
+
+    case GFC_DTYPE_LOGICAL_4:
+      return MPI_INT;
+
+    case GFC_DTYPE_REAL_4:
+#ifdef MPI_REAL4
+      return MPI_REAL4;
+#else
+      return MPI_REAL;
+#endif
+    case GFC_DTYPE_REAL_8:
+#ifdef MPI_REAL8
+      return MPI_REAL8;
+#else
+      return MPI_DOUBLE_PRECISION;
+#endif
+
+/* Note that we cannot use REAL_16 as we do not know whether it matches REAL(10)
+   or REAL(16), which have both the same bitsize and only make use of less
+   bits.  */
+    case GFC_DTYPE_COMPLEX_4:
+      return MPI_COMPLEX;
+    case GFC_DTYPE_COMPLEX_8:
+      return MPI_DOUBLE_COMPLEX;
+    }
+/* gfortran passes character string arguments with a
+   GFC_DTYPE_TYPE_SIZE == GFC_TYPE_CHARACTER + 64*strlen
+*/
+  if ( (GFC_DTYPE_TYPE_SIZE(desc)-GFC_DTYPE_CHARACTER)%64==0 )
+      return MPI_CHARACTER;
+
+  caf_runtime_error ("Unsupported data type in collective: %ld\n",GFC_DTYPE_TYPE_SIZE (desc));
+  return 0;
+}
+
+
+static void
+co_reduce_1 (MPI_Op op, gfc_descriptor_t *source, int result_image, int *stat,
+             char *errmsg, int src_len __attribute__ ((unused)), int errmsg_len)
+{
+  size_t i, size;
+  int j, ierr;
+  int rank = GFC_DESCRIPTOR_RANK (source);
+
+  MPI_Datatype datatype = get_MPI_datatype (source);
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = source->dim[j]._ubound
+                            - source->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+        dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (rank == 0 || PREFIX (is_contiguous) (source))
+    {
+      if (result_image == 0)
+        ierr = MPI_Allreduce (MPI_IN_PLACE, source->base_addr, size, datatype,
+                              op, CAF_COMM_WORLD);
+      else if (result_image == caf_this_image)
+        ierr = MPI_Reduce (MPI_IN_PLACE, source->base_addr, size, datatype, op,
+                           result_image-1, CAF_COMM_WORLD);
+      else
+        ierr = MPI_Reduce (source->base_addr, NULL, size, datatype, op,
+                           result_image-1, CAF_COMM_WORLD);
+      if (ierr)
+        goto error;
+      return;
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_sr = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (source)-1; j++)
+        {
+          array_offset_sr += ((i / (extent*stride))
+                           % (source->dim[j]._ubound
+                              - source->dim[j].lower_bound + 1))
+                          * source->dim[j]._stride;
+          extent = (source->dim[j]._ubound - source->dim[j].lower_bound + 1);
+          stride = source->dim[j]._stride;
+        }
+      array_offset_sr += (i / extent) * source->dim[rank-1]._stride;
+      void *sr = (void *)((char *) source->base_addr
+                          + array_offset_sr*GFC_DESCRIPTOR_SIZE (source));
+      if (result_image == 0)
+        ierr = MPI_Allreduce (MPI_IN_PLACE, sr, 1, datatype, op,
+                              CAF_COMM_WORLD);
+      else if (result_image == caf_this_image)
+        ierr = MPI_Reduce (MPI_IN_PLACE, sr, 1, datatype, op,
+                           result_image-1, CAF_COMM_WORLD);
+      else
+        ierr = MPI_Reduce (sr, NULL, 1, datatype, op, result_image-1,
+                           CAF_COMM_WORLD);
+      if (ierr)
+        goto error;
+    }
+
+  if (stat)
+    *stat = 0;
+  return;
+error:
+  /* FIXME: Put this in an extra function and use it elsewhere.  */
+  if (stat)
+    {
+      *stat = ierr;
+      if (!errmsg)
+        return;
+    }
+
+  int len = sizeof (err_buffer);
+  MPI_Error_string (ierr, err_buffer, &len);
+  if (!stat)
+    {
+      err_buffer[len == sizeof (err_buffer) ? len-1 : len] = '\0';
+      caf_runtime_error ("CO_SUM failed with %s\n", err_buffer);
+    }
+  memcpy (errmsg, err_buffer, errmsg_len > len ? len : errmsg_len);
+  if (errmsg_len > len)
+    memset (&errmsg[len], '\0', errmsg_len - len);
+}
+
+void
+PREFIX (co_broadcast) (gfc_descriptor_t *a, int source_image, int *stat, char *errmsg,
+                       int errmsg_len)
+{
+  size_t i, size;
+  int j, ierr;
+  int rank = GFC_DESCRIPTOR_RANK (a);
+
+  MPI_Datatype datatype = get_MPI_datatype (a);
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = a->dim[j]._ubound
+                            - a->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+        dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (rank == 0)
+    {
+      if (datatype != MPI_CHARACTER)
+        ierr = MPI_Bcast(a->base_addr, size, datatype, source_image-1, CAF_COMM_WORLD);
+      else
+      {
+        int a_length;
+        if (caf_this_image==source_image)
+          a_length=strlen(a->base_addr);
+        /* Broadcast the string lenth */
+        ierr = MPI_Bcast(&a_length, 1, MPI_INT, source_image-1, CAF_COMM_WORLD);
+        if (ierr)
+          goto error;
+        /* Broadcast the string itself */
+        ierr = MPI_Bcast(a->base_addr, a_length, datatype, source_image-1, CAF_COMM_WORLD);
+      }
+
+      if (ierr)
+        goto error;
+      return;
+    }
+    else if (datatype == MPI_CHARACTER) /* rank !=0  */
+    {
+        caf_runtime_error ("Co_broadcast of character arrays not yet supported\n");
+    }
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_sr = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (a)-1; j++)
+        {
+          array_offset_sr += ((i / (extent*stride))
+                           % (a->dim[j]._ubound
+                              - a->dim[j].lower_bound + 1))
+                          * a->dim[j]._stride;
+          extent = (a->dim[j]._ubound - a->dim[j].lower_bound + 1);
+          stride = a->dim[j]._stride;
+        }
+      array_offset_sr += (i / extent) * a->dim[rank-1]._stride;
+      void *sr = (void *)((char *) a->base_addr
+                          + array_offset_sr*GFC_DESCRIPTOR_SIZE (a));
+
+      ierr = MPI_Bcast(sr, 1, datatype, source_image-1, CAF_COMM_WORLD);
+
+      if (ierr)
+        goto error;
+    }
+
+  return;
+
+error:
+  /* FIXME: Put this in an extra function and use it elsewhere.  */
+  if (stat)
+    {
+      *stat = ierr;
+      if (!errmsg)
+        return;
+    }
+
+  int len = sizeof (err_buffer);
+  MPI_Error_string (ierr, err_buffer, &len);
+  if (!stat)
+    {
+      err_buffer[len == sizeof (err_buffer) ? len-1 : len] = '\0';
+      caf_runtime_error ("CO_SUM failed with %s\n", err_buffer);
+    }
+  memcpy (errmsg, err_buffer, errmsg_len > len ? len : errmsg_len);
+  if (errmsg_len > len)
+    memset (&errmsg[len], '\0', errmsg_len - len);
+}
+
+void
+PREFIX (co_reduce) (gfc_descriptor_t *a, void *(*opr) (void *, void *), int opr_flags,
+		    int result_image, int *stat, char *errmsg, int a_len, int errmsg_len)
+{
+  MPI_Op op;
+  if(GFC_DESCRIPTOR_TYPE(a) == BT_INTEGER)
+    {
+      foo_int32_t = (typeof(foo_int32_t))opr;
+      MPI_Op_create(redux_int32, 1, &op);
+    }
+  else if(GFC_DESCRIPTOR_TYPE(a) == BT_REAL)
+    {
+      if(GFC_DESCRIPTOR_SIZE(a) == sizeof(float))
+  	{
+  	  foo_float = (typeof(foo_float))opr;
+  	  MPI_Op_create(redux_real32, 1, &op);
+  	}
+      else
+  	{
+  	  foo_double = (typeof(foo_double))opr;
+  	  MPI_Op_create(redux_real64, 1, &op);
+  	}
+    }
+  else if(GFC_DESCRIPTOR_TYPE(a) == BT_LOGICAL)
+    {
+      foo_int32_t = (typeof(foo_int32_t))opr;
+      MPI_Op_create(redux_int32, 1, &op);
+    }
+  else
+    {
+      caf_runtime_error ("Data type not yet supported for co_reduce\n");
+    }
+
+  co_reduce_1 (op, a, result_image, stat, errmsg, 0, errmsg_len);
+}
+
+void
+PREFIX (co_sum) (gfc_descriptor_t *a, int result_image, int *stat, char *errmsg,
+                 int errmsg_len)
+{
+  co_reduce_1 (MPI_SUM, a, result_image, stat, errmsg, 0, errmsg_len);
+}
+
+
+void
+PREFIX (co_min) (gfc_descriptor_t *a, int result_image, int *stat, char *errmsg,
+                 int src_len, int errmsg_len)
+{
+  co_reduce_1 (MPI_MIN, a, result_image, stat, errmsg, src_len, errmsg_len);
+}
+
+
+void
+PREFIX (co_max) (gfc_descriptor_t *a, int result_image, int *stat,
+                 char *errmsg, int src_len, int errmsg_len)
+{
+  co_reduce_1 (MPI_MAX, a, result_image, stat, errmsg, src_len, errmsg_len);
+}
+
+
+/* Locking functions */
+
+void
+PREFIX (lock) (caf_token_t token, size_t index, int image_index,
+               int *acquired_lock, int *stat, char *errmsg,
+               int errmsg_len)
+{
+  int dest_img;
+  MPI_Win *p = token;
+
+  if(image_index == 0)
+    dest_img = caf_this_image;
+  else
+    dest_img = image_index;
+
+  mutex_lock(*p, dest_img, index, stat, acquired_lock, errmsg, errmsg_len);
+}
+
+
+void
+PREFIX (unlock) (caf_token_t token, size_t index, int image_index,
+                 int *stat, char *errmsg, int errmsg_len)
+{
+  int dest_img;
+  MPI_Win *p = token;
+
+  if(image_index == 0)
+    dest_img = caf_this_image;
+  else
+    dest_img = image_index;
+
+  mutex_unlock(*p, dest_img, index, stat, errmsg, errmsg_len);
+}
+
+/* Atomics operations */
+
+void
+PREFIX (atomic_define) (caf_token_t token, size_t offset,
+                        int image_index, void *value, int *stat,
+                        int type __attribute__ ((unused)), int kind)
+{
+  MPI_Win *p = token;
+  MPI_Datatype dt;
+  int ierr = 0;
+  int image;
+
+  if(image_index != 0)
+    image = image_index-1;
+  else
+    image = caf_this_image-1;
+
+  selectType(kind, &dt);
+
+#if MPI_VERSION >= 3
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  ierr = MPI_Accumulate (value, 1, dt, image, offset, 1, dt, MPI_REPLACE, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#else // MPI_VERSION
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+  ierr = MPI_Put (value, 1, dt, image, offset, 1, dt, *p);
+  MPI_Win_unlock (image, *p);
+#endif // MPI_VERSION
+
+  if (stat)
+    *stat = ierr;
+  else if (ierr != 0)
+    error_stop (ierr);
+
+  return;
+}
+
+void
+PREFIX(atomic_ref) (caf_token_t token, size_t offset,
+                    int image_index,
+                    void *value, int *stat,
+                    int type __attribute__ ((unused)), int kind)
+{
+  MPI_Win *p = token;
+  MPI_Datatype dt;
+  int ierr = 0;
+  int image;
+
+  if(image_index != 0)
+    image = image_index-1;
+  else
+    image = caf_this_image-1;
+
+  selectType(kind, &dt);
+
+#if MPI_VERSION >= 3
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  ierr = MPI_Fetch_and_op(NULL, value, dt, image, offset, MPI_NO_OP, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#else // MPI_VERSION
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+  ierr = MPI_Get (value, 1, dt, image, offset, 1, dt, *p);
+  MPI_Win_unlock (image, *p);
+#endif // MPI_VERSION
+
+  if (stat)
+    *stat = ierr;
+  else if (ierr != 0)
+    error_stop (ierr);
+
+  return;
+}
+
+
+void
+PREFIX(atomic_cas) (caf_token_t token, size_t offset,
+                    int image_index, void *old, void *compare,
+                    void *new_val, int *stat,
+                    int type __attribute__ ((unused)), int kind)
+{
+  MPI_Win *p = token;
+  MPI_Datatype dt;
+  int ierr = 0;
+  int image;
+
+  if(image_index != 0)
+    image = image_index-1;
+  else
+    image = caf_this_image-1;
+
+  selectType (kind, &dt);
+
+#if MPI_VERSION >= 3
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  ierr = MPI_Compare_and_swap (new_val, compare, old, dt, image,
+                               offset, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#else // MPI_VERSION
+#warning atomic_cas for MPI-2 is not yet implemented
+  printf ("We apologize but atomic_cas for MPI-2 is not yet implemented\n");
+  ierr = 1;
+#endif // MPI_VERSION
+
+  if (stat)
+    *stat = ierr;
+  else if (ierr != 0)
+    error_stop (ierr);
+
+  return;
+}
+
+void
+PREFIX (atomic_op) (int op, caf_token_t token ,
+                    size_t offset, int image_index,
+                    void *value, void *old, int *stat,
+                    int type __attribute__ ((unused)),
+                    int kind)
+{
+  int ierr = 0;
+  MPI_Datatype dt;
+  MPI_Win *p = token;
+  int image;
+
+#if MPI_VERSION >= 3
+  old = malloc(kind);
+
+  if(image_index != 0)
+    image = image_index-1;
+  else
+    image = caf_this_image-1;
+
+  selectType (kind, &dt);
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  /* Atomic_add */
+      switch(op) {
+          case 1:
+              ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_SUM, *p);
+              break;
+          case 2:
+              ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_BAND, *p);
+              break;
+          case 4:
+              ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_BOR, *p);
+              break;
+          case 5:
+              ierr = MPI_Fetch_and_op(value, old, dt, image, offset, MPI_BXOR, *p);
+              break;
+          default:
+              printf ("We apologize but the atomic operation requested for MPI is not yet implemented\n");
+              break;
+      }
+# ifdef CAF_MPI_LOCK_UNLOCK
+      MPI_Win_unlock (image, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+      MPI_Win_flush (image, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+
+  free(old);
+#else // MPI_VERSION
+ #warning atomic_op for MPI is not yet implemented
+   printf ("We apologize but atomic_op for MPI is not yet implemented\n");
+#endif // MPI_VERSION
+  if (stat)
+    *stat = ierr;
+  else if (ierr != 0)
+    error_stop (ierr);
+
+  return;
+}
+
+/* Events */
+
+void
+PREFIX (event_post) (caf_token_t token, size_t index,
+		     int image_index, int *stat,
+		     char *errmsg, int errmsg_len)
+{
+  int image, value=1, ierr=0;
+  MPI_Win *p = token;
+  const char msg[] = "Error on event post";
+
+  if(image_index == 0)
+    image = caf_this_image-1;
+  else
+    image = image_index-1;
+
+  if(stat != NULL)
+    *stat = 0;
+
+  #if MPI_VERSION >= 3
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  ierr = MPI_Accumulate (&value, 1, MPI_INT, image, index*sizeof(int), 1, MPI_INT, MPI_SUM, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#else // MPI_VERSION
+  #warning Events for MPI-2 are not implemented
+  printf ("Events for MPI-2 are not supported, please update your MPI implementation\n");
+#endif // MPI_VERSION
+  if(ierr != MPI_SUCCESS)
+    {
+      if(stat != NULL)
+	*stat = ierr;
+      if(errmsg != NULL)
+	{
+	  memset(errmsg,' ',errmsg_len);
+	  memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
+	}
+    }
+}
+
+void
+PREFIX (event_wait) (caf_token_t token, size_t index,
+		     int until_count, int *stat,
+		     char *errmsg, int errmsg_len)
+{
+  int ierr=0,count=0,i,image=caf_this_image-1;
+  int *var=NULL,flag,old=0;
+  int newval=0;
+  const int spin_loop_max = 20000;
+  MPI_Win *p = token;
+  const char msg[] = "Error on event wait";
+
+  if(stat != NULL)
+    *stat = 0;
+
+  MPI_Win_get_attr(*p,MPI_WIN_BASE,&var,&flag);
+
+  for(i = 0; i < spin_loop_max; ++i)
+    {
+      MPI_Win_sync(*p);
+      count = var[index];
+      if(count >= until_count)
+	break;
+    }
+
+  i=1;
+  while(count < until_count)
+    /* for(i = 0; i < spin_loop_max; ++i) */
+      {
+	MPI_Win_sync(*p);
+	count = var[index];
+	/* if(count >= until_count) */
+	/*   break; */
+	usleep(5*i);
+	i++;
+      }
+
+  newval = -until_count;
+
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  ierr = MPI_Fetch_and_op(&newval, &old, MPI_INT, image, index*sizeof(int), MPI_SUM, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  if(ierr != MPI_SUCCESS)
+    {
+      if(stat != NULL)
+	*stat = ierr;
+      if(errmsg != NULL)
+	{
+	  memset(errmsg,' ',errmsg_len);
+	  memcpy(errmsg, msg, MIN(errmsg_len,strlen(msg)));
+	}
+    }
+}
+
+void
+PREFIX (event_query) (caf_token_t token, size_t index,
+		      int image_index, int *count, int *stat)
+{
+  int image,ierr=0;
+  MPI_Win *p = token;
+
+  if(image_index == 0)
+    image = caf_this_image-1;
+  else
+    image = image_index-1;
+
+  if(stat != NULL)
+    *stat = 0;
+
+#if MPI_VERSION >= 3
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_lock (MPI_LOCK_EXCLUSIVE, image, 0, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+  ierr = MPI_Fetch_and_op(NULL, count, MPI_INT, image, index*sizeof(int), MPI_NO_OP, *p);
+# ifdef CAF_MPI_LOCK_UNLOCK
+  MPI_Win_unlock (image, *p);
+# else // CAF_MPI_LOCK_UNLOCK
+  MPI_Win_flush (image, *p);
+# endif // CAF_MPI_LOCK_UNLOCK
+#else // MPI_VERSION
+#warning Events for MPI-2 are not implemented
+  printf ("Events for MPI-2 are not supported, please update your MPI implementation\n");
+#endif // MPI_VERSION
+  if(ierr != MPI_SUCCESS && stat != NULL)
+    *stat = ierr;
+}
+
+/* ERROR STOP the other images.  */
+
+static void
+error_stop (int error)
+{
+  /* FIXME: Shutdown the Fortran RTL to flush the buffer.  PR 43849.  */
+  /* FIXME: Do some more effort than just gasnet_exit().  */
+  MPI_Abort(CAF_COMM_WORLD, error);
+
+  /* Should be unreachable, but to make sure also call exit.  */
+  exit (error);
+}
+
+/* STOP function for integer arguments.  */
+void
+PREFIX (stop_numeric) (int32_t stop_code)
+{
+  fprintf (stderr, "STOP %d\n", stop_code);
+  PREFIX (finalize) ();
+}
+
+/* STOP function for string arguments.  */
+void
+PREFIX (stop_str) (const char *string, int32_t len)
+{
+  fputs ("STOP ", stderr);
+  while (len--)
+    fputc (*(string++), stderr);
+  fputs ("\n", stderr);
+
+  PREFIX (finalize) ();
+}
+
+/* ERROR STOP function for string arguments.  */
+
+void
+PREFIX (error_stop_str) (const char *string, int32_t len)
+{
+  fputs ("ERROR STOP ", stderr);
+  while (len--)
+    fputc (*(string++), stderr);
+  fputs ("\n", stderr);
+
+  error_stop (1);
+}
+
+
+/* ERROR STOP function for numerical arguments.  */
+
+void
+PREFIX (error_stop) (int32_t error)
+{
+  fprintf (stderr, "ERROR STOP %d\n", error);
+  error_stop (error);
+}
diff --git a/src/single/CMakeLists.txt b/src/single/CMakeLists.txt
new file mode 100644
index 0000000..4b40609
--- /dev/null
+++ b/src/single/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_library(caf_single single.c ../common/caf_auxiliary.c)
+target_compile_options(caf_single INTERFACE -fcoarray=lib)
+install(TARGETS caf_single EXPORT OpenCoarraysTargets
+  ARCHIVE DESTINATION lib
+)
diff --git a/src/single/Makefile b/src/single/Makefile
new file mode 100644
index 0000000..cf43559
--- /dev/null
+++ b/src/single/Makefile
@@ -0,0 +1,19 @@
+include ../make.inc
+
+libcaf_single.a: single.o ../common/caf_auxiliary.o
+	ar rcv $@ single.o ../common/caf_auxiliary.o
+	ranlib $@
+
+.c.o:
+	$(CC) $(CFLAGS) $(SINGLE_CFLAGS) -I.. -c $< -o $@
+
+single.o: single.c ../libcaf.h ../libcaf-gfortran-descriptor.h
+
+../common/caf_auxiliary.o:
+	$(MAKE) -C ../common
+
+clean:
+	rm -f single.o
+
+distclean: clean
+	rm -f libcaf_single.a
diff --git a/src/single/single.c b/src/single/single.c
new file mode 100644
index 0000000..b032e51
--- /dev/null
+++ b/src/single/single.c
@@ -0,0 +1,727 @@
+/* Single-Image implementation of Libcaf
+
+Copyright (c) 2012-2014, Sourcery, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sourcery, Inc., nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  */
+
+#include "libcaf.h"
+#include <stdio.h>  /* For fputs and fprintf.  */
+#include <stdlib.h> /* For exit and malloc.  */
+#include <string.h> /* For memcpy and memset.  */
+#include <stdarg.h> /* For variadic arguments.  */
+
+/* Define GFC_CAF_CHECK to enable run-time checking.  */
+/* #define GFC_CAF_CHECK  1  */
+
+/* Single-image implementation of the CAF library.
+   Note: For performance reasons -fcoarry=single should be used
+   rather than this library.  */
+
+typedef void* single_token_t;
+#define TOKEN(X) ((single_token_t) (X))
+
+
+/* Global variables.  */
+caf_static_t *caf_static_list = NULL;
+
+
+/* Keep in sync with mpi.c.  */
+static void
+caf_runtime_error (const char *message, ...)
+{
+  va_list ap;
+  fprintf (stderr, "Fortran runtime error: ");
+  va_start (ap, message);
+  vfprintf (stderr, message, ap);
+  va_end (ap);
+  fprintf (stderr, "\n");
+
+  /* FIXME: Shutdown the Fortran RTL to flush the buffer.  PR 43849.  */
+  exit (EXIT_FAILURE);
+}
+
+void
+PREFIX(init) (int *argc __attribute__ ((unused)),
+	      char ***argv __attribute__ ((unused)))
+{
+}
+
+
+void
+PREFIX (finalize) (void)
+{
+  while (caf_static_list != NULL)
+    {
+      caf_static_t *tmp = caf_static_list->prev;
+      free (TOKEN (caf_static_list->token));
+      free (caf_static_list);
+      caf_static_list = tmp;
+    }
+}
+
+
+int
+PREFIX (this_image) (int distance __attribute__ ((unused)))
+{
+  return 1;
+}
+
+
+int
+PREFIX (num_images) (int distance __attribute__ ((unused)),
+		    int failed __attribute__ ((unused)))
+{
+  return 1;
+}
+
+
+void *
+PREFIX (register) (size_t size, caf_register_t type, caf_token_t *token,
+		  int *stat, char *errmsg, int errmsg_len)
+{
+  void *local;
+
+  local = malloc (size);
+  *token = malloc (sizeof (single_token_t));
+
+  if (unlikely (local == NULL || TOKEN (*token) == NULL))
+    {
+      const char msg[] = "Failed to allocate coarray";
+      if (stat)
+	{
+	  *stat = 1;
+	  if (errmsg_len > 0)
+	    {
+	      int len = ((int) sizeof (msg) > errmsg_len) ? errmsg_len
+							  : (int) sizeof (msg);
+	      memcpy (errmsg, msg, len);
+	      if (errmsg_len > len)
+		memset (&errmsg[len], ' ', errmsg_len-len);
+	    }
+	  return NULL;
+	}
+      else
+	  caf_runtime_error (msg);
+    }
+
+  *token = local;
+
+  if (stat)
+    *stat = 0;
+
+  if (type == CAF_REGTYPE_COARRAY_STATIC)
+    {
+      caf_static_t *tmp = malloc (sizeof (caf_static_t));
+      tmp->prev  = caf_static_list;
+      tmp->token = *token;
+      caf_static_list = tmp;
+    }
+  return local;
+}
+
+
+void
+PREFIX (deregister) (caf_token_t *token, int *stat,
+		     char *errmsg __attribute__ ((unused)),
+		     int errmsg_len __attribute__ ((unused)))
+{
+  free (TOKEN (*token));
+
+  if (stat)
+    *stat = 0;
+}
+
+
+static void
+convert_type (void *dst, int dst_type, int dst_kind, void *src,
+		       int src_type, int src_kind)
+{
+#ifdef HAVE_GFC_INTEGER_16
+   typedef __int128 int128t;
+#else
+   typedef int64_t int128t;
+#endif
+
+#if defined(GFC_REAL_16_IS_LONG_DOUBLE)
+   typedef long double real128t;
+   typedef _Complex long double complex128t;
+#elif defined(HAVE_GFC_REAL_16)
+   typedef _Complex float __attribute__((mode(TC))) __complex128;
+   typedef __float128 real128t;
+   typedef __complex128 complex128t;
+#elif defined(HAVE_GFC_REAL_10)
+   typedef long double real128t;
+   typedef long double complex128t;
+#else
+   typedef double real128t;
+   typedef _Complex double complex128t;
+#endif
+
+   int128t int_val = 0;
+   real128t real_val = 0;
+   complex128t cmpx_val = 0;
+
+   switch (src_type)
+     {
+     case BT_INTEGER:
+	if (src_kind == 1)
+	  int_val = *(int8_t*) src;
+	else if (src_kind == 2)
+	  int_val = *(int16_t*) src;
+	else if (src_kind == 4)
+	  int_val = *(int32_t*) src;
+	else if (src_kind == 8)
+	  int_val = *(int64_t*) src;
+#ifdef HAVE_GFC_INTEGER_16
+	else if (src_kind == 16)
+	  int_val = *(int128t*) src;
+#endif
+	else
+	  goto error;
+	break;
+     case BT_REAL:
+	if (src_kind == 4)
+	  real_val = *(float*) src;
+	else if (src_kind == 8)
+	  real_val = *(double*) src;
+#ifdef HAVE_GFC_REAL_10
+	else if (src_kind == 10)
+	  real_val = *(long double*) src;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	else if (src_kind == 16)
+	  real_val = *(real128t*) src;
+#endif
+	else
+	  goto error;
+	break;
+     case BT_COMPLEX:
+	if (src_kind == 4)
+	  cmpx_val = *(_Complex float*) src;
+	else if (src_kind == 8)
+	  cmpx_val = *(_Complex double*) src;
+#ifdef HAVE_GFC_REAL_10
+	else if (src_kind == 10)
+	  cmpx_val = *(_Complex long double*) src;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	else if (src_kind == 16)
+	  cmpx_val = *(complex128t*) src;
+#endif
+	else
+	  goto error;
+	break;
+     default:
+	goto error;
+     }
+
+   switch (dst_type)
+     {
+     case BT_INTEGER:
+	if (src_type == BT_INTEGER)
+	  {
+	    if (dst_kind == 1)
+	      *(int8_t*) dst = (int8_t) int_val;
+	    else if (dst_kind == 2)
+	      *(int16_t*) dst = (int16_t) int_val;
+	    else if (dst_kind == 4)
+	      *(int32_t*) dst = (int32_t) int_val;
+	    else if (dst_kind == 8)
+	      *(int64_t*) dst = (int64_t) int_val;
+#ifdef HAVE_GFC_INTEGER_16
+	    else if (dst_kind == 16)
+	      *(int128t*) dst = (int128t) int_val;
+#endif
+	    else
+	      goto error;
+	  }
+        else if (src_type == BT_REAL)
+	  {
+	    if (dst_kind == 1)
+	      *(int8_t*) dst = (int8_t) real_val;
+	    else if (dst_kind == 2)
+	      *(int16_t*) dst = (int16_t) real_val;
+	    else if (dst_kind == 4)
+	      *(int32_t*) dst = (int32_t) real_val;
+	    else if (dst_kind == 8)
+	      *(int64_t*) dst = (int64_t) real_val;
+#ifdef HAVE_GFC_INTEGER_16
+	    else if (dst_kind == 16)
+	      *(int128t*) dst = (int128t) real_val;
+#endif
+	    else
+	      goto error;
+	  }
+        else if (src_type == BT_COMPLEX)
+	  {
+	    if (dst_kind == 1)
+	      *(int8_t*) dst = (int8_t) cmpx_val;
+	    else if (dst_kind == 2)
+	      *(int16_t*) dst = (int16_t) cmpx_val;
+	    else if (dst_kind == 4)
+	      *(int32_t*) dst = (int32_t) cmpx_val;
+	    else if (dst_kind == 8)
+	      *(int64_t*) dst = (int64_t) cmpx_val;
+#ifdef HAVE_GFC_INTEGER_16
+	    else if (dst_kind == 16)
+	      *(int128t*) dst = (int128t) cmpx_val;
+#endif
+	    else
+	      goto error;
+	  }
+        else
+          goto error;
+	break;
+     case BT_REAL:
+	if (src_type == BT_INTEGER)
+	  {
+	    if (dst_kind == 4)
+	      *(float*) dst = (float) int_val;
+	    else if (dst_kind == 8)
+	      *(double*) dst = (double) int_val;
+#ifdef HAVE_GFC_REAL_10
+	    else if (dst_kind == 10)
+	      *(long double*) dst = (long double) int_val;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	    else if (dst_kind == 16)
+	      *(real128t*) dst = (real128t) int_val;
+#endif
+	    else
+	      goto error;
+	  }
+	else if (src_type == BT_REAL)
+	  {
+	    if (dst_kind == 4)
+	      *(float*) dst = (float) real_val;
+	    else if (dst_kind == 8)
+	      *(double*) dst = (double) real_val;
+#ifdef HAVE_GFC_REAL_10
+	    else if (dst_kind == 10)
+	      *(long double*) dst = (long double) real_val;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	    else if (dst_kind == 16)
+	      *(real128t*) dst = (real128t) real_val;
+#endif
+	    else
+	      goto error;
+	  }
+	else if (src_type == BT_COMPLEX)
+	  {
+	    if (dst_kind == 4)
+	      *(float*) dst = (float) cmpx_val;
+	    else if (dst_kind == 8)
+	      *(double*) dst = (double) cmpx_val;
+#ifdef HAVE_GFC_REAL_10
+	    else if (dst_kind == 10)
+	      *(long double*) dst = (long double) cmpx_val;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	    else if (dst_kind == 16)
+	      *(real128t*) dst = (real128t) cmpx_val;
+#endif
+	    else
+	      goto error;
+	  }
+	break;
+     case BT_COMPLEX:
+	if (src_type == BT_INTEGER)
+	  {
+	    if (dst_kind == 4)
+	      *(_Complex float*) dst = (_Complex float) int_val;
+	    else if (dst_kind == 8)
+	      *(_Complex double*) dst = (_Complex double) int_val;
+#ifdef HAVE_GFC_REAL_10
+	    else if (dst_kind == 10)
+	      *(_Complex long double*) dst = (_Complex long double) int_val;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	    else if (dst_kind == 16)
+	      *(complex128t*) dst = (complex128t) int_val;
+#endif
+	    else
+	      goto error;
+	  }
+	else if (src_type == BT_REAL)
+	  {
+	    if (dst_kind == 4)
+	      *(_Complex float*) dst = (_Complex float) real_val;
+	    else if (dst_kind == 8)
+	      *(_Complex double*) dst = (_Complex double) real_val;
+#ifdef HAVE_GFC_REAL_10
+	    else if (dst_kind == 10)
+	      *(_Complex long double*) dst = (_Complex long double) real_val;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	    else if (dst_kind == 16)
+	      *(complex128t*) dst = (complex128t) real_val;
+#endif
+	    else
+	      goto error;
+	  }
+	else if (src_type == BT_COMPLEX)
+	  {
+	    if (dst_kind == 4)
+	      *(_Complex float*) dst = (_Complex float) cmpx_val;
+	    else if (dst_kind == 8)
+	      *(_Complex double*) dst = (_Complex double) cmpx_val;
+#ifdef HAVE_GFC_REAL_10
+	    else if (dst_kind == 10)
+	      *(_Complex long double*) dst = (_Complex long double) cmpx_val;
+#endif
+#ifdef HAVE_GFC_REAL_16
+	    else if (dst_kind == 16)
+	      *(complex128t*) dst = (complex128t) cmpx_val;
+#endif
+	    else
+	      goto error;
+	  }
+	else
+	  goto error;
+	break;
+     default:
+        goto error;
+     }
+
+error:
+   fprintf (stderr, "RUNTIME ERROR: Cannot convert type %d kind "
+            "%d to type %d kind %d\n", src_type, src_kind, dst_type, dst_kind);
+   PREFIX (error_stop) (1);
+}/* Get a scalar (or contiguous) data from remote image into a buffer.  */
+
+
+void
+PREFIX (get) (caf_token_t token, size_t offset,
+	      int image_index __attribute__ ((unused)),
+	      gfc_descriptor_t *src ,
+	      caf_vector_t *src_vector __attribute__ ((unused)),
+	      gfc_descriptor_t *dest, int src_kind, int dst_kind)
+{
+  /* FIXME: Handle vector subscripts; check whether strings of different
+     kinds are permitted.  */
+  size_t i, k, size;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  size_t src_size = GFC_DESCRIPTOR_SIZE (src);
+  size_t dst_size = GFC_DESCRIPTOR_SIZE (dest);
+
+  if (rank == 0)
+    {
+      void *sr = (void *) ((char *) TOKEN (token) + offset);
+      if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+	  && dst_kind == src_kind)
+	memmove (dest->base_addr, sr,
+		 dst_size > src_size ? src_size : dst_size);
+      else
+	convert_type (dest->base_addr, GFC_DESCRIPTOR_TYPE (dest),
+		      dst_kind, sr, GFC_DESCRIPTOR_TYPE (src), src_kind);
+      if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+	{
+	  if (dst_kind == 1)
+	    memset ((void*)(char*) dest->base_addr + src_size, ' ',
+		    dst_size-src_size);
+	  else /* dst_kind == 4.  */
+	    for (i = src_size/4; i < dst_size/4; i++)
+	      ((int32_t*) dest->base_addr)[i] = (int32_t) ' ';
+	}
+      return;
+    }
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_dst = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < rank-1; j++)
+	{
+	  array_offset_dst += ((i / (extent*stride))
+			       % (dest->dim[j]._ubound
+				  - dest->dim[j].lower_bound + 1))
+			      * dest->dim[j]._stride;
+	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	}
+      array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+      void *dst = dest->base_addr + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest);
+
+      ptrdiff_t array_offset_sr = 0;
+      stride = 1;
+      extent = 1;
+      for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+	{
+	  array_offset_sr += ((i / (extent*stride))
+			       % (src->dim[j]._ubound
+				  - src->dim[j].lower_bound + 1))
+			     * src->dim[j]._stride;
+	  extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+	  stride = src->dim[j]._stride;
+	}
+      array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+      void *sr = (void *)((char *) TOKEN (token) + offset
+			  + array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
+
+      if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+	  && dst_kind == src_kind)
+	memmove (dst, sr, dst_size > src_size ? src_size : dst_size);
+      else
+	convert_type (dst, GFC_DESCRIPTOR_TYPE (dest), dst_kind,
+		      sr, GFC_DESCRIPTOR_TYPE (src), src_kind);
+      if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+	{
+	  if (dst_kind == 1)
+	    memset ((void*)(char*) dst + src_size, ' ', dst_size-src_size);
+	  else /* dst_kind == 4.  */
+	    for (k = src_size/4; k < dst_size/4; i++)
+	      ((int32_t*) dst)[i] = (int32_t) ' ';
+	}
+    }
+}
+
+
+void
+PREFIX (send) (caf_token_t token, size_t offset,
+	       int image_index __attribute__ ((unused)),
+	       gfc_descriptor_t *dest,
+	       caf_vector_t *dst_vector __attribute__ ((unused)),
+	       gfc_descriptor_t *src, int dst_kind,
+	       int src_kind)
+{
+  /* FIXME: Handle vector subscripts; check whether strings of different
+     kinds are permitted.  */
+  size_t i, k, size;
+  int j;
+  int rank = GFC_DESCRIPTOR_RANK (dest);
+  size_t src_size = GFC_DESCRIPTOR_SIZE (src);
+  size_t dst_size = GFC_DESCRIPTOR_SIZE (dest);
+
+  if (rank == 0)
+    {
+      void *dst = (void *) ((char *) TOKEN (token) + offset);
+      if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+	  && dst_kind == src_kind)
+	memmove (dst, src->base_addr,
+		 dst_size > src_size ? src_size : dst_size);
+      else
+	convert_type (dst, GFC_DESCRIPTOR_TYPE (dest), dst_kind, src->base_addr,
+		      GFC_DESCRIPTOR_TYPE (src), src_kind);
+      if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+	{
+	  if (dst_kind == 1)
+	    memset ((void*)(char*) dst + src_size, ' ', dst_size-src_size);
+	  else /* dst_kind == 4.  */
+	    for (i = src_size/4; i < dst_size/4; i++)
+	      ((int32_t*) dst)[i] = (int32_t) ' ';
+	}
+      return;
+    }
+
+  size = 1;
+  for (j = 0; j < rank; j++)
+    {
+      ptrdiff_t dimextent = dest->dim[j]._ubound - dest->dim[j].lower_bound + 1;
+      if (dimextent < 0)
+	dimextent = 0;
+      size *= dimextent;
+    }
+
+  if (size == 0)
+    return;
+
+  for (i = 0; i < size; i++)
+    {
+      ptrdiff_t array_offset_dst = 0;
+      ptrdiff_t stride = 1;
+      ptrdiff_t extent = 1;
+      for (j = 0; j < rank-1; j++)
+	{
+	  array_offset_dst += ((i / (extent*stride))
+			       % (dest->dim[j]._ubound
+				  - dest->dim[j].lower_bound + 1))
+			      * dest->dim[j]._stride;
+	  extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1);
+          stride = dest->dim[j]._stride;
+	}
+      array_offset_dst += (i / extent) * dest->dim[rank-1]._stride;
+      void *dst = (void *)((char *) TOKEN (token) + offset
+			   + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest));
+      void *sr;
+      if (GFC_DESCRIPTOR_RANK (src) != 0)
+	{
+	  ptrdiff_t array_offset_sr = 0;
+	  stride = 1;
+	  extent = 1;
+	  for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++)
+	    {
+	      array_offset_sr += ((i / (extent*stride))
+				  % (src->dim[j]._ubound
+				     - src->dim[j].lower_bound + 1))
+				 * src->dim[j]._stride;
+	      extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1);
+	      stride = src->dim[j]._stride;
+	    }
+	  array_offset_sr += (i / extent) * src->dim[rank-1]._stride;
+	  sr = (void *)((char *) src->base_addr
+			+ array_offset_sr*GFC_DESCRIPTOR_SIZE (src));
+	}
+      else
+	sr = src->base_addr;
+
+      if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src)
+	  && dst_kind == src_kind)
+	memmove (dst, sr, dst_size > src_size ? src_size : dst_size);
+      else
+	convert_type (dst, GFC_DESCRIPTOR_TYPE (dest), dst_kind, sr,
+		      GFC_DESCRIPTOR_TYPE (src), src_kind);
+      if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_size > src_size)
+	{
+	  if (dst_kind == 1)
+	    memset ((void*)(char*) dst + src_size, ' ', dst_size-src_size);
+	  else /* dst_kind == 4.  */
+	    for (k = src_size/4; k < dst_size/4; i++)
+	      ((int32_t*) dst)[i] = (int32_t)' ';
+	}
+    }
+}
+
+
+void
+PREFIX (sendget) (caf_token_t dst_token, size_t dst_offset, int dst_image_index,
+		  gfc_descriptor_t *dest, caf_vector_t *dst_vector,
+		  caf_token_t src_token, size_t src_offset,
+		  int src_image_index __attribute__ ((unused)),
+		  gfc_descriptor_t *src,
+		  caf_vector_t *src_vector __attribute__ ((unused)),
+		  int dst_len, int src_len)
+{
+  /* FIXME: Handle vector subscript of 'src_vector'.  */
+  /* For a single image, src->base_addr should be the same as src_token + offset
+     but to play save, we do it properly.  */
+  void *src_base = src->base_addr;
+  src->base_addr = (void *) ((char *) TOKEN (src_token) + src_offset);
+  PREFIX (send) (dst_token, dst_offset, dst_image_index, dest, dst_vector,
+		 src, dst_len, src_len);
+  src->base_addr = src_base;
+}
+
+
+void
+PREFIX (co_sum) (gfc_descriptor_t *a __attribute__ ((unused)),
+		 int result_image __attribute__ ((unused)), int *stat,
+		 char *errmsg __attribute__ ((unused)),
+		 int errmsg_len __attribute__ ((unused)))
+{
+  if (stat)
+    *stat = 0;
+}
+
+
+void
+PREFIX (co_min) (gfc_descriptor_t *a __attribute__ ((unused)),
+		 int result_image __attribute__ ((unused)), int *stat,
+		 char *errmsg __attribute__ ((unused)),
+		 int src_len __attribute__ ((unused)),
+		 int errmsg_len __attribute__ ((unused)))
+{
+  if (stat)
+    *stat = 0;
+}
+
+
+void
+PREFIX (co_max) (gfc_descriptor_t *a __attribute__ ((unused)),
+		 int result_image __attribute__ ((unused)), int *stat,
+		 char *errmsg __attribute__ ((unused)),
+		 int src_len __attribute__ ((unused)),
+		 int errmsg_len __attribute__ ((unused)))
+{
+  if (stat)
+    *stat = 0;
+}
+
+
+void
+PREFIX (sync_all) (int *stat,
+		   char *errmsg __attribute__ ((unused)),
+		   int errmsg_len __attribute__ ((unused)))
+{
+  if (stat)
+    *stat = 0;
+}
+
+
+void
+PREFIX (sync_images) (int count __attribute__ ((unused)),
+		      int images[] __attribute__ ((unused)),
+		      int *stat,
+		      char *errmsg __attribute__ ((unused)),
+		      int errmsg_len __attribute__ ((unused)))
+{
+#ifdef GFC_CAF_CHECK
+  int i;
+
+  for (i = 0; i < count; i++)
+    if (images[i] != 1)
+      {
+	fprintf (stderr, "COARRAY ERROR: Invalid image index %d to SYNC "
+		 "IMAGES", images[i]);
+	exit (EXIT_FAILURE);
+      }
+#endif
+
+  if (stat)
+    *stat = 0;
+}
+
+
+void
+PREFIX (error_stop_str) (const char *string, int32_t len)
+{
+  fputs ("ERROR STOP ", stderr);
+  while (len--)
+    fputc (*(string++), stderr);
+  fputs ("\n", stderr);
+
+  exit (1);
+}
+
+
+void
+PREFIX(error_stop) (int32_t error)
+{
+  fprintf (stderr, "ERROR STOP %d\n", error);
+  exit (error);
+}
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
new file mode 100644
index 0000000..6464617
--- /dev/null
+++ b/src/tests/CMakeLists.txt
@@ -0,0 +1,17 @@
+if ("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
+  option(HIGH_RESOLUTION_TIMER "use the walltime.o wallclock timer" FALSE)
+  option(LEGACY_ARCHITECTURE "use the SSE-enabled FFT library" FALSE)
+endif()
+
+if (HIGH_RESOLUTION_TIMER)
+  message(WARNING
+    "walltime.o clicks once per clock cycle on an x86 CPU with a 3.6 GHz clock.  "
+    "Because modern processors can throttle their clock spees, do not "
+    "rely on this timer for an absolute value, but it can be useful for "
+    "comparison or relative execution execution times on the same platform."
+    )
+endif()
+
+add_subdirectory(integration)
+add_subdirectory(unit)
+#add_subdirectory(performance)
diff --git a/src/tests/UH_CAF_perf_validation_suite_v1.0.1.txt b/src/tests/UH_CAF_perf_validation_suite_v1.0.1.txt
new file mode 100644
index 0000000..9055966
--- /dev/null
+++ b/src/tests/UH_CAF_perf_validation_suite_v1.0.1.txt
@@ -0,0 +1,7 @@
+Tarball removed.
+
+The UH CAF performance validation suite is still in the git history,
+if it cannot be obtained elsewhere.
+
+The latest official release of this software appears to be at:
+https://github.com/uhhpctools/caf-testsuite/releases/latest
diff --git a/src/tests/integration/CMakeLists.txt b/src/tests/integration/CMakeLists.txt
new file mode 100644
index 0000000..7261163
--- /dev/null
+++ b/src/tests/integration/CMakeLists.txt
@@ -0,0 +1,7 @@
+if (opencoarrays_aware_compiler)
+  add_subdirectory(coarrayHelloWorld)
+  if (NOT (DEFINED ENV{TRAVIS}))
+    add_subdirectory(dist_transpose )
+  endif()
+  add_subdirectory(pde_solvers)
+endif()
diff --git a/src/tests/integration/coarrayHelloWorld/CMakeLists.txt b/src/tests/integration/coarrayHelloWorld/CMakeLists.txt
new file mode 100644
index 0000000..9bcf8bb
--- /dev/null
+++ b/src/tests/integration/coarrayHelloWorld/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(hello_multiverse hello_multiverse.F90)
+target_link_libraries(hello_multiverse OpenCoarrays)
diff --git a/src/tests/integration/coarrayHelloWorld/Makefile b/src/tests/integration/coarrayHelloWorld/Makefile
new file mode 100644
index 0000000..ca50b17
--- /dev/null
+++ b/src/tests/integration/coarrayHelloWorld/Makefile
@@ -0,0 +1,16 @@
+# Edit the line below to the opencarrays installation directory
+opencoarrays=/opt/opencoarrays
+executable=hello_multiverse
+opt=-L $(opencoarrays)/lib
+compile=mpif90 -fcoarray=lib
+lib=-lcaf_mpi
+
+$(executable): hello_multiverse.o  Makefile
+	$(compile) $(opt) hello_multiverse.o -o $(executable) $(lib)
+
+hello_multiverse.o: hello_multiverse.F90  Makefile
+	$(compile) -c hello_multiverse.F90
+
+.PHONY : clean
+clean :
+	-rm -f  *.o *.mod $(executable)
diff --git a/src/tests/integration/coarrayHelloWorld/hello_multiverse.F90 b/src/tests/integration/coarrayHelloWorld/hello_multiverse.F90
new file mode 100644
index 0000000..8114e63
--- /dev/null
+++ b/src/tests/integration/coarrayHelloWorld/hello_multiverse.F90
@@ -0,0 +1,60 @@
+! Coarray "Hello, world!" test program
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+! Robodoc header:
+!****e* tests/coarrayHelloWorld/hello_multiverse.F90
+! NAME
+!   hello_multiverse
+! SYNOPSIS
+!   Demonstrate coarray communication via a scalar character coarray.
+! INPUTS
+!   None.
+! OUTPUTS
+!   Test pass or failure.
+!******
+
+program hello_multiverse
+  implicit none
+  integer, parameter :: MAX_STRING=100
+  character(len=MAX_STRING) :: greeting[*] ! Scalar coarray
+  integer image
+  write(greeting,"(2(a,i2))") "Greetings from image ",this_image()," of ",num_images()
+  sync all ! Barrier
+  if (this_image()==1) then
+    do concurrent (image=1:num_images())
+      print *,greeting[image]
+    end do
+    block
+      integer, parameter :: expected_location=23,max_single_digit=9
+      do image=2,min(num_images(),max_single_digit)
+        ! Verify that the greetings of images 1-9 have their image number at the expected location:
+        if (scan(greeting[image],set="123456789")/=expected_location) error stop "Test failed."
+      end do
+    end block
+    print *,"Test passed."
+  end if
+end program
diff --git a/src/tests/integration/dist_transpose/CMakeLists.txt b/src/tests/integration/dist_transpose/CMakeLists.txt
new file mode 100644
index 0000000..807ba4d
--- /dev/null
+++ b/src/tests/integration/dist_transpose/CMakeLists.txt
@@ -0,0 +1,9 @@
+if (HIGH_RESOLUTION_TIMER)
+  add_definitions(-DHAVE_WALLTIME)
+  set(walltime_o walltime.o)
+endif()
+add_executable(coarray_distributed_transpose
+  coarray_distributed_transpose.F90
+  ${walltime_o}
+  )
+target_link_libraries(coarray_distributed_transpose OpenCoarrays)
diff --git a/src/tests/integration/dist_transpose/Makefile_NS_GASNET b/src/tests/integration/dist_transpose/Makefile_NS_GASNET
new file mode 100644
index 0000000..050a02a
--- /dev/null
+++ b/src/tests/integration/dist_transpose/Makefile_NS_GASNET
@@ -0,0 +1,32 @@
+# Choose desired compiler (options: gnu, intel, cray):
+include /scratch2/scratchdirs/afanfa/GASNet-1.22.4/aries-conduit/aries-par.mak #/scratch/scratchdirs/afanfa/GASNet-1.22.4/gemini-conduit/gemini-par.mak
+compiler=gnu
+executable=coarray_distributed_transpose
+
+opencoarrays_dir=/global/u1/a/afanfa/Coarray/opencoarrays/gasnet
+
+ifeq ($(compiler),gnu)
+  opt=-L $(opencoarrays_dir)
+  compile=/global/u1/a/afanfa/gcc/bin/gfortran -Ofast -fcoarray=lib
+  lib=-lcaf_gasnet
+else
+  ifeq ($(compiler),intel)
+    compile=ifort -coarray=shared -standard-semantics -O3 -coarray-num-images=2
+  else
+    ifeq ($(compiler),cray)
+      compile=ftn -ew -h caf
+    endif
+  endif
+endif
+
+objects = walltime.o
+
+$(executable): coarray_distributed_transpose.o $(objects)
+	$(GASNET_LD) $(GASNET_LDFLAGS) $(LIBCAF_FLAGS) $(opt) coarray_distributed_transpose.o $(objects) -lgfortran -lm -o $(executable) $(lib) $(GASNET_LIBS)
+
+coarray_distributed_transpose.o: coarray_distributed_transpose.f90 $(objects)
+	$(compile) -cpp -c coarray_distributed_transpose.f90
+
+.PHONY : clean
+clean :
+	-rm -f  coarray_distributed_transpose.o *.mod $(executable) core fort.*
diff --git a/src/tests/integration/dist_transpose/coarray_distributed_transpose.F90 b/src/tests/integration/dist_transpose/coarray_distributed_transpose.F90
new file mode 100644
index 0000000..7341ac4
--- /dev/null
+++ b/src/tests/integration/dist_transpose/coarray_distributed_transpose.F90
@@ -0,0 +1,309 @@
+! Coarray Distributed Transpose Test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+! Robodoc header:
+!****m* dist_transpose/run_size
+! NAME
+!   run_size
+!  SYNOPSIS
+!   Encapsulate problem state, wall-clock timer interface, integer broadcasts, and a data copy.
+!******
+!==================  test transposes with integer x,y,z values  ===============================
+module run_size
+    use iso_fortran_env
+#ifndef HAVE_WALLTIME
+    use MPI, only : WALLTIME=>MPI_WTIME
+#endif
+    implicit none
+        integer(int64), codimension[*] :: nx, ny, nz
+        integer(int64), codimension[*] :: my, mx, first_y, last_y, first_x, last_x
+        integer(int64) :: my_node, num_nodes
+        real(real64), codimension[*] :: tran_time
+
+#ifdef HAVE_WALLTIME
+interface
+   function WALLTIME() bind(C, name = "WALLTIME")
+   use iso_fortran_env
+       real(real64) :: WALLTIME
+   end function WALLTIME
+end interface
+#endif
+
+contains
+
+!****s* run_size/broadcast_int
+! NAME
+!   broadcast_int
+!  SYNOPSIS
+!   Broadcast a scalar coarray integer from image 1 to all other images.
+!******
+    subroutine broadcast_int( variable )
+        integer(int64), codimension[*] :: variable
+        integer(int64) :: i
+        if( my_node == 1 ) then
+            do i = 2, num_nodes;    variable[i] = variable;      end do
+        end if
+    end subroutine broadcast_int
+
+subroutine copy3( A,B, n1, sA1, sB1, n2, sA2, sB2, n3, sA3, sB3 )
+  implicit none
+  complex, intent(in)  :: A(0:*)
+  complex, intent(out) :: B(0:*)
+  integer(int64), intent(in) :: n1, sA1, sB1
+  integer(int64), intent(in) :: n2, sA2, sB2
+  integer(int64), intent(in) :: n3, sA3, sB3
+  integer(int64) i,j,k
+
+  do k=0,n3-1
+     do j=0,n2-1
+        do i=0,n1-1
+           B(i*sB1+j*sB2+k*sB3) = A(i*sA1+j*sA2+k*sA3)
+        end do
+     end do
+  end do
+end subroutine copy3
+
+end module run_size
+
+!****e* dist_transpose/coarray_distributed_transpose
+! NAME
+!   coarray_distributed_transpose
+! SYNOPSIS
+!   This program tests the transpose routines used in Fourier-spectral simulations of homogeneous turbulence.
+!   The data is presented to the physics routines as groups of y-z or x-z planes distributed among the images.
+!   The (out-of-place) transpose routines do the x <--> y transposes required and consist of transposes within
+!   data blocks (intra-image) and a transpose of the distribution of these blocks among the images (inter-image).
+!
+!   Two methods are tested here:
+!   RECEIVE: receive block from other image and transpose it
+!   SEND:    transpose block and send it to other image
+!
+!   This code is the coarray analog of mpi_distributed_transpose.
+!******
+
+program coarray_distributed_transpose
+  !(***********************************************************************************************************
+  !                   m a i n   p r o g r a m
+  !***********************************************************************************************************)
+      use run_size
+      implicit none
+
+      complex, allocatable ::  u(:,:,:,:)[:]    ! u(nz,4,first_x:last_x,ny)[*]    !(*-- ny = my * num_nodes --*)
+      complex, allocatable ::  ur(:,:,:,:)[:]   !ur(nz,4,first_y:last_y,nx/2)[*]  !(*-- nx/2 = mx * num_nodes --*)
+      complex, allocatable :: bufr_X_Y(:,:,:,:)
+      complex, allocatable :: bufr_Y_X(:,:,:,:)
+      integer(int64) :: x, y, z, msg_size, iter
+
+      num_nodes = num_images()
+      my_node = this_image()
+
+      if( my_node == 1 ) then
+           !write(6,*) "nx,ny,nz : ";      read(5,*) nx, ny, nz
+            nx=32; ny=32; nz=32
+            call broadcast_int( nx );        call broadcast_int( ny );        call broadcast_int( nz );
+       end if
+      sync all  !-- other nodes wait for broadcast!
+
+
+      if ( mod(ny,num_nodes) == 0)  then;   my = ny / num_nodes
+                                    else;   write(6,*) "node ", my_node, " ny not multiple of num_nodes";     error stop
+      end if
+
+      if ( mod(nx/2,num_nodes) == 0)  then;   mx = nx/2 / num_nodes
+                                    else;   write(6,*) "node ", my_node, "nx/2 not multiple of num_nodes";     error stop
+      end if
+
+      first_y = (my_node-1)*my + 1;   last_y  = (my_node-1)*my + my
+      first_x = (my_node-1)*mx + 1;   last_x  = (my_node-1)*mx + mx
+
+      allocate (  u(nz , 4 , first_x:last_x , ny)  [*] )   !(*-- y-z planes --*)
+      allocate ( ur(nz , 4 , first_y:last_y , nx/2)[*] )   !(*-- x-z planes --*)
+      allocate ( bufr_X_Y(nz,4,mx,my) )
+      allocate ( bufr_Y_X(nz,4,my,mx) )
+
+      msg_size = nz*4*mx*my     !-- message size (complex data items)
+
+!---------  initialize data u (mx y-z planes per image) ----------
+
+        do x = first_x, last_x
+            do y = 1, ny
+                do z = 1, nz
+                    u(z,1,x,y) = x
+                    u(z,2,x,y) = y
+                    u(z,3,x,y) = z
+                end do
+            end do
+        end do
+
+    tran_time = 0
+    do iter = 1, 2  !--- 2 transform pairs per second-order time step
+
+!---------  transpose data u -> ur (mx y-z planes to my x-z planes per image)  --------
+
+      ur = 0
+
+      call transpose_X_Y
+
+!--------- test data ur (my x-z planes per image) ----------
+
+        do x = 1, nx/2
+            do y = first_y, last_y
+                do z = 1, nz
+                    if ( real(ur(z,1,y,x)) /= x .or. real(ur(z,2,y,x)) /= y .or. real(ur(z,3,y,x)) /= z )then
+                        write(6,fmt="(A,i3,3(6X,A,f7.3,i4))") "transpose_X_Y failed:  image ", my_node &
+                            , " X ",real(ur(z,1,y,x)),x, "  Y ",real(ur(z,2,y,x)),y, "  Z ", real(ur(z,3,y,x)),z
+                        stop
+                    end if
+                end do
+            end do
+        end do
+
+!---------  transpose data ur -> u (my x-z planes to mx y-z planes per image)  --------
+
+      u = 0
+      call transpose_Y_X
+
+!--------- test data u (mx y-z planes per image) ----------
+
+        do x = first_x, last_x
+            do y = 1, ny
+                do z = 1, nz
+                    if ( real(u(z,1,x,y)) /= x .or. real(u(z,2,x,y)) /= y .or. real(u(z,3,x,y)) /= z )then
+                        write(6,fmt="(A,i3,3(6X,A,f7.3,i4))") "transpose_Y_X failed:  image ", my_node &
+                            , " X ",real(u(z,1,x,y)),x, "  Y ",real(u(z,2,x,y)),y, "  Z ", real(u(z,3,x,y)),z
+                        stop
+                    end if
+                end do
+            end do
+        end do
+    end do
+
+        sync all
+        if( my_node == 1 )  write(6,fmt="(A,f8.3)")  "test passed:  tran_time ", tran_time
+
+    deallocate ( bufr_X_Y );    deallocate ( bufr_Y_X )
+
+!=========================   end of main executable  =============================
+
+contains
+
+!-------------   out-of-place transpose data_s --> data_r  ----------------------------
+
+ subroutine transpose_X_Y
+
+    use run_size
+    implicit none
+
+    integer(int64) :: i,stage
+
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (    u(1,1,first_x,1+(my_node-1)*my) &                   !-- intra-node transpose
+                ,  ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- no inter-node transpose needed
+                ,   nz*3, 1_8, 1_8        &                                 !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+
+#define RECEIVE
+#ifdef RECEIVE
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        bufr_X_Y(:,:,:,:) = u(:,:,:,1+(my_node-1)*my:my_node*my)[i]         !-- inter-node transpose to buffer
+        call copy3 ( bufr_X_Y, ur(1,1,first_y,1+(i-1)*mx)  &                !-- intra-node transpose from buffer
+                        ,   nz*3, 1_8, 1_8        &                             !-- note: only 3 of 4 words needed
+                        ,   mx, nz*4, nz*4*my &
+                        ,   my, nz*4*mx, nz*4 )
+    end do
+
+#else
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        call  copy3 ( u(1,1,first_x,1+(i-1)*my), bufr_Y_X   &        !-- intra-node transpose to buffer
+                    ,   nz*3, 1_8, 1_8        &
+                    ,   mx, nz*4, nz*4*my &
+                    ,   my, nz*4*mx, nz*4 )
+        ur(:,:,:,1+(my_node-1)*mx:my_node*mx)[i] = bufr_Y_X(:,:,:,:)        !-- inter-node transpose from buffer
+    end do
+
+#endif
+
+    sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+
+ end  subroutine transpose_X_Y
+
+!-------------   out-of-place transpose data_r --> data_s  ----------------------------
+
+subroutine transpose_Y_X
+    use run_size
+    implicit none
+
+    integer(int64) :: i, stage
+
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (   ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- intra-node transpose
+                ,   u(1,1,first_x,1+(my_node-1)*my) &                   !-- no inter-node transpose needed
+                ,   nz*4, 1_8, 1_8        &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+
+#define RECEIVE
+#ifdef RECEIVE
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        bufr_Y_X(:,:,:,:) = ur(:,:,:,1+(my_node-1)*mx:my_node*mx)[i]        !-- inter-node transpose to buffer
+        call copy3 ( bufr_Y_X, u(1,1,first_x,1+(i-1)*my)  &                 !-- intra-node transpose from buffer
+                    ,   nz*4, 1_8, 1_8        &
+                    ,   my, nz*4, nz*4*mx &
+                    ,   mx, nz*4*my, nz*4 )
+    end do
+
+#else
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        call copy3 ( ur(1,1,first_y,1+(i-1)*mx), bufr_X_Y  &                 !-- intra-node transpose from buffer
+                    ,   nz*4, 1_8, 1_8        &
+                    ,   my, nz*4, nz*4*mx &
+                    ,   mx, nz*4*my, nz*4 )
+        u(:,:,:,1+(my_node-1)*my:my_node*my)[i] = bufr_X_Y(:,:,:,:)        !-- inter-node transpose from buffer
+    end do
+
+#endif
+
+    sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+
+ end  subroutine transpose_Y_X
+
+
+end program coarray_distributed_transpose
diff --git a/src/tests/integration/dist_transpose/walltime.o b/src/tests/integration/dist_transpose/walltime.o
new file mode 100644
index 0000000..33016a9
Binary files /dev/null and b/src/tests/integration/dist_transpose/walltime.o differ
diff --git a/src/tests/integration/dist_transpose/walltime.x64 b/src/tests/integration/dist_transpose/walltime.x64
new file mode 100644
index 0000000..dd89ad2
--- /dev/null
+++ b/src/tests/integration/dist_transpose/walltime.x64
@@ -0,0 +1,20 @@
+	MHz := 3600.e6         !--- clock frequency of i7-3820
+
+	program
+
+	time :	double = 1./MHz     !-- time/tick
+
+	entry "WALLTIME"
+	entry "_WALLTIME"
+
+	RAX,RDX = TSC       !-- read time-stamp counter ticks
+	RDX &= $000fffff    !-- convert tricks to 64-bit float
+	RDX |= $43300000
+	RDX <<= 32
+	RDX |= RAX
+
+	[RSP-8] = RDX       !-- multiply ticks by time/tick
+    LO XM0 = [RSP-8]
+    XM0.v1 *= time
+
+	RET
diff --git a/src/tests/integration/pde_solvers/CMakeLists.txt b/src/tests/integration/pde_solvers/CMakeLists.txt
new file mode 100644
index 0000000..29805c3
--- /dev/null
+++ b/src/tests/integration/pde_solvers/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_subdirectory( coarrayBurgers )
+add_subdirectory( navier-stokes )
+add_subdirectory( coarrayHeatSimplified )
diff --git a/src/tests/integration/pde_solvers/README.md b/src/tests/integration/pde_solvers/README.md
new file mode 100644
index 0000000..0709f97
--- /dev/null
+++ b/src/tests/integration/pde_solvers/README.md
@@ -0,0 +1,50 @@
+[This document is formatted with GitHub-Flavored Markdown.   ]:#
+[For better viewing, including hyperlinks, read it online at ]:#
+[https://github.com/sourceryinstitute/opencoarrays/edit/master/src/tests/integration/pde_solvers/README.txt]:#
+
+Partial Differential Equation (PDE) Solvers
+===========================================
+
+This directory contains three PDE solvers listed here in order from simplest to most complex:
+
+* A one-dimensional (1D) finite-difference, unsteady [heat equation solver],
+* A 1D finite-difference, unsteady, nonlinear [Burgers equation solver], and
+* A three-dimensional (3D), unsteady, spectral [Navier-Stokes equation solver].
+
+The first two solvers contain correctness checks that result in the printing of the 
+message "Test passed" before terminating a correct execution.  For more details on the 
+heat equation solver please view the [Sourcery Institute] [tutorial videos] online.  
+
+For more details on the Burgers solver, please see Chapter 12 of the textbook 
+[Scientific Sofware Design] or the open-access journal article 
+"[High Performance Design Patterns for Modern Fortran]."  The [coarrayBurgers] 
+subdirectory includes a [run.sh] launch script that works inside the open-source 
+Linux virtual machine available in the Sourcery Institute [store].
+
+The launch script instruments the Burgers solver for performance analysis using the 
+open-source Tuning and Analysis Utilities ([TAU]) package.  The instrumented Burgers 
+solver has been demonstrated to execute with 87% parallel efficiency on 16,384
+cores in weak scaling when compiled with the Cray Compiler Environment.  For
+new scalabiliby studies, it is important to run problems of sufficient size.  For
+performance and complexity comparisons, an MPI version of the Burgers solver is
+in the [performance] directory at the same level as the current directory.
+
+The Navier-Stokes solver uses Fourier-spectral methods and Runge-Kutta time advancement
+to simulate the evolution of statistically homogeneous turbulent flow in a 3D box with 
+periodic boundary conditions.  For performance and complexity comparisons, the 
+[navier-stokes] subdirectory contains both a  Message Passing Interface (MPI) version 
+and a coarray Fortran (CAF) version of the same solution algorithm.
+
+[heat equation solver]: ./coarrayHeatSimplified
+[Burgers equation solver]: ./coarrayBurgers
+[Navier-Stokes equation solver]: ./navier-stokes
+[Sourcery Institute]: http://www.sourceryinstitute.org
+[tutorial videos]: http://www.sourceryinstitute.org/videos
+[Scientific Sofwtware Design]: http://www.cambridge.org/rouson
+[High Performance Design Patterns for Modern Fortran]: http://www.hindawi.com/journals/sp/2015/942059/
+[store]: http://www.sourceryinstitute.org/store
+[coarrayBurgers]: ./coarrayBurgers
+[run.sh]: ./coarrayBurgers/run.sh
+[TAU]: http://tau.uoregon.edu
+[navier-stokes]: ./navier-stokes
+[performance]: ../../performance
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/CMakeLists.txt b/src/tests/integration/pde_solvers/coarrayBurgers/CMakeLists.txt
new file mode 100644
index 0000000..a5d09dd
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/CMakeLists.txt
@@ -0,0 +1,30 @@
+set(include_directory ${CMAKE_CURRENT_SOURCE_DIR}/include-files)
+set(library_directory ${CMAKE_CURRENT_SOURCE_DIR}/library)
+set(config_directory  ${CMAKE_CURRENT_BINARY_DIR}/library)
+
+if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Cray")
+  configure_file(${include_directory}/cray_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel")
+  configure_file(${include_directory}/intel_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU")
+  configure_file(${include_directory}/gfortran_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "VisualAge|XL")
+  configure_file(${include_directory}/ibm_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "PGI")
+  configure_file(${include_directory}/portlandgroup_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "NAG")
+  configure_file(${include_directory}/nag_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+else()
+  message ("Unknown Fortran compiler: ${CMAKE_Fortran_COMPILER_ID}")
+endif()
+
+add_executable(coarray_burgers_pde
+  main.F90
+  global_field.F90
+  local_field.F90
+  ${library_directory}/ForTrilinos_assertion_utility.F90
+  ${library_directory}/object_interface.F90
+  ${library_directory}/co_object_interface.F90
+)
+target_link_libraries(coarray_burgers_pde OpenCoarrays)
+target_include_directories(coarray_burgers_pde PRIVATE ${config_directory})
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/Makefile b/src/tests/integration/pde_solvers/coarrayBurgers/Makefile
new file mode 100644
index 0000000..d5c7e6b
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/Makefile
@@ -0,0 +1,99 @@
+# Choose desired platform (options: virtual, cees):
+machine=virtual
+# Choose desired compiler (options: gnu, intel, cray, pg, ibm, nag, tau_intel, tau_aciss, tau_cray):
+compiler=gnu
+
+executable=burgers
+library_path=./library
+include_path=./include-files
+
+ifeq ($(compiler),gnu)
+# Use these two lines to compile executables that will ONLY work in single-image runs
+# lib=-lcaf_single
+# compile=gfortran -fcoarray=single
+  lib=-lcaf_mpi
+  compile=caf -fcoarray=lib
+  compile=/Users/rouson/Code/builds/oc/bin/caf -fcoarray=lib
+  ifeq ($(machine),virtual)
+    opt=-L ../../../../mpi
+  else
+    ifeq ($(machine),cees)
+    opt=-L /opt/opencoarrays-src/mpi
+    endif
+  endif
+else
+  ifeq ($(compiler),intel)
+    compile=ifort -standard-semantics -O3
+  else
+    ifeq ($(compiler),cray)
+      compile=ftn -ew
+    else
+      ifeq ($(compiler),tau_intel)
+        compile=tauF90 -tau:serial,icpc,pdt  -standard-semantics -O3 -DTAU=1
+      else
+        ifeq ($(compiler),tau_aciss)
+          compile=tau_F90.sh -standard-semantics -O3
+        else
+          ifeq ($(compiler),tau_cray)
+            compile=tau_F90.sh -optCompInst -ew
+          else
+            ifeq ($(compiler),pg)
+              compile=pgfortran -Mallocatable=03
+            else
+              ifeq ($(compiler),ibm)
+                compile=xlf2003
+              else
+                ifeq ($(compiler),nag)
+                  compile=nagfor -f2008 -fpp -C=all
+                endif
+              endif
+            endif
+          endif
+        endif
+      endif
+    endif
+  endif
+endif
+
+objects = local_field.o global_field.o co_object_interface.o object_interface.o ForTrilinos_assertion_utility.o ForTrilinos_error.o
+
+$(executable): main.o $(objects) Makefile
+	$(compile) $(opt)  main.o $(objects) -o $(executable)  $(lib)
+
+main.o: main.F90 $(objects) ForTrilinos_assertion_utility.o ForTrilinos_error.o global_field.o Makefile
+	$(compile)  -c main.F90
+
+global_field.o: global_field.F90 local_field.o co_object_interface.o ForTrilinos_assertion_utility.o ForTrilinos_error.o preprocessor_definitions Makefile
+	$(compile)  -c global_field.F90
+
+local_field.o: local_field.F90 object_interface.o ForTrilinos_assertion_utility.o ForTrilinos_error.o Makefile
+	$(compile)  -c local_field.F90
+
+object_interface.o: $(library_path)/object_interface.F90 preprocessor_definitions Makefile
+	$(compile)  -c $(library_path)/object_interface.F90
+
+co_object_interface.o: $(library_path)/co_object_interface.F90 Makefile
+	$(compile)  -c $(library_path)/co_object_interface.F90
+
+ForTrilinos_assertion_utility.o: $(library_path)/ForTrilinos_assertion_utility.F90 Makefile
+	$(compile)  -c $(library_path)/ForTrilinos_assertion_utility.F90
+
+ForTrilinos_error.o: $(library_path)/ForTrilinos_error.F90 Makefile
+	$(compile)  -c $(library_path)/ForTrilinos_error.F90
+
+.PHONY : clean
+clean :
+	-rm -f  *.o *.mod $(executable) core fort.* compiler_capabilities.txt $(library_path)/compiler_capabilities.txt profile.* *.ppk
+
+preprocessor_definitions:
+ifeq ($(compiler),gnu)
+	cp $(include_path)/gfortran_capabilities.txt $(library_path)/compiler_capabilities.txt
+else
+  ifeq ($(compiler),intel)
+	cp $(include_path)/intel_capabilities.txt $(library_path)/compiler_capabilities.txt
+  else
+    ifeq ($(compiler),cray)
+	cp $(include_path)/cray_capabilities.txt $(library_path)/compiler_capabilities.txt
+    endif
+  endif
+endif
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/Makefile.inst b/src/tests/integration/pde_solvers/coarrayBurgers/Makefile.inst
new file mode 100644
index 0000000..97b7194
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/Makefile.inst
@@ -0,0 +1,110 @@
+# Choose desired platform (options: virtual, cees):
+machine=virtual
+# Choose desired compiler (options: gnu, intel, cray, tau_intel):
+compiler=tau_gnu
+
+executable=burgers
+mofo_root=../../../../../
+library_path=./library
+include_path=./include-files
+
+ifeq ($(compiler),gnu)
+  lib=-lcaf_mpi
+  compile=mpif90 -fcoarray=lib
+  ifeq ($(machine),virtual)
+    libpath=-L /opt/opencoarrays/lib
+  else
+    ifeq ($(machine),cees)
+      libpath=-L /opt/opencoarrays-src/mpi
+    endif
+  endif
+else
+  ifeq ($(compiler),tau_gnu)
+    compile=tau_f90.sh -L/opt/opencoarrays/lib -fcoarray=lib -DTAU=1 -O3 -g
+    lib=-lcaf_mpi
+    libpath=
+  else
+    ifeq ($(compiler),intel)
+      compile=ifort -standard-semantics -O3 
+    else
+      ifeq ($(compiler),cray)
+        compile=ftn -ew 
+      else
+        ifeq ($(compiler),tau_intel)
+          compile=tauf90 -tau:serial,icpc,pdt  -standard-semantics -O3 -DTAU=1
+        else
+          ifeq ($(compiler),tau_aciss)
+            compile=tau_f90.sh -standard-semantics -O3
+          else
+            ifeq ($(compiler),tau_cray)
+            compile=tau_f90.sh -optCompInst -ew 
+            else
+              ifeq ($(compiler),pg)
+                compile=pgfortran -Mallocatable=03
+              else
+                ifeq ($(compiler),ibm)
+                  compile=xlf2003
+                else
+                  ifeq ($(compiler),nag)
+                    compile=nagfor -f2008 -fpp -C=all
+                  endif
+                endif
+              endif
+            endif
+          endif
+        endif
+      endif
+    endif
+  endif
+endif
+
+objects = local_field.o global_field.o co_object_interface.o object_interface.o ForTrilinos_assertion_utility.o ForTrilinos_error.o 
+
+$(executable): main.o $(objects) Makefile
+	$(compile) $(opt)  main.o $(objects) -o $(executable)  $(lib)
+
+main.o: main.F90 $(objects) Makefile
+	$(compile)  -c main.F90
+
+global_field.o: global_field.F90 local_field.o co_object_interface.o ForTrilinos_assertion_utility.o ForTrilinos_error.o preprocessor_definitions Makefile
+	$(compile)  -c global_field.F90
+
+local_field.o: local_field.F90 object_interface.o ForTrilinos_assertion_utility.o ForTrilinos_error.o Makefile
+	$(compile)  -c local_field.F90
+
+object_interface.o: $(library_path)/object_interface.F90 preprocessor_definitions Makefile 
+	$(compile)  -c $(library_path)/object_interface.F90
+
+co_object_interface.o: $(library_path)/co_object_interface.F90 Makefile
+	$(compile)  -c $(library_path)/co_object_interface.F90
+
+ForTrilinos_assertion_utility.o: $(library_path)/ForTrilinos_assertion_utility.F90 Makefile
+	$(compile)  -c $(library_path)/ForTrilinos_assertion_utility.F90
+
+ForTrilinos_error.o: $(library_path)/ForTrilinos_error.F90 Makefile
+	$(compile)  -c $(library_path)/ForTrilinos_error.F90
+
+.PHONY : clean
+clean :
+	-rm -f  *.o *.mod $(executable) core fort.* compiler_capabilities.txt $(library_path)/compiler_capabilities.txt profile.* *.ppk 
+
+preprocessor_definitions: 
+ifeq ($(compiler),gnu)
+	cp $(include_path)/gfortran_capabilities.txt compiler_capabilities.txt
+	cp $(include_path)/gfortran_capabilities.txt $(library_path)/compiler_capabilities.txt
+else
+  ifeq ($(compiler),tau_gnu)
+	cp $(include_path)/gfortran_capabilities.txt compiler_capabilities.txt
+	cp $(include_path)/gfortran_capabilities.txt $(library_path)/compiler_capabilities.txt
+  else
+    ifeq ($(compiler),intel)
+	cp $(include_path)/intel_capabilities.txt compiler_capabilities.txt
+	cp $(include_path)/intel_capabilities.txt $(library_path)/compiler_capabilities.txt
+    else
+      ifeq ($(compiler),cray)
+	cp $(include_path)/cray_capabilities.txt compiler_capabilities.txt
+	cp $(include_path)/cray_capabilities.txt $(library_path)/compiler_capabilities.txt
+      endif
+    endif
+  endif
+endif
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/global_field.F90 b/src/tests/integration/pde_solvers/coarrayBurgers/global_field.F90
new file mode 100644
index 0000000..c8889f7
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/global_field.F90
@@ -0,0 +1,199 @@
+module global_field_module
+  use iso_fortran_env, only : real64,int64
+  use co_object_interface, only : co_object
+  use ForTrilinos_assertion_utility, only : assert,error_message
+  use local_field_module, only : local_field
+  implicit none
+  private
+  public :: global_field,initial_condition
+
+  type, extends(co_object) :: global_field
+    private
+    real(real64), allocatable :: values(:)[:]
+  contains
+    procedure :: set
+    procedure :: state
+    procedure :: x
+    procedure :: xx
+    procedure, nopass :: grid_spacing
+    procedure, private :: assign_local_field
+    procedure, private :: add_local_field
+    procedure, private :: multiply
+    generic :: operator(*) => multiply
+    generic :: operator(+) => add_local_field
+    generic :: assignment(=) => assign_local_field
+    procedure :: output
+  end type
+
+  abstract interface
+    pure function initial_condition(x) result(initial_values)
+      import :: real64
+      real(real64), intent(in) :: x
+      real(real64) :: initial_values
+    end function
+  end interface
+
+  real(real64), allocatable :: dx
+  integer(int64), allocatable :: num_global_points,num_local_points
+  integer(int64), parameter:: num_end_points=2_int64
+  real(real64) :: boundary_vals(num_end_points)
+
+contains
+
+  function grid_spacing() result(delta_x)
+    real(real64) :: delta_x
+    call assert(allocated(dx),error_message("global_field%grid_spacing: dx not allocated"))
+    delta_x = dx
+  end function
+
+  pure function state(this) result(local_values)
+    class(global_field), intent(in) :: this
+    real(real64), allocatable :: local_values(:)
+    ! Requires
+    if (this%user_defined()) then
+      local_values = this%values
+    end if
+  end function
+
+  subroutine synchronize()
+    if (num_images()>1) then
+      associate(me=>this_image())
+        if (me==1) then
+          sync images(me+1)
+        else if (me==num_images()) then
+          sync images(me-1)
+        else
+          sync images([me-1,me+1])
+        end if
+      end associate
+    end if
+  end subroutine
+
+  subroutine set(this,initial_function,num_points)
+    class(global_field), intent(inout) :: this
+    integer, intent(in) :: num_points
+    procedure(initial_condition), pointer :: initial_function
+    integer(int64) :: num_intervals,i
+    real(real64), parameter :: two_pi=2.*3.1415926535897932384626433832795028842_real64
+    real(real64), allocatable :: local_grid(:)
+
+    ! Requires
+    call assert(mod(num_points,num_images())==0,error_message("global_field%set: num_points not evenly divisible by num_images()"))
+
+    num_global_points=num_points
+    num_local_points=num_points/num_images()
+    num_intervals = num_global_points ! right-side boundary point is redundant and therefore not counted or stored
+    dx=two_pi/real(num_intervals,real64)
+
+    if (.not.allocated(this%values)) allocate(this%values(num_local_points)[*])
+    local_grid = [((this_image()-1)*num_local_points+i-1,i=1,num_local_points)]*dx
+    do concurrent(i=1:num_local_points)
+      this%values(i) = initial_function(local_grid(i))
+    end do
+    call synchronize()
+
+    ! Ensures
+    call this%mark_as_defined
+    call assert(allocated(dx),error_message("global_field%set: dx has not been allocated"))
+    call assert(allocated(num_global_points),error_message("global_field%set: num_global_points has not been allocated"))
+    call assert(allocated(num_local_points),error_message("global_field%set: num_local_points has not been allocated"))
+  end subroutine
+
+  subroutine assign_local_field(lhs,rhs)
+    class(global_field), intent(inout) :: lhs
+    class(local_field), intent(in) :: rhs
+    real(real64), allocatable :: values(:)
+    ! Requires
+    if (.not.allocated(num_local_points)) error stop "global_field: no value established for memory allocation yet."
+    if (.not.allocated(lhs%values)) allocate(lhs%values(num_local_points)[*])
+    call assert(rhs%user_defined(),error_message("global_field%assign_local_field received uninitialized RHS."))
+    lhs%values(:) = rhs%state()
+    call synchronize()
+    ! Ensures
+    call lhs%mark_as_defined
+  end subroutine
+
+  pure function add_local_field(lhs,rhs) result(total)
+    class(global_field), intent(in) :: lhs
+    type(local_field), intent(in) :: rhs
+    type(local_field) :: total
+    ! Requires
+    if (lhs%user_defined() .and. rhs%user_defined()) then
+      total = lhs%values + rhs%state()
+      call total%mark_as_defined
+    end if
+  end function
+
+  pure function multiply(lhs,rhs) result(product_)
+    class(global_field), intent(in) :: lhs,rhs
+    type(local_field) :: product_
+    ! Requires
+    if (lhs%user_defined() .and. rhs%user_defined()) then
+      product_= lhs%values * rhs%values
+      call product_%mark_as_defined
+    end if
+  end function
+
+ pure function x(this) result(this_x)
+    class(global_field), intent(in) :: this
+    type(local_field) :: this_x
+    real(real64) :: local_this_x(num_local_points)
+    integer(int64) :: i,left_neighbor,right_neighbor
+    ! Requires
+    if (this%user_defined() .and. allocated(dx) .and. allocated(num_local_points)) then
+      associate(N=>num_local_points,me=>this_image())
+        left_neighbor = merge(num_images(),me-1,me==1)
+        local_this_x(1)=(this%values(2)-this%values(N)[left_neighbor])/(2._real64*dx)
+        do concurrent(i=2:N-1)
+          local_this_x(i)=(this%values(i+1)-this%values(i-1))/(2._real64*dx)
+        end do
+        right_neighbor = merge(1,me+1,me==num_images())
+        local_this_x(N)=(this%values(1)[right_neighbor]-this%values(N-1))/(2._real64*dx)
+      end associate
+      this_x = local_this_x
+      ! Ensures
+      call this_x%mark_as_defined
+    end if
+  end function
+
+ !pure function xx(this) result(this_xx)
+  function xx(this) result(this_xx)
+    class(global_field), intent(in) :: this
+    type(local_field) :: this_xx
+    real(real64) :: local_this_xx(num_local_points)
+    integer(int64) :: i,left_neighbor,right_neighbor
+    ! Requires
+    if (this%user_defined() .and. allocated(dx) .and. allocated(num_local_points)) then
+      associate(N=>num_local_points,me=>this_image())
+        left_neighbor = merge(num_images(),me-1,me==1)
+        local_this_xx(1)=(this%values(2)-2._real64*this%values(1)+this%values(N)[left_neighbor])/dx**2
+        do concurrent(i=2:N-1)
+          local_this_xx(i)=(this%values(i+1)-2._real64*this%values(i)+this%values(i-1))/dx**2
+        end do
+        right_neighbor = merge(1,me+1,me==num_images())
+        local_this_xx(N)=(this%values(1)[right_neighbor]-2._real64*this%values(N)+this%values(N-1))/dx**2
+      end associate
+      this_xx = local_this_xx
+      ! Ensures
+      call this_xx%mark_as_defined
+     !print *,"On image ",this_image(),", local_this_xx=",local_this_xx
+     !stop
+    end if
+  end function
+
+  subroutine output(this,unit,iotype,v_list,iostat,iomsg)
+    class(global_field), intent(in) :: this
+    integer, intent(in) :: unit ! Unit on which output happens (negative for internal file)
+    character(*), intent(in) :: iotype ! Allowable values: ’LISTDIRECTED’,’NAMELIST’, or ’DT’
+    integer, intent(in) :: v_list(:)
+    integer, intent(out) :: iostat
+    character(*), intent(inout) :: iomsg
+    integer(int64) :: i
+    ! Requires
+    call assert(this%user_defined(),error_message("global_field%output received uninitialized object"))
+    do i=1,size(this%values)
+      write(unit,iostat=iostat) (this_image()-1)*size(this%values) + i, this%values(i)
+    end do
+  end subroutine
+
+end module
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/cray_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/cray_capabilities.txt
new file mode 100644
index 0000000..038a0b8
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/cray_capabilities.txt
@@ -0,0 +1,3 @@
+#define COMPILER_SUPPORTS_CO_SUM
+#define COMPILER_SUPPORTS_IEEE_MATH
+#define COMPILER_LACKS_DERIVED_TYPE_IO
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/gfortran4.8_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/gfortran4.8_capabilities.txt
new file mode 100644
index 0000000..62347c4
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/gfortran4.8_capabilities.txt
@@ -0,0 +1,3 @@
+#define ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+#define LACKS_SUPPORT_FOR_PARENT_WITH_COARRAY
+#define COMPILER_LACKS_CO_SUM
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/gfortran_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/gfortran_capabilities.txt
new file mode 100644
index 0000000..951266a
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/gfortran_capabilities.txt
@@ -0,0 +1 @@
+#define COMPILER_LACKS_DERIVED_TYPE_IO
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/ibm_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/ibm_capabilities.txt
new file mode 100644
index 0000000..8b34ff6
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/ibm_capabilities.txt
@@ -0,0 +1,2 @@
+
+#define COMPILER_LACKS_CO_SUM
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/intel_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/intel_capabilities.txt
new file mode 100644
index 0000000..9579384
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/intel_capabilities.txt
@@ -0,0 +1,3 @@
+#define COMPILER_LACKS_IMPURE
+#define COMPILER_LACKS_CO_SUM
+#define COMPILER_LACKS_DERIVED_TYPE_IO
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/nag_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/nag_capabilities.txt
new file mode 100644
index 0000000..0034b15
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/nag_capabilities.txt
@@ -0,0 +1,3 @@
+#define COMPILER_LACKS_CO_SUM
+#define COMPILER_LACKS_DERIVED_TYPE_IO
+#define COMPILER_LACKS_ERROR_STOP
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/portlandgroup_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/portlandgroup_capabilities.txt
new file mode 100644
index 0000000..d14cea5
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/portlandgroup_capabilities.txt
@@ -0,0 +1,3 @@
+#define COMPILER_LACKS_CO_SUM
+#define COMPILER_LACKS_ERROR_STOP
+#define COMPILER_LACKS_DO_CONCURRENT
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/tau_cray_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/tau_cray_capabilities.txt
new file mode 100644
index 0000000..7989121
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/tau_cray_capabilities.txt
@@ -0,0 +1,5 @@
+#define COMPILER_SUPPORTS_CO_SUM
+#define COMPILER_SUPPORTS_IEEE_MATH
+#define COMPILER_LACKS_DERIVED_TYPE_IO
+#define TAU_CRAY
+#define TAU
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/include-files/tau_intel_capabilities.txt b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/tau_intel_capabilities.txt
new file mode 100644
index 0000000..b35ef4f
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/include-files/tau_intel_capabilities.txt
@@ -0,0 +1,5 @@
+#define COMPILER_LACKS_IMPURE
+#define COMPILER_LACKS_CO_SUM
+#define COMPILER_LACKS_DERIVED_TYPE_IO
+#define TAU
+#define TAU_INTEL
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/library/ForTrilinos_assertion_utility.F90 b/src/tests/integration/pde_solvers/coarrayBurgers/library/ForTrilinos_assertion_utility.F90
new file mode 100644
index 0000000..0687e81
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/library/ForTrilinos_assertion_utility.F90
@@ -0,0 +1,174 @@
+!*********************************************************************
+! ForTrilinos: Object-Oriented Fortran 2003 interface to Trilinos
+!     Copyright 2010 Sandia Corporation
+!
+! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+! the U.S. Government retains certain rights in this software.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!
+! 1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!
+! 2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!
+! 3. Neither the name of the Corporation nor the names of the
+!    contributors may be used to endorse or promote products derived from
+!    this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+! Questions? Contact Karla Morris  (knmorri at sandia.gov)
+!                    Damian Rouson (rouson at sandia.gov)
+!*********************************************************************
+
+module ForTrilinos_assertion_utility
+#include "compiler_capabilities.txt"
+  use iso_fortran_env ,only : error_unit
+  use object_interface, only : object
+  implicit none
+  !> @cond Private
+  private
+  !> @endcond
+  public :: error_message,assert,assert_identical
+
+!> @cond Do not show max_string_length
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+  integer ,parameter :: max_string_length=256
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+!> @endcond
+  type error_message
+    private
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+    character(len=max_string_length) :: string ! gfortran 4.7.0 workaround
+#else
+    character(:) ,allocatable :: string
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+    integer, allocatable :: idata(:)
+    real, allocatable :: rdata(:)
+    complex, allocatable :: cdata(:)
+    character, allocatable :: chdata(:)
+    logical, allocatable :: ldata(:)
+    class(object), allocatable :: odata
+  end type
+
+  !> @cond Interface
+  interface error_message ! constructor
+    module procedure new_message
+  end interface
+
+  interface assert
+    module procedure scalar_assert,vector_assert
+  end interface
+  !> @endcond
+
+contains
+
+  type(error_message) function new_message(message,message_data)
+    use object_interface, only : object
+    character(len=*), intent(in) :: message
+    class(*), intent(in), optional :: message_data
+    new_message%string = message
+    if (present(message_data)) then
+      select type(message_data)
+        type is (character(len=*))
+          new_message%chdata = message_data
+        type is (real)
+          new_message%rdata = message_data
+        type is (integer)
+          new_message%idata = message_data
+        type is (logical)
+          new_message%ldata = message_data
+        type is (complex)
+          new_message%cdata = message_data
+        class is (object)
+          allocate(new_message%odata,source = message_data)
+        class default
+#ifndef COMPILER_LACKS_ERROR_STOP
+          error  &
+#endif
+          stop "error_message constructor: unrecognized message_data type"
+      end select
+    end if
+  end function
+
+  subroutine scalar_assert(assertion,message)
+    logical ,intent(in) :: assertion
+    type(error_message) ,intent(in) :: message
+    integer io_status
+    character(len=132) io_message
+    if (.not. assertion) then
+      write(error_unit,fmt='(31a)',advance="no") 'Assertion failed with message: '
+#ifndef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+      if (allocated(message%string)) then
+#endif
+        write(error_unit,*) message%string
+#ifndef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+      else
+        write(error_unit,*) '(no message provided).'
+      end if
+#endif
+      if (allocated(message%idata)) write(error_unit,*) 'Integer test data: ',message%idata
+      if (allocated(message%rdata)) write(error_unit,*) 'Real test data: ',message%rdata
+      if (allocated(message%odata)) then
+#ifdef COMPILER_LACKS_DERIVED_TYPE_IO
+        call message%odata%output(error_unit,v_list=[10,3],iotype='DT',iostat=io_status,iomsg=io_message)
+#else
+        write(error_unit,fmt="(dt(10,3))",iostat=io_status,iomsg=io_message)  message%odata
+#endif /* COMPILER_LACKS_DERIVED_TYPE_IO */
+      end if
+
+      stop "scalar_assert: assertion failure"
+    end if
+  end subroutine
+
+  subroutine vector_assert(assertion,text)
+    logical ,dimension(:) ,intent(in) :: assertion
+    type(error_message) ,dimension(:) ,intent(in) :: text
+    integer :: i
+    logical :: any_failures
+    call assert_identical( [size(assertion),size(text)] )
+    any_failures=.false.
+    do i=1,size(assertion)
+      if (.not. assertion(i)) then
+        any_failures=.true.
+        write(error_unit,fmt='(31a)',advance="no") 'Assertion failed with message: '
+       !if (allocated(text(i)%string)) then
+          write(error_unit,*) text(i)%string
+       !else
+       !  write(error_unit,*) '(no message provided).'
+       !end if
+      end if
+    end do
+    if (any_failures) stop 'Execution halted on failed assertion(s)!'
+  end subroutine
+
+
+  subroutine assert_identical(integers)
+    integer ,dimension(:) ,intent(in) :: integers
+    integer :: i
+    logical :: any_mismatches
+    any_mismatches = .false.
+    do i=2,size(integers)
+      if (integers(i) /= integers(1)) then
+        any_mismatches = .true.
+        write(error_unit,*) &
+        'Value ',i,' does not match expected value ',integers(1)
+      end if
+    end do
+    if (any_mismatches) stop 'Execution halted on failed assertion!'
+  end subroutine
+end module
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/library/ForTrilinos_error.F90 b/src/tests/integration/pde_solvers/coarrayBurgers/library/ForTrilinos_error.F90
new file mode 100644
index 0000000..3f3c981
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/library/ForTrilinos_error.F90
@@ -0,0 +1,86 @@
+!*********************************************************************
+! ForTrilinos: Object-Oriented Fortran 2003 interface to Trilinos
+!                Copyright 2010 Sandia Corporation
+!
+! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+! the U.S. Government retains certain rights in this software.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!
+! 1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!
+! 2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!
+! 3. Neither the name of the Corporation nor the names of the
+!    contributors may be used to endorse or promote products derived from
+!    this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+! Questions? Contact Karla Morris  (knmorri at sandia.gov) or
+!                    Damian Rouson (rouson at sandia.gov)
+!*********************************************************************
+
+module ForTrilinos_error
+#include "compiler_capabilities.txt"
+  implicit none
+  private
+  public :: error
+
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+  integer ,parameter :: max_string_length=256
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+
+  type :: error
+    private
+    integer code
+    class(*), allocatable :: data_(:)
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+    character(len=max_string_length) message ! gfortran 4.7.0 workaround
+#else
+    character(:) ,allocatable :: message
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+  contains
+    procedure :: error_code
+    procedure :: define_error
+    generic :: error=>define_error
+  end type
+
+contains
+
+  subroutine define_error(this,new_code,new_message,new_data)
+    class(error), intent(out) :: this
+    integer ,intent(in) :: new_code
+    character(len=*) ,intent(in) :: new_message
+    class(*) ,intent(in), optional :: new_data(:)
+    this%code = new_code
+    this%message = new_message
+    if (present(new_data)) allocate(this%data_(lbound(new_data,1):ubound(new_data,1)),source=new_data)
+  end subroutine
+
+  integer function error_code(this)
+    class(error) ,intent(in) :: this
+    error_code = this%code
+  end function
+
+  function error_message(this)
+    class(error) ,intent(in) :: this
+    character(:), allocatable :: error_message
+    error_message = this%message
+  end function
+
+end module ForTrilinos_error
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/library/co_object_interface.F90 b/src/tests/integration/pde_solvers/coarrayBurgers/library/co_object_interface.F90
new file mode 100644
index 0000000..86d9d90
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/library/co_object_interface.F90
@@ -0,0 +1,47 @@
+module co_object_interface
+  implicit none
+  private
+  public :: co_object
+
+  ! Define an abstract base class to ensure basic functionality expected to be provided by all concrete Morfeus classes.
+  ! Each concrete class provides the functionality by extending this class and implementing its deferred binding(s).  This
+  ! class resembles java's Object class in the sense that it is intended to be the ultimate ancester of every other class.
+  type, abstract :: co_object
+    private
+    logical :: defined=.false. ! Mark all co_objects as not-yet user-defined by default
+    real, allocatable :: dummy_to_facilitate_extension[:]
+  contains
+    procedure :: mark_as_defined
+    procedure :: user_defined
+    procedure(formatted_output_interface), deferred :: output
+   !generic :: write(unformatted) => output  ! Derived-type I/O not yet supported by most compilers
+  end type
+
+  ! Require child classes to write an "output" procedure that prints to the passed file unit
+  abstract interface
+    subroutine formatted_output_interface(this,unit,iotype,v_list,iostat,iomsg)
+      import co_object
+      class(co_object), intent(in) :: this
+      integer, intent(in) :: unit ! Unit on which output happens (negative for internal file)
+      character(*), intent(in) :: iotype ! Allowable values: ’LISTDIRECTED’,’NAMELIST’, or ’DT’
+      integer, intent(in) :: v_list(:)
+      integer, intent(out) :: iostat
+      character(*), intent(inout) :: iomsg
+    end subroutine
+  end interface
+
+contains
+
+  ! Mark the co_object as user-defined
+  pure subroutine mark_as_defined(this)
+    class(co_object), intent(inout) :: this
+    this%defined=.true.
+  end subroutine
+
+  ! Return a boolean result indicating whether this co_object has been initialized since its declaration
+  logical pure function user_defined(this)
+    class(co_object), intent(in) :: this
+    user_defined = this%defined
+  end function
+
+end module
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/library/object_interface.F90 b/src/tests/integration/pde_solvers/coarrayBurgers/library/object_interface.F90
new file mode 100644
index 0000000..677882d
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/library/object_interface.F90
@@ -0,0 +1,51 @@
+module object_interface
+#include "compiler_capabilities.txt"
+  implicit none
+  private
+  public :: object
+
+  ! Define an abstract parent type to ensure basic functionality expected to be provided by all non-abstract types.
+  ! Each non-abstract type provides the functionality by extending this type and implementing its deferred binding(s).  This
+  ! type resembles java's Object class in the sense that it is intended to be the ultimate ancester of every other type.
+  type, abstract :: object
+    private
+    logical :: defined=.false. ! Mark all objects as not-yet user-defined by default
+  contains
+    procedure :: mark_as_defined
+    procedure :: user_defined
+    procedure(output_interface), deferred :: output
+#ifndef COMPILER_LACKS_DERIVED_TYPE_IO
+    generic :: write(formatted) => output  ! Derived-type I/O
+#endif /* COMPILER_LACKS_DERIVED_TYPE_IO */
+  end type
+
+  ! Require child classes to write an "output" procedure that prints to the passed file unit
+  abstract interface
+
+    subroutine output_interface(this,unit,iotype,v_list,iostat,iomsg)
+      import object
+      class(object), intent(in) :: this
+      integer, intent(in) :: unit
+      character(len=*), intent(in) :: iotype
+      integer, intent(in) :: v_list(:)
+      integer, intent(out) :: iostat
+      character(len=*), intent(inout) :: iomsg
+    end subroutine
+
+  end interface
+
+contains
+
+  ! Mark the object as user-defined
+  pure subroutine mark_as_defined(this)
+    class(object), intent(inout) :: this
+    this%defined=.true.
+  end subroutine
+
+  ! Return a boolean result indicating whether this object has been initialized since its declaration
+  logical pure function user_defined(this)
+    class(object), intent(in) :: this
+    user_defined = this%defined
+  end function
+
+end module
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/local_field.F90 b/src/tests/integration/pde_solvers/coarrayBurgers/local_field.F90
new file mode 100644
index 0000000..6d02b7c
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/local_field.F90
@@ -0,0 +1,76 @@
+module local_field_module
+   use iso_fortran_env, only : real64,int64
+   use ForTrilinos_assertion_utility, only : assert,error_message
+   use object_interface, only : object
+   implicit none
+   private
+   public :: local_field
+
+   type, extends(object) :: local_field
+     private
+     real(real64), allocatable :: values(:)
+   contains
+     procedure :: state
+     procedure, private, pass(rhs) :: multiply
+     procedure, private :: subtract
+     procedure, private :: assign_array
+     generic :: operator(-)=>subtract
+     generic :: operator(*)=>multiply
+     generic :: assignment(=)=>assign_array
+     procedure :: output
+   end type
+
+contains
+
+  pure subroutine assign_array(lhs,rhs)
+    class(local_field), intent(inout) :: lhs
+    real(real64), intent(in) :: rhs(:)
+    lhs%values = rhs
+    ! Ensures
+    call lhs%mark_as_defined
+  end subroutine
+
+  pure function subtract(lhs,rhs) result(difference)
+    class(local_field), intent(in) :: lhs,rhs
+    type(local_field) :: difference
+    !Requires
+    if (lhs%user_defined() .and. rhs%user_defined()) then
+      difference%values = lhs%values - rhs%values
+      ! Ensures
+      call difference%mark_as_defined
+    end if
+  end function
+
+  pure function multiply(lhs,rhs) result(product_)
+    class(local_field), intent(in) :: rhs
+    type(local_field) :: product_
+    real(real64), intent(in) :: lhs
+    if (rhs%user_defined()) then
+      product_%values = lhs*rhs%values
+      ! Ensures
+      call product_%mark_as_defined
+    end if
+  end function
+
+  pure function state(this) result(this_values)
+    class(local_field), intent(in) :: this
+    real(real64), allocatable :: this_values(:)
+    this_values = this%values
+  end function
+
+  subroutine output(this,unit,iotype,v_list,iostat,iomsg)
+    class(local_field), intent(in) :: this
+    integer, intent(in) :: unit
+    character(len=*), intent(in) :: iotype
+    integer, intent(in) :: v_list(:)
+    integer, intent(out) :: iostat
+    character(len=*), intent(inout) :: iomsg
+    integer(int64) :: i
+    ! Requires
+    call assert(this%user_defined(),error_message("local_field%output received uninitialized object"))
+    do i=1,size(this%values)
+      write(unit,iostat=iostat) (this_image()-1)*size(this%values) + i, this%values(i)
+    end do
+  end subroutine
+
+end module
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/main.F90 b/src/tests/integration/pde_solvers/coarrayBurgers/main.F90
new file mode 100644
index 0000000..5225e2b
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/main.F90
@@ -0,0 +1,86 @@
+program main
+  use iso_fortran_env, only : real64,int64,compiler_version,compiler_options
+  use ieee_arithmetic, only : ieee_is_nan
+  use global_field_module, only : global_field,initial_condition
+  use ForTrilinos_assertion_utility, only : assert,error_message
+  implicit none
+  type(global_field) :: u,u_half,half_uu
+  real(real64), parameter :: nu=1.,final_time=0.6_real64,tolerance=1.E-3_real64,safety_factor=0.1_real64
+  real(real64) :: time=0.,dt,dx
+  integer, parameter :: nodes=16
+  procedure(initial_condition), pointer :: initial_u=>ten_sin
+
+#ifdef TAU
+  call TAU_PROFILE_SET_NODE(this_image()-1) ! Start TAU (Cray or GNU compiler)
+#else
+#ifdef TAU_INTEL
+  call TAU_PROFILE_SET_NODE(this_image())   ! Start TAU (Intel compiler)
+#endif
+#endif
+
+  call u%set(initial_u,num_points=nodes)
+  dx = u%grid_spacing()
+  dt = safety_factor*diffusion_stability_limit(nu,dx,order_of_accuracy=2)
+  do while(time<final_time)
+    half_uu = 0.5_real64*(u*u)
+    u_half = u + (dt/2._real64)*(nu*u%xx() - half_uu%x())
+    half_uu = 0.5_real64*(u_half*u_half)
+    u      = u + dt*(nu*u_half%xx() - half_uu%x())
+    time = time + dt
+  end do
+  if (this_image()==1) print *,"Time =",time
+  print *,"On image ",this_image(),"u =",u%state()
+  call test(u)
+  sync all
+  if (this_image()==1) print *,"Test passed."
+
+contains
+  subroutine test(burgers_solution)
+    type(global_field), intent(in) :: burgers_solution
+    call assert(.not.any(ieee_is_nan(u%state())),error_message("Test failed: u is not a number."))
+    call assert(sinusoid(u),error_message("Test failed: improper shape."))
+  end subroutine
+
+  function sinusoid(u_solution) result(is_sinusoid)
+    type(global_field), intent(in) :: u_solution
+    type(global_field) :: u_xx
+    logical :: is_sinusoid
+    real(real64), parameter :: threshold=-0.001,cap=0.001
+    real(real64), allocatable :: u_xx_state(:)
+    u_xx = u_solution%xx()
+    u_xx_state = u_xx%state()
+    if (num_images()/=1) then
+      ! Ensure that the global midpoint is a local endpoint for whatever image contains the midpoint:
+      call assert(mod(num_images(),2)==0,error_message("Test failed: uneven number of images."))
+      ! Ensure that the left and right halves of the solution are concave down and up, respectively:
+      if (this_image()<=num_images()/2) then
+        call assert(all(u_xx_state<cap),error_message("Test failed: right half not concave up."))
+      else
+        call assert(all(u_xx_state>threshold),error_message("Test failed: left half not concave down."))
+      end if
+    else
+      block
+        integer :: size_u_xx
+        size_u_xx = size(u_xx_state)
+        call assert(all(u_xx_state(1:size_u_xx/2)<cap),error_message("Test failed: left half not concave down."))
+        call assert(all(u_xx_state(size_u_xx/2+1:size_u_xx)>threshold),error_message("Test failed: right half not concave up."))
+      end block
+    end if
+    is_sinusoid=.true.
+  end function
+
+  pure function diffusion_stability_limit(diffusivity,delta_x,order_of_accuracy)  result(stable_time_step)
+    real(real64), intent(in) :: diffusivity,delta_x
+    integer, intent(in) :: order_of_accuracy
+    real(real64) :: stable_time_step
+    real(real64), parameter, dimension(*) :: stability_limit=[2.,2.,2.5,2.79] ! third value needs to be checked
+    ! See Moin, P. (2010) Fundamentals of Engineering Numerical Analysis, 2nd ed., pp. 111-116.
+    stable_time_step = safety_factor*stability_limit(order_of_accuracy)*(delta_x**2)/(4._real64*diffusivity)
+  end function
+
+  pure function ten_sin(x) result(ten_sin_x)
+    real(real64), intent(in) :: x
+    real(real64) :: ten_sin_x
+    ten_sin_x = 10._real64*sin(x)
+  end function
+end program
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/run.sh b/src/tests/integration/pde_solvers/coarrayBurgers/run.sh
new file mode 100755
index 0000000..b2a812c
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/run.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+echo "Building the code:"
+export TAU_MAKEFILE=/opt/paratools/tau/x86_64/lib/Makefile.tau-mpi-pdt
+export TAU_OPTIONS="-optVerbose -optCompInst"
+make clean
+make -f Makefile.inst
+
+# Specify TAU parameters here:
+export TAU_CALLPATH=1
+export TAU_CALLPATH_DEPTH=100
+#export TAU_SAMPLING=1
+
+for i in 1 2 4
+do
+  echo "Running the code:"
+  mpirun -np ${i} ./burgers
+  paraprof --pack ${i}p.ppk
+  taudb_loadtrial -a fireworks -x experiment -n ${i} ${i}p.ppk
+done
+
+echo "Running the pprof command:"
+pprof
+echo "Running the TAU paraprof analyzer command:"
+paraprof &
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce.pbs b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce.pbs
new file mode 100644
index 0000000..43d7490
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce.pbs
@@ -0,0 +1,15 @@
+#!/bin/bash --login
+#PBS -l mppwidth=64
+#PBS -l walltime=00:10:00
+#PBS -l mppnppn=16
+#PBS -j oe
+module load craype-hugepages2M
+# change to the directory from which the job was submitted
+cd $PBS_O_WORKDIR
+# Get total number of processes and the number per node
+#export NPROC=‘qstat -f $PBS_JOBID | awk ’/mppwidth/ {print $3}’‘
+#export NTASK=‘qstat -f $PBS_JOBID | awk ’/mppnppn/ {print $3}’‘
+echo "Starting job $PBS_JOBID at ‘date‘"
+time aprun -n64 -N16 ./burgers_caf
+#time aprun -n$NPROC -N $NTASK ./parallel_tmps
+echo "Finished job $PBS_JOBID at ‘date‘"
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce_multiple.pbs b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce_multiple.pbs
new file mode 100755
index 0000000..0d33146
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce_multiple.pbs
@@ -0,0 +1,26 @@
+#!/bin/bash --login
+#PBS -q debug
+#PBS -l mppwidth=48
+#PBS -l mppnppn=24
+#PBS -l walltime=00:30:00
+#PBS -N burgers_caf_48
+#PBS -e burgers_caf.$PBS_JOBID.err
+#PBS -o burgers_caf.$PBS_JOBID.out
+#PBS -j oe
+module load craype-hugepages8M
+#module unload darshan
+module swap PrgEnv-pgi PrgEnv-cray
+# change to the directory from which the job was submitted
+cd $PBS_O_WORKDIR
+# Get total number of processes and the number per node
+export NPROC=`qstat -f $PBS_JOBID |awk '/\.mppwidth/ {print $3}'`
+export NTASK=`qstat -f $PBS_JOBID |awk '/\.mppnppn/ {print $3}'`
+echo NPROC = $NPROC
+echo NTASK = $NTASK
+mkdir ${NPROC}_${NTASK}
+cd ${NPROC}_${NTASK}
+echo "NNODES = $NPROC , NTASKS_PER_NODE = $NTASK" >> burgers_${NPROC}_${NTASK}.cray
+for i in {1..5}
+do
+ aprun -n$NPROC -N $NTASK ../burgers_caf_$NPROC >> burgers_${NPROC}_${NTASK}.cray
+done
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce_tau.pbs b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce_tau.pbs
new file mode 100755
index 0000000..0d152f8
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/coarrayBurgers_cce_tau.pbs
@@ -0,0 +1,24 @@
+#!/bin/bash --login
+#PBS -q debug
+#PBS -l mppwidth=48
+#PBS -l mppnppn=24
+#PBS -l walltime=00:30:00
+#PBS -j oe
+module load craype-hugepages8M
+module unload darshan
+module swap PrgEnv-pgi PrgEnv-cray
+module load java/1.6.0_13
+export PATH=/global/u1/s/sameer/pkgs/tau-2.23/craycnl/bin:$PATH
+export TAU_MAKEFILE=/global/u1/s/sameer/pkgs/tau-2.23/craycnl/lib/Makefile.tau-cray-cce822-mpi-pdt
+export TAU_COMM_MATRIX=1
+export PATH=/global/homes/s/sameer/tau2/craycnl/binutils-2.23.1/bin:$PATH
+export TAU_SAMPLING=1
+# change to the directory from which the job was submitted
+cd $PBS_O_WORKDIR
+# Get total number of processes and the number per node
+#export NPROC=‘qstat -f $PBS_JOBID | awk ’/mppwidth/ {print $3}’‘
+#export NTASK=‘qstat -f $PBS_JOBID | awk ’/mppnppn/ {print $3}’‘
+echo "Starting job $PBS_JOBID at ‘date‘"
+time aprun -n48 -N24 ./burgers_caf
+#time aprun -n$NPROC -N $NTASK ./parallel_tmps
+echo "Finished job $PBS_JOBID at ‘date‘"
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run.sh b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run.sh
new file mode 100755
index 0000000..da25290
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+echo "Building the code:"
+export TAU_MAKEFILE=/opt/paratools/tau/x86_64/lib/Makefile.tau-mpi-pdt
+export TAU_OPTIONS="-optVerbose -optCompInst"
+make clean
+make -f Makefile.inst
+
+# Specify TAU parameters here:
+export TAU_CALLPATH=1
+export TAU_CALLPATH_DEPTH=100
+#export TAU_SAMPLING=1
+
+echo "Running the code:"
+mpirun -np 4 ./burgers
+
+echo "Running the pprof command:"
+pprof
+echo "Running the TAU paraprof analyzer command:"
+paraprof &
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run.sh.with_modules b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run.sh.with_modules
new file mode 100755
index 0000000..7c8f7fe
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run.sh.with_modules
@@ -0,0 +1,12 @@
+#!/bin/bash
+source /usr/local/packages/Modules/init/bash
+module load intel/14.0
+source /usr/local/packages/intel/14.0/bin/ifortvars.sh intel64
+export TAU_MAKEFILE=/usr/local/packages/tau-2.22.2/x86_64/lib/Makefile.tau-coarray-icpc-mpi
+export TAU_COMM_MATRIX=1
+export PATH=/usr/local/packages/tau-2.22.2/x86_64/bin:$PATH
+module load java
+export PATH=/usr/local/packages/intel/14.0/mpirt/bin/intel64/:$PATH
+export FOR_COARRAY_NUM_IMAGES=32
+echo "Using FOR_COARRAY_NUM_IMAGES = " $FOR_COARRAY_NUM_IMAGES
+./burgers_caf
diff --git a/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run_troubleshooting.sh b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run_troubleshooting.sh
new file mode 100755
index 0000000..e9700b1
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayBurgers/scripts/run_troubleshooting.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+source /usr/local/packages/Modules/init/bash
+module load intel/14.0
+source /usr/local/packages/intel/14.0/bin/ifortvars.sh intel64
+export TAU_MAKEFILE=/usr/local/packages/tau-2.22.2/x86_64/lib/Makefile.tau-coarray-icpc-mpi
+export TAU_COMM_MATRIX=1
+export PATH=/usr/local/packages/tau-2.22.2/x86_64/bin:$PATH
+module load java
+export PATH=/usr/local/packages/intel/14.0/mpirt/bin/intel64/:$PATH
+#export FOR_COARRAY_NUM_IMAGES=1
+echo "Using FOR_COARRAY_NUM_IMAGES = " $FOR_COARRAY_NUM_IMAGES
+mpirun --mca btl_tcp_if_include eth2 -np 16 ./burgers_caf
+#./burgers_caf
diff --git a/src/tests/integration/pde_solvers/coarrayHeatSimplified/CMakeLists.txt b/src/tests/integration/pde_solvers/coarrayHeatSimplified/CMakeLists.txt
new file mode 100644
index 0000000..84c8ffc
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayHeatSimplified/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_executable(co_heat
+  main.f90
+  global_field.f90
+  local_field.f90
+)
+target_link_libraries(co_heat OpenCoarrays)
diff --git a/src/tests/integration/pde_solvers/coarrayHeatSimplified/Makefile b/src/tests/integration/pde_solvers/coarrayHeatSimplified/Makefile
new file mode 100644
index 0000000..31394bb
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayHeatSimplified/Makefile
@@ -0,0 +1,36 @@
+# Choose desired compiler (options: gnu, intel, cray):
+compiler=gnu
+executable=co_heat
+
+ifeq ($(compiler),gnu)
+  opencoarrays_dir=/opt/opencoarrays
+  opt=-L $(opencoarrays_dir)/lib
+  compile=mpif90 -fcoarray=lib
+  lib=-lcaf_mpi
+else
+  ifeq ($(compiler),intel)
+    compile=ifort -coarray=shared -standard-semantics -O3 -coarray-num-images=2
+  else
+    ifeq ($(compiler),cray)
+      compile=ftn -ew -h caf
+    endif
+  endif
+endif
+
+objects = local_field.o global_field.o
+
+$(executable): main.o $(objects) Makefile
+	$(compile) $(opt) main.o $(objects) -o $(executable) $(lib)
+
+main.o: main.f90 $(objects) Makefile
+	$(compile) -c main.f90
+
+global_field.o: global_field.f90 local_field.o Makefile
+	$(compile) -c global_field.f90
+
+local_field.o: local_field.f90 Makefile
+	$(compile) $(OPTS) -c local_field.f90
+
+.PHONY : clean
+clean :
+	-rm -f  *.o *.mod $(executable) core fort.*
diff --git a/src/tests/integration/pde_solvers/coarrayHeatSimplified/global_field.f90 b/src/tests/integration/pde_solvers/coarrayHeatSimplified/global_field.f90
new file mode 100644
index 0000000..037471c
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayHeatSimplified/global_field.f90
@@ -0,0 +1,136 @@
+! Coarray 1D Heat Equation Solver Test: global_field_module
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+module global_field_module
+  use local_field_module, only : local_field
+  implicit none
+  private
+  public :: global_field
+
+  type global_field
+    private
+    real, allocatable :: values(:)[:]
+  contains
+    procedure :: set
+    procedure :: only_allocate
+    generic :: global_field_=>set,only_allocate
+    procedure, private :: laplacian
+    generic :: operator(.laplacian.) => laplacian
+    procedure, private :: add_local_field
+    generic :: operator(+) => add_local_field
+    procedure, private :: assign_local_field
+    generic :: assignment(=) => assign_local_field
+    procedure :: state
+  end type
+
+  real :: dx
+  integer, allocatable :: num_local_points
+  integer, parameter:: num_end_points=2
+  real :: boundary_vals(num_end_points)
+
+contains
+
+  subroutine only_allocate(this)
+    class(global_field), intent(inout) :: this
+    if (.not.allocated(num_local_points)) error stop "global_field: no value established for memory allocation yet."
+    allocate(this%values(num_local_points)[*]) ! Implicit synchronization point
+  end subroutine
+
+  subroutine set(this,internal_values,boundary_values,domain,num_global_points)
+    class(global_field), intent(inout) :: this
+    integer, intent(in) :: num_global_points
+    real, intent(in) :: internal_values,domain(num_end_points),boundary_values(num_end_points)
+    if (mod(num_global_points,num_images())/=0) error stop "set: num_global_points not evenly divisible by num_images()"
+    if (this_image()==1 .or. this_image()==num_images()) boundary_vals = boundary_values
+    if (.not.allocated(num_local_points)) num_local_points=num_global_points/num_images()
+    dx=(domain(2)-domain(1))/num_global_points
+    allocate(this%values(num_local_points)[*])
+    associate(west=>1,east=>2)
+      this%values(1) = merge(boundary_values(west),internal_values,this_image()==1)
+      this%values(2:num_local_points-1) = internal_values
+      this%values(num_local_points) = merge(boundary_values(east),internal_values,this_image()==num_images())
+    end associate
+    call synchronize()
+  end subroutine
+
+  subroutine synchronize()
+    if (num_images()>1) then
+      associate(me=>this_image())
+        if (me==1) then
+          sync images(me+1)
+        else if (me==num_images()) then
+          sync images(me-1)
+        else
+          sync images([me-1,me+1])
+        end if
+      end associate
+    end if
+  end subroutine
+
+  pure function laplacian(rhs) result(laplacian_rhs)
+    class(global_field), intent(in) :: rhs
+    type(local_field) :: laplacian_rhs
+    real :: local_laplacian(num_local_points)
+    integer :: i
+    associate(N=>num_local_points,me=>this_image())
+      if (me==1) then
+        local_laplacian(1) = 0.
+      else
+        local_laplacian(1)=(rhs%values(2)-2.*rhs%values(1)+rhs%values(N)[me-1])/dx**2
+      end if
+      do concurrent(i=2:N-1)
+        local_laplacian(i)=(rhs%values(i+1)-2.*rhs%values(i)+rhs%values(i-1))/dx**2
+      end do
+      if (me==num_images()) then
+        local_laplacian(N) = 0.
+      else
+        local_laplacian(N)=(rhs%values(1)[me+1]-2.*rhs%values(N)+rhs%values(N-1))/dx**2
+      end if
+    end associate
+    laplacian_rhs = local_laplacian
+  end function
+
+  pure function add_local_field(lhs,rhs) result(total)
+    class(global_field), intent(in) :: lhs
+    type(local_field), intent(in) :: rhs
+    type(local_field) :: total
+    total = lhs%values + rhs%state()
+  end function
+
+  subroutine assign_local_field(lhs,rhs)
+    class(global_field), intent(inout) :: lhs
+    class(local_field), intent(in) :: rhs
+    lhs%values(:) = rhs%state()
+    call synchronize()
+  end subroutine
+
+  pure function state(this) result(this_values)
+    class(global_field), intent(in) :: this
+    real :: this_values(size(this%values(:)))
+    this_values = this%values
+  end function
+
+end module
diff --git a/src/tests/integration/pde_solvers/coarrayHeatSimplified/local_field.f90 b/src/tests/integration/pde_solvers/coarrayHeatSimplified/local_field.f90
new file mode 100644
index 0000000..204f73a
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayHeatSimplified/local_field.f90
@@ -0,0 +1,64 @@
+! Coarray 1D Heat Equation Solver Test: local_field_module
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+module local_field_module
+  implicit none
+  private
+  public :: local_field
+
+  type local_field
+    private
+    real, allocatable :: values(:)
+  contains
+    procedure, private :: multiply
+    generic :: operator(*)=>multiply
+    procedure :: state
+    procedure, private :: assign_array
+    generic :: assignment(=)=>assign_array
+  end type
+
+contains
+
+  pure function multiply(lhs,rhs) result(product_)
+    class(local_field), intent(in) :: lhs
+    type(local_field) :: product_
+    real, intent(in) :: rhs
+    product_%values = lhs%values*rhs
+  end function
+
+  pure function state(this) result(this_values)
+    class(local_field), intent(in) :: this
+    real :: this_values(size(this%values))
+    this_values = this%values
+  end function
+
+  pure subroutine assign_array(lhs,rhs)
+    class(local_field), intent(inout) :: lhs
+    real, intent(in) :: rhs(:)
+    lhs%values = rhs
+  end subroutine
+
+end module
diff --git a/src/tests/integration/pde_solvers/coarrayHeatSimplified/main.f90 b/src/tests/integration/pde_solvers/coarrayHeatSimplified/main.f90
new file mode 100644
index 0000000..4650c70
--- /dev/null
+++ b/src/tests/integration/pde_solvers/coarrayHeatSimplified/main.f90
@@ -0,0 +1,45 @@
+! Coarray 1D Heat Equation Solver Test: main
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+
+program main
+  use global_field_module, only : global_field
+  implicit none
+  type(global_field) :: T,laplacian_T,T_half
+  real, parameter :: alpha=1.,dt=0.0001,final_time=1.,tolerance=1.E-3
+  real :: time=0.
+  call T%global_field_(internal_values=0.,boundary_values=[1.,0.],domain=[0.,1.],num_global_points=16384)
+  call T_half%global_field_()
+  do while(time<final_time)
+    T_half = T + (.laplacian.T)*(alpha*dt/2.)
+    T      = T + (.laplacian.T_half)*(alpha*dt)
+    time = time + dt
+  end do
+  call laplacian_T%global_field_()
+  laplacian_T = .laplacian.T
+  if (any(laplacian_T%state()>tolerance)) error stop "Test failed."
+  if (this_image()==1) print *,"Test passed."
+end program
diff --git a/src/tests/integration/pde_solvers/include-files/cray_capabilities.txt b/src/tests/integration/pde_solvers/include-files/cray_capabilities.txt
new file mode 100644
index 0000000..e69de29
diff --git a/src/tests/integration/pde_solvers/include-files/gfortran_capabilities.txt b/src/tests/integration/pde_solvers/include-files/gfortran_capabilities.txt
new file mode 100644
index 0000000..951266a
--- /dev/null
+++ b/src/tests/integration/pde_solvers/include-files/gfortran_capabilities.txt
@@ -0,0 +1 @@
+#define COMPILER_LACKS_DERIVED_TYPE_IO
diff --git a/src/tests/integration/pde_solvers/include-files/ibm_capabilities.txt b/src/tests/integration/pde_solvers/include-files/ibm_capabilities.txt
new file mode 100644
index 0000000..8b34ff6
--- /dev/null
+++ b/src/tests/integration/pde_solvers/include-files/ibm_capabilities.txt
@@ -0,0 +1,2 @@
+
+#define COMPILER_LACKS_CO_SUM
diff --git a/src/tests/integration/pde_solvers/include-files/intel_capabilities.txt b/src/tests/integration/pde_solvers/include-files/intel_capabilities.txt
new file mode 100644
index 0000000..9579384
--- /dev/null
+++ b/src/tests/integration/pde_solvers/include-files/intel_capabilities.txt
@@ -0,0 +1,3 @@
+#define COMPILER_LACKS_IMPURE
+#define COMPILER_LACKS_CO_SUM
+#define COMPILER_LACKS_DERIVED_TYPE_IO
diff --git a/src/tests/integration/pde_solvers/include-files/nag_capabilities.txt b/src/tests/integration/pde_solvers/include-files/nag_capabilities.txt
new file mode 100644
index 0000000..a30193a
--- /dev/null
+++ b/src/tests/integration/pde_solvers/include-files/nag_capabilities.txt
@@ -0,0 +1 @@
+#define COMPILER_LACKS_CO_SUM
diff --git a/src/tests/integration/pde_solvers/include-files/portlandgroup_capabilities.txt b/src/tests/integration/pde_solvers/include-files/portlandgroup_capabilities.txt
new file mode 100644
index 0000000..a30193a
--- /dev/null
+++ b/src/tests/integration/pde_solvers/include-files/portlandgroup_capabilities.txt
@@ -0,0 +1 @@
+#define COMPILER_LACKS_CO_SUM
diff --git a/src/tests/integration/pde_solvers/include-files/tau_cray_capabilities.txt b/src/tests/integration/pde_solvers/include-files/tau_cray_capabilities.txt
new file mode 100644
index 0000000..7989121
--- /dev/null
+++ b/src/tests/integration/pde_solvers/include-files/tau_cray_capabilities.txt
@@ -0,0 +1,5 @@
+#define COMPILER_SUPPORTS_CO_SUM
+#define COMPILER_SUPPORTS_IEEE_MATH
+#define COMPILER_LACKS_DERIVED_TYPE_IO
+#define TAU_CRAY
+#define TAU
diff --git a/src/tests/integration/pde_solvers/include-files/tau_intel_capabilities.txt b/src/tests/integration/pde_solvers/include-files/tau_intel_capabilities.txt
new file mode 100644
index 0000000..b35ef4f
--- /dev/null
+++ b/src/tests/integration/pde_solvers/include-files/tau_intel_capabilities.txt
@@ -0,0 +1,5 @@
+#define COMPILER_LACKS_IMPURE
+#define COMPILER_LACKS_CO_SUM
+#define COMPILER_LACKS_DERIVED_TYPE_IO
+#define TAU
+#define TAU_INTEL
diff --git a/src/tests/integration/pde_solvers/library/CMakeLists.txt b/src/tests/integration/pde_solvers/library/CMakeLists.txt
new file mode 100644
index 0000000..5af305d
--- /dev/null
+++ b/src/tests/integration/pde_solvers/library/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_library(mofo
+   ForTrilinos_assertion_utility.F90
+   ForTrilinos_error.F90
+   object_interface.F90
+   co_object_interface.F90
+#  parse_command_line.F90
+)
diff --git a/src/tests/integration/pde_solvers/library/ForTrilinos_assertion_utility.F90 b/src/tests/integration/pde_solvers/library/ForTrilinos_assertion_utility.F90
new file mode 100644
index 0000000..b74f450
--- /dev/null
+++ b/src/tests/integration/pde_solvers/library/ForTrilinos_assertion_utility.F90
@@ -0,0 +1,169 @@
+!*********************************************************************
+! ForTrilinos: Object-Oriented Fortran 2003 interface to Trilinos
+!     Copyright 2010 Sandia Corporation
+!
+! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+! the U.S. Government retains certain rights in this software.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!
+! 1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!
+! 2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!
+! 3. Neither the name of the Corporation nor the names of the
+!    contributors may be used to endorse or promote products derived from
+!    this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+! Questions? Contact Karla Morris  (knmorri at sandia.gov)
+!                    Damian Rouson (rouson at sandia.gov)
+!*********************************************************************
+
+module ForTrilinos_assertion_utility
+#include "compiler_capabilities.txt"
+  use iso_fortran_env ,only : error_unit
+  use object_interface, only : object
+  implicit none
+  !> @cond Private
+  private
+  !> @endcond
+  public :: error_message,assert,assert_identical
+
+!> @cond Do not show max_string_length
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+  integer ,parameter :: max_string_length=256
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+!> @endcond
+  type error_message
+    private
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+    character(len=max_string_length) :: string ! gfortran 4.7.0 workaround
+#else
+    character(:) ,allocatable :: string
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+    integer, allocatable :: idata(:)
+    real, allocatable :: rdata(:)
+    complex, allocatable :: cdata(:)
+    character, allocatable :: chdata(:)
+    logical, allocatable :: ldata(:)
+    class(object), allocatable :: odata
+  end type
+
+  !> @cond Interface
+  interface error_message ! constructor
+    module procedure new_message
+  end interface
+
+  interface assert
+    module procedure scalar_assert,vector_assert
+  end interface
+  !> @endcond
+
+contains
+
+  type(error_message) function new_message(message,message_data)
+    use object_interface, only : object
+    character(len=*), intent(in) :: message
+    class(*), intent(in), optional :: message_data
+    new_message%string = message
+    if (present(message_data)) then
+      select type(message_data)
+        type is (character(len=*))
+          new_message%chdata = message_data
+        type is (real)
+          new_message%rdata = message_data
+        type is (integer)
+          new_message%rdata = message_data
+        type is (logical)
+          new_message%ldata = message_data
+        type is (complex)
+          new_message%cdata = message_data
+        class is (object)
+          allocate(new_message%odata,source = message_data)
+      end select
+    end if
+  end function
+
+  subroutine scalar_assert(assertion,message)
+    logical ,intent(in) :: assertion
+    type(error_message) ,intent(in) :: message
+    integer io_status
+    character(len=132) io_message
+    if (.not. assertion) then
+      write(error_unit,fmt='(31a)',advance="no") 'Assertion failed with message: '
+#ifndef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+      if (allocated(message%string)) then
+#endif
+        write(error_unit,*) message%string
+#ifndef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+      else
+        write(error_unit,*) '(no message provided).'
+      end if
+#endif
+      if (allocated(message%idata)) write(error_unit,*) 'Integer test data: ',message%idata
+      if (allocated(message%rdata)) write(error_unit,*) 'Real test data: ',message%rdata
+      if (allocated(message%odata)) then
+#ifdef COMPILER_LACKS_DERIVED_TYPE_IO
+        call message%odata%output(error_unit,v_list=[10,3],iotype='DT',iostat=io_status,iomsg=io_message)
+#else
+        write(error_unit,fmt="(dt(10,3))",iostat=io_status,iomsg=io_message)  message%odata
+#endif /* COMPILER_LACKS_DERIVED_TYPE_IO */
+      end if
+
+      stop "scalar_assert: assertion failure"
+    end if
+  end subroutine
+
+  subroutine vector_assert(assertion,text)
+    logical ,dimension(:) ,intent(in) :: assertion
+    type(error_message) ,dimension(:) ,intent(in) :: text
+    integer :: i
+    logical :: any_failures
+    call assert_identical( [size(assertion),size(text)] )
+    any_failures=.false.
+    do i=1,size(assertion)
+      if (.not. assertion(i)) then
+        any_failures=.true.
+        write(error_unit,fmt='(31a)',advance="no") 'Assertion failed with message: '
+       !if (allocated(text(i)%string)) then
+          write(error_unit,*) text(i)%string
+       !else
+       !  write(error_unit,*) '(no message provided).'
+       !end if
+      end if
+    end do
+    if (any_failures) stop 'Execution halted on failed assertion(s)!'
+  end subroutine
+
+
+  subroutine assert_identical(integers)
+    integer ,dimension(:) ,intent(in) :: integers
+    integer :: i
+    logical :: any_mismatches
+    any_mismatches = .false.
+    do i=2,size(integers)
+      if (integers(i) /= integers(1)) then
+        any_mismatches = .true.
+        write(error_unit,*) &
+        'Value ',i,' does not match expected value ',integers(1)
+      end if
+    end do
+    if (any_mismatches) stop 'Execution halted on failed assertion!'
+  end subroutine
+end module
diff --git a/src/tests/integration/pde_solvers/library/ForTrilinos_error.F90 b/src/tests/integration/pde_solvers/library/ForTrilinos_error.F90
new file mode 100644
index 0000000..3f3c981
--- /dev/null
+++ b/src/tests/integration/pde_solvers/library/ForTrilinos_error.F90
@@ -0,0 +1,86 @@
+!*********************************************************************
+! ForTrilinos: Object-Oriented Fortran 2003 interface to Trilinos
+!                Copyright 2010 Sandia Corporation
+!
+! Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+! the U.S. Government retains certain rights in this software.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!
+! 1. Redistributions of source code must retain the above copyright
+!    notice, this list of conditions and the following disclaimer.
+!
+! 2. Redistributions in binary form must reproduce the above copyright
+!    notice, this list of conditions and the following disclaimer in the
+!    documentation and/or other materials provided with the distribution.
+!
+! 3. Neither the name of the Corporation nor the names of the
+!    contributors may be used to endorse or promote products derived from
+!    this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+! EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+! CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+! EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+! PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+! PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+! LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+! NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+! Questions? Contact Karla Morris  (knmorri at sandia.gov) or
+!                    Damian Rouson (rouson at sandia.gov)
+!*********************************************************************
+
+module ForTrilinos_error
+#include "compiler_capabilities.txt"
+  implicit none
+  private
+  public :: error
+
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+  integer ,parameter :: max_string_length=256
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+
+  type :: error
+    private
+    integer code
+    class(*), allocatable :: data_(:)
+#ifdef ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS
+    character(len=max_string_length) message ! gfortran 4.7.0 workaround
+#else
+    character(:) ,allocatable :: message
+#endif /* ForTrilinos_DISABLE_DEFERRED_LENGTH_CHARACTERS */
+  contains
+    procedure :: error_code
+    procedure :: define_error
+    generic :: error=>define_error
+  end type
+
+contains
+
+  subroutine define_error(this,new_code,new_message,new_data)
+    class(error), intent(out) :: this
+    integer ,intent(in) :: new_code
+    character(len=*) ,intent(in) :: new_message
+    class(*) ,intent(in), optional :: new_data(:)
+    this%code = new_code
+    this%message = new_message
+    if (present(new_data)) allocate(this%data_(lbound(new_data,1):ubound(new_data,1)),source=new_data)
+  end subroutine
+
+  integer function error_code(this)
+    class(error) ,intent(in) :: this
+    error_code = this%code
+  end function
+
+  function error_message(this)
+    class(error) ,intent(in) :: this
+    character(:), allocatable :: error_message
+    error_message = this%message
+  end function
+
+end module ForTrilinos_error
diff --git a/src/tests/integration/pde_solvers/library/co_object_interface.F90 b/src/tests/integration/pde_solvers/library/co_object_interface.F90
new file mode 100644
index 0000000..305d849
--- /dev/null
+++ b/src/tests/integration/pde_solvers/library/co_object_interface.F90
@@ -0,0 +1,73 @@
+! MoFo library: co_object_interface
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
+module co_object_interface
+  implicit none
+  private
+  public :: co_object
+
+  ! Define an abstract base class to ensure basic functionality expected to be provided by all concrete Morfeus classes.
+  ! Each concrete class provides the functionality by extending this class and implementing its deferred binding(s).  This
+  ! class resembles java's Object class in the sense that it is intended to be the ultimate ancester of every other class.
+  type, abstract :: co_object
+    private
+    logical :: defined=.false. ! Mark all co_objects as not-yet user-defined by default
+    real, allocatable :: dummy_to_facilitate_extension[:]
+  contains
+    procedure :: mark_as_defined
+    procedure :: user_defined
+    procedure(formatted_output_interface), deferred :: output
+   !generic :: write(unformatted) => output  ! Derived-type I/O not yet supported by most compilers
+  end type
+
+  ! Require child classes to write an "output" procedure that prints to the passed file unit
+  abstract interface
+    subroutine formatted_output_interface(this,unit,iotype,v_list,iostat,iomsg)
+      import co_object
+      class(co_object), intent(in) :: this
+      integer, intent(in) :: unit ! Unit on which output happens (negative for internal file)
+      character(*), intent(in) :: iotype ! Allowable values: ’LISTDIRECTED’,’NAMELIST’, or ’DT’
+      integer, intent(in) :: v_list(:)
+      integer, intent(out) :: iostat
+      character(*), intent(inout) :: iomsg
+    end subroutine
+  end interface
+
+contains
+
+  ! Mark the co_object as user-defined
+  pure subroutine mark_as_defined(this)
+    class(co_object), intent(inout) :: this
+    this%defined=.true.
+  end subroutine
+
+  ! Return a boolean result indicating whether this co_object has been initialized since its declaration
+  logical pure function user_defined(this)
+    class(co_object), intent(in) :: this
+    user_defined = this%defined
+  end function
+
+end module
diff --git a/src/tests/integration/pde_solvers/library/object_interface.F90 b/src/tests/integration/pde_solvers/library/object_interface.F90
new file mode 100644
index 0000000..b877147
--- /dev/null
+++ b/src/tests/integration/pde_solvers/library/object_interface.F90
@@ -0,0 +1,77 @@
+! MoFo library: object_interface
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
+module object_interface
+#include "compiler_capabilities.txt"
+  implicit none
+  private
+  public :: object
+
+  ! Define an abstract base class to ensure basic functionality expected to be provided by all concrete Morfeus classes.
+  ! Each concrete class provides the functionality by extending this class and implementing its deferred binding(s).  This
+  ! class resembles java's Object class in the sense that it is intended to be the ultimate ancester of every other class.
+  type, abstract :: object
+    private
+    logical :: defined=.false. ! Mark all objects as not-yet user-defined by default
+  contains
+    procedure :: mark_as_defined
+    procedure :: user_defined
+    procedure(output_interface), deferred :: output
+#ifndef COMPILER_LACKS_DERIVED_TYPE_IO
+    generic :: write(formatted) => output  ! Derived-type I/O
+#endif /* COMPILER_LACKS_DERIVED_TYPE_IO */
+  end type
+
+  ! Require child classes to write an "output" procedure that prints to the passed file unit
+  abstract interface
+
+    subroutine output_interface(this,unit,iotype,v_list,iostat,iomsg)
+      import object
+      class(object), intent(in) :: this
+      integer, intent(in) :: unit
+      character(len=*), intent(in) :: iotype
+      integer, intent(in) :: v_list(:)
+      integer, intent(out) :: iostat
+      character(len=*), intent(inout) :: iomsg
+    end subroutine
+
+  end interface
+
+contains
+
+  ! Mark the object as user-defined
+  pure subroutine mark_as_defined(this)
+    class(object), intent(inout) :: this
+    this%defined=.true.
+  end subroutine
+
+  ! Return a boolean result indicating whether this object has been initialized since its declaration
+  logical pure function user_defined(this)
+    class(object), intent(in) :: this
+    user_defined = this%defined
+  end function
+
+end module
diff --git a/src/tests/integration/pde_solvers/library/parse_command_line.f90 b/src/tests/integration/pde_solvers/library/parse_command_line.f90
new file mode 100644
index 0000000..d3ed6de
--- /dev/null
+++ b/src/tests/integration/pde_solvers/library/parse_command_line.f90
@@ -0,0 +1,104 @@
+! MoFo library: parse_command_line
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
+module parse_command_line
+  ! Utility for returning key-value pairs passed at the command line in the format
+  ! command key1=value1  key2=value2 ...
+  implicit none
+  private
+  public :: get_keyword_values
+
+contains
+
+  subroutine assert(test_passes)
+    logical, intent(in) :: test_passes
+    if (.not. test_passes) stop "assertion failed"
+  end subroutine
+
+  ! Return the command-line values associated with the passed keys.
+  ! Arguments and result variables:
+  ! keys = array of keywords
+  ! default_values = values returned for like-positioned keyword if no command-line value specified
+  ! actual_values = values returned for like-positioned keys
+  ! Each elment in the keys and default_values arrays must be padded with trailing blanks if necessary to give the elements
+  ! a uniform length.  The actual_values elements are similarly padded if necessary to give them a uniform length.
+  function get_keyword_values(keys,default_values) result(actual_values)
+    character(len=*), dimension(:), intent(in) :: keys
+    character(len=*), dimension(:), intent(in) :: default_values
+    character(len=:), dimension(:), allocatable :: actual_values
+    character(len=:), allocatable :: key_value_pair,trimmed_argument,trimmed_value
+    character(len=1), parameter :: divider="="
+    integer divider_position,error_flag,i,j
+
+    ! Requires
+    call assert(size(keys)==size(default_values))
+
+    actual_values=default_values
+    allocate(key_value_pair,source=repeat(" ",ncopies=max_argument_length()) )
+    ! Read the text of the arguments passed on the command line
+    do i=1,command_argument_count()
+      call get_command_argument(i,key_value_pair,status=error_flag)
+      call check(error_flag)
+      divider_position = scan(key_value_pair,divider)
+      if (divider_position==0) stop "Invalid argument format (expected: 'argument=value')."
+      trimmed_argument = trim(key_value_pair(1:divider_position-1))
+      trimmed_value = trim(key_value_pair(divider_position+1:))
+      if (len(trimmed_value)==0) stop "Invalid value format (expected: 'argument=value')."
+      do j=1,size(keys)
+        if (trim(keys(j))==trimmed_argument) actual_values(j)=trimmed_value
+      end do
+    end do
+
+    ! Ensures
+    call assert(size(actual_values)==size(keys))
+  contains
+    function max_argument_length()
+      integer max_argument_length,n,length_of_argument_n
+      max_argument_length=0
+      do n=1,command_argument_count()
+        call get_command_argument(n,key_value_pair,status=error_flag,length=length_of_argument_n)
+        call check(error_flag)
+        if (length_of_argument_n>max_argument_length) max_argument_length = length_of_argument_n
+      end do
+    end function
+    subroutine check(flag)
+      integer, intent(in) :: flag
+      select case(flag)
+        case(-1)
+          ! this should never occur because key_value_pair is dynamically sized to match the length of the longest argument
+          print *,"main: argument ",i,"exceeds maximum length of ",max_argument_length()
+          stop
+        case(1:)
+          print *,"main: error in reading the argument name (status=",flag,")"
+          stop
+        case(0)
+          ! argument_text read successfully
+        case default
+          stop "main: invalid status (compiler error)"
+      end select
+    end subroutine
+  end function
+end module
diff --git a/src/tests/integration/pde_solvers/navier-stokes/CMakeLists.txt b/src/tests/integration/pde_solvers/navier-stokes/CMakeLists.txt
new file mode 100644
index 0000000..f236f6f
--- /dev/null
+++ b/src/tests/integration/pde_solvers/navier-stokes/CMakeLists.txt
@@ -0,0 +1,19 @@
+# checking whether the machine is of type 64-bit before proceeding further
+if ( ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "x86_64") AND ("${CMAKE_SYSTEM_NAME}" MATCHES "LINUX") )
+  # Default to older SSE-instruction-based FFT library
+  if (NOT (DEFINED ENV{TRAVIS}))
+    if (LEGACY_ARCHITECTURE OR (NOT DEFINED(LEGACY_ARCHITECTURE)))
+      set(fft_library ${CMAKE_CURRENT_SOURCE_DIR}/libfft_sse.a )
+    else()
+      message(WARNING "Attempting to use libfft_avx.a, which only works on recent x86 architectures.")
+      set(fft_library ${CMAKE_CURRENT_SOURCE_DIR}/libfft_avx.a )
+    endif()
+    add_executable( coarray_navier_stokes
+      coarray-shear_coll.F90
+      ${walltime_o}
+    )
+    target_link_libraries(coarray_navier_stokes OpenCoarrays ${fft_library})
+  endif()
+else()
+  # Skip Navier-Stokes build until an appropriate FFT has been found.
+endif()
diff --git a/src/tests/integration/pde_solvers/navier-stokes/Makefile b/src/tests/integration/pde_solvers/navier-stokes/Makefile
new file mode 100644
index 0000000..b076f5a
--- /dev/null
+++ b/src/tests/integration/pde_solvers/navier-stokes/Makefile
@@ -0,0 +1,39 @@
+# Choose desired compiler (options: gnu, intel, cray):
+compiler=gnu
+fft=libfft_sse.a
+opencoarrays_dir=/opt/opencoarrays/lib
+
+ifeq ($(compiler),gnu)
+  opt=-L $(opencoarrays_dir) -fcoarray=lib -Ofast
+  libcaf_dir = $(opencoarrays_dir)
+  compile=mpif90
+  lib=-lcaf_mpi
+else
+  ifeq ($(compiler),intel)
+    compile=ifort -coarray=shared -standard-semantics -O3 -coarray-num-images=2
+  else
+    ifeq ($(compiler),cray)
+      compile=ftn -ew -h caf
+    endif
+  endif
+endif
+
+objects = walltime.o
+
+all: coarray-shear-collective mpi-shear
+
+coarray-shear-collective: coarray-shear_coll.o $(objects) Makefile
+	$(compile) $(opt)  coarray-shear_coll.o $(objects) -o coarray-shear $(lib) $(fft)
+
+mpi-shear: mpi-shear.o $(objects) Makefile
+	$(compile)  mpi-shear.o $(objects) -o mpi-shear $(fft)
+
+coarray-shear_coll.o: coarray-shear_coll.F90 $(objects) Makefile
+	$(compile) $(opt)  -c coarray-shear_coll.F90
+
+mpi-shear.o: mpi-shear.f90 $(objects) Makefile
+	$(compile)  -c mpi-shear.f90
+
+.PHONY : clean
+clean :
+	-rm -f  coarray-shear_coll.o coarray-shear mpi-shear.o mpi-shear core fort.* *.mod
diff --git a/src/tests/integration/pde_solvers/navier-stokes/Makefile_NS_GASNET b/src/tests/integration/pde_solvers/navier-stokes/Makefile_NS_GASNET
new file mode 100644
index 0000000..2440348
--- /dev/null
+++ b/src/tests/integration/pde_solvers/navier-stokes/Makefile_NS_GASNET
@@ -0,0 +1,38 @@
+# Choose desired compiler (options: gnu, intel, cray):
+include /home/rouson/Downloads/GASNet-1.22.4/smp-conduit/smp-par.mak
+compiler=gnu
+executable=coarray-shear
+fft=fft_sse.a
+opencoarrays_dir=/opt/opencoarrays/lib
+
+ifeq ($(compiler),gnu)
+  opt=-L $(opencoarrays_dir)
+  compile=gfortran -fcoarray=lib
+  lib=-lcaf_gasnet
+else
+  ifeq ($(compiler),intel)
+    compile=ifort -coarray=shared -standard-semantics -O3 -coarray-num-images=2
+  else
+    ifeq ($(compiler),cray)
+      compile=ftn -ew -h caf
+    endif
+  endif
+endif
+
+objects = walltime.o
+
+$(executable): coarray-shear.o $(objects) Makefile
+	$(GASNET_LD) $(GASNET_LDFLAGS) $(LIBCAF_FLAGS) $(opt) coarray-shear.o $(objects) -lgfortran -lm -o $(executable) $(lib) $(fft) $(GASNET_LIBS)
+
+coarray-shear.o: coarray-shear.f90 $(objects) Makefile
+	$(compile) -c coarray-shear.f90
+
+global_field.o: global_field.f90 local_field.o Makefile
+	$(compile) -c global_field.f90
+
+local_field.o: local_field.f90 Makefile
+	$(compile) $(OPTS) -c local_field.f90
+
+.PHONY : clean
+clean :
+	-rm -f  coarray-shear.o *.mod $(executable) core fort.*
diff --git a/src/tests/integration/pde_solvers/navier-stokes/coarray-shear.f90 b/src/tests/integration/pde_solvers/navier-stokes/coarray-shear.f90
new file mode 100644
index 0000000..09b8dac
--- /dev/null
+++ b/src/tests/integration/pde_solvers/navier-stokes/coarray-shear.f90
@@ -0,0 +1,1018 @@
+! Coarray 3D Navier-Stokes Solver Test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+
+!(*----------------------------------------------------------------------------------------------------------------------
+!         basic in-core shear code ( 7 words/node, not threaded, in-core, no file read/write )
+!------------------------------------------------------------------------------------------------------------------------*)
+
+! Define universal constants:
+! In the case of exactly representable numbers, the definitions are useful
+! to ensure subprogram argument type/kind/rank matching without having to
+! repeat kind specifiers everywhere.
+module constants_module
+  use iso_fortran_env, only : int64
+  implicit none
+  private
+  public :: one,zero
+  integer(int64), parameter :: one=1_int64,zero=0_int64
+end module
+
+! Initialize the random seed with a varying seed to ensure a different
+! random number sequence for each invocation of subroutine, e.g. for
+! invocations on different images of a coarray parallel program.
+! Setting any seed values to zero is deprecated because it can result
+! in low-quality random number sequences.
+! (Source: https://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html)
+module random_module
+  implicit none
+  private
+  public :: init_random_seed
+contains
+  subroutine init_random_seed()
+            use iso_fortran_env, only: int64
+            implicit none
+            integer, allocatable :: seed(:)
+            integer :: i, n, un, istat, dt(8), pid
+            integer(int64) :: t
+
+            call random_seed(size = n)
+            allocate(seed(n))
+            ! First try if the OS provides a random number generator
+            open(newunit=un, file="/dev/urandom", access="stream", &
+                 form="unformatted", action="read", status="old", iostat=istat)
+            if (istat == 0) then
+               if (this_image()==1) print *,"OS provides random number generator"
+               read(un) seed
+               close(un)
+            else
+               if (this_image()==1) print *,"OS does not provide random number generator"
+               ! Fallback to XOR:ing the current time and pid. The PID is
+               ! useful in case one launches multiple instances of the same
+               ! program in parallel.
+               call system_clock(t)
+               if (t == 0) then
+                  call date_and_time(values=dt)
+                  t = (dt(1) - 1970) * 365_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(2) * 31_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(3) * 24_int64 * 60 * 60 * 1000 &
+                       + dt(5) * 60 * 60 * 1000 &
+                       + dt(6) * 60 * 1000 + dt(7) * 1000 &
+                       + dt(8)
+               end if
+               pid = getpid()
+               t = ieor(t, int(pid, kind(t)))
+               do i = 1, n
+                  seed(i) = lcg(t)
+               end do
+            end if
+            call random_seed(put=seed)
+          contains
+            ! This simple PRNG might not be good enough for real work, but is
+            ! sufficient for seeding a better PRNG.
+            function lcg(s)
+              integer :: lcg
+              integer(int64) :: s
+              if (s == 0) then
+                 s = 104729
+              else
+                 s = mod(s, 4294967296_int64)
+              end if
+              s = mod(s * 279470273_int64, 4294967291_int64)
+              lcg = int(mod(s, int(huge(0), int64)), kind(0))
+            end function lcg
+          end subroutine init_random_seed
+end module random_module
+
+module run_size
+    use iso_fortran_env, only : int64,real64 ! 64-bit integer and real kind parameters
+    use constants_module, only : one         ! 64-bit unit to ensure argument kind match
+    implicit none
+        real, codimension[*] ::  viscos, shear, b11, b22, b33, b12, velmax
+        integer(int64), codimension[*] ::  nx, ny, nz, nsteps, output_step
+        integer(int64), codimension[*] :: my, mx, first_y, last_y, first_x, last_x
+        real(real64), codimension[*] :: cpu_time, tran_time, sync_time, total_time
+        real(real64), codimension[*] :: max_cpu_time, max_tran_time, max_sync_time, max_total_time
+        real(real64), codimension[*] :: min_cpu_time, min_tran_time, min_sync_time, min_total_time
+
+        real ::  time, cfl, dt
+        integer(int64) :: my_node, num_nodes
+        real, parameter :: pi = 3.141592653589793
+
+contains
+
+    subroutine max_velmax()
+        integer(int64) :: i
+
+        sync all
+        if( my_node == 1) then
+            do i = 2, num_nodes;     velmax = max( velmax, velmax[i] );    end do
+        end if
+        sync all
+        if (my_node>1) velmax = velmax[1]
+        sync all
+    end subroutine max_velmax
+
+    subroutine global_times()
+        integer(int64) :: i, stage
+
+        max_cpu_time = cpu_time
+        max_tran_time = tran_time
+        max_total_time = sync_time
+        max_total_time = total_time
+        min_cpu_time = cpu_time
+        min_tran_time = tran_time
+        min_total_time = sync_time
+        min_total_time = total_time
+
+        do stage = 1, num_nodes-1
+            i = 1 + mod( my_node-1+stage, num_nodes )
+            max_cpu_time = max( max_cpu_time, cpu_time[i] )
+            min_cpu_time = min( min_cpu_time, cpu_time[i] )
+            max_tran_time = max( max_tran_time, tran_time[i] )
+            min_tran_time = min( min_tran_time, tran_time[i] )
+            max_sync_time = max( max_sync_time, sync_time[i] )
+            min_sync_time = min( min_sync_time, sync_time[i] )
+            max_total_time = max( max_total_time, total_time[i] )
+            min_total_time = min( min_total_time, total_time[i] )
+        end do
+        sync all
+    end subroutine global_times
+
+subroutine copy3( A,B, n1, sA1, sB1, n2, sA2, sB2, n3, sA3, sB3 )
+  implicit none
+  complex, intent(in)  :: A(0:*)
+  complex, intent(out) :: B(0:*)
+  integer(int64), intent(in) :: n1, sA1, sB1
+  integer(int64), intent(in) :: n2, sA2, sB2
+  integer(int64), intent(in) :: n3, sA3, sB3
+  integer(int64) i,j,k
+
+  do k=0,n3-1
+     do j=0,n2-1
+        do i=0,n1-1
+           B(i*sB1+j*sB2+k*sB3) = A(i*sA1+j*sA2+k*sA3)
+        end do
+     end do
+  end do
+end subroutine copy3
+
+end module run_size
+
+program cshear
+
+  !(***********************************************************************************************************
+  !                   m a i n   p r o g r a m
+  !***********************************************************************************************************)
+      use iso_fortran_env, only : int64,real64 ! 64-bit integer and real kind parameters
+      use run_size
+      implicit none
+
+      interface
+        subroutine solve_navier_stokes
+        end subroutine solve_navier_stokes
+      end interface
+
+nx=128;ny=128;nz=128
+viscos=0.; shear=0.
+b11=1.; b22=1.; b33=1.; b12 =0.
+nsteps=10;output_step=1
+
+      num_nodes = num_images()
+      my_node = this_image()
+
+      if( my_node == 1 ) then
+
+!       write(6,*) "nx,ny,nz : ";               read(5,*) nx, ny, nz
+        if ( mod(nx/2,num_nodes) /= 0) then;    write(6,*) "nx/2 not multiple of num_nodes";    stop;   end if
+        if ( mod(ny,num_nodes) /= 0) then;      write(6,*) " ny not multiple of num_nodes";     stop;   end if
+
+!       write(6,*) "viscos, shear : ";          read(5,*) viscos, shear
+!       write(6,*) "b11 b22 b33 b12 : ";        read(5,*) b11, b22, b33, b12
+!       write(6,*) "nsteps, output_step : ";    read(5,*) nsteps, output_step
+
+        write(6,fmt="(3(A,i4))") "nx =",nx,   "   ny =",ny,   "   nz =",nz
+        write(6,fmt="(2(A,f7.3))") "viscos = ", viscos, "      shear = ", shear
+        write(6,fmt="(A,4f7.3)") "b11 b22 b33 b12 = ", b11, b22, b33, b12
+        write(6,fmt="(2(A,i6))") "nsteps = ", nsteps, "       output_step = ", output_step
+
+        write(6,fmt="(A,i4,A)") "----------------- running on ", num_nodes, " images -------------------"
+
+      end if
+
+      sync all  !--- images > 1 wait on inputs from image = 1 !
+
+      if( my_node > 1 ) then
+        nx = nx[1];     ny = ny[1];     nz = nz[1]
+        viscos = viscos[1];     shear = shear[1]
+        b11 = b11[1];   b22 = b22[1];   b33 = b33[1];   b12 = b12[1]
+        nsteps = nsteps[1];     output_step = output_step[1]
+      end if
+
+ 	    mx = nx/2 / num_nodes;  first_x = (my_node-1)*mx + 1;   last_x  = (my_node-1)*mx + mx
+        my = ny / num_nodes;    first_y = (my_node-1)*my + 1;   last_y  = (my_node-1)*my + my
+
+      if(my_node == 1 ) write(6,fmt="(A, f6.2)") "message size (MB) = ", real(nz*4*mx*my*8)/real(1024*1024)
+
+    call solve_navier_stokes
+
+end program cshear
+
+!  (***********************************************************************************************************
+!             n a v i e r - s t o k e s   s o l v e r
+!  ************************************************************************************************************)
+
+  subroutine solve_navier_stokes
+      use run_size
+      implicit none
+
+  !(*****************************   declarations     ****************************************)
+
+       integer(int64) ::  stop, rflag, oflag, step, rkstep, nshells
+       real ::  k1(nx/2), k2(ny), k3(nz), mk1(nx/2), mk2(ny), mk3(nz) &
+              , kx(nx/2), ky_(nx/2,ny), ky(nx/2,ny), kz(nz)
+       complex :: sx(nx/2,3), sy(ny,3), sz(nz,3)
+       integer(int64) :: trigx, trigy, trigz, trigxy
+
+       complex, allocatable ::  u(:,:,:,:)[:]    ! u(nz,4,first_x:last_x,ny)[*]    !(*-- x-y planes --*)
+       complex, allocatable ::  ur(:,:,:,:)[:]   !ur(nz,4,first_y:last_y,nx/2)[*]  !(*-- x-z planes --*)
+       complex, allocatable ::  un(:,:,:,:)      !un(nz,3,first_x:last_x,ny)[*]    !(*-- x-y planes --*)
+       complex, allocatable :: bufr_X_Y(:,:,:,:)
+       complex, allocatable :: bufr_Y_X(:,:,:,:)
+
+interface
+!--------    note: integer(int64)'s required for FFT's and other assembly-coded externals   ------
+
+   function ctrig( len ) bind(C)               !(*-- define complex FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in) :: len
+        integer(int64) :: ctrig     !-- C pointer!
+   end function ctrig
+
+   function rtrig( len ) bind(C)              !(*-- define real FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in):: len
+        integer(int64) :: rtrig     !-- C pointer!
+   end function rtrig
+
+   subroutine cfft( len, lot, data, inc, jmp, ctrig, isign ) bind(C)    !(*-- complex FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, ctrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine cfft
+
+   subroutine rfft( len, lot, data, inc, jmp, rtrig, isign ) bind(C)   !(*-- real FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, rtrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine rfft
+
+   function WALLTIME() bind(C, name = "WALLTIME")
+       import real64
+       real(real64) :: WALLTIME
+   end function WALLTIME
+
+
+end interface
+
+       trigx = rtrig( nx )
+       trigy = ctrig( ny )
+       trigz = ctrig( nz )
+	   trigxy = ctrig( nx+ny )
+
+       allocate (  u(nz , 4 , first_x:last_x , ny)[*] )     !(*-- y-z planes --*)
+       allocate ( ur(nz , 4 , first_y:last_y , nx/2)[*] )   !(*-- x-z planes --*)
+       allocate ( un(nz , 3 , first_x:last_x , ny) )        !(*-- y-z planes --*)
+       allocate ( bufr_X_Y(nz,4,mx,my) )
+       allocate ( bufr_Y_X(nz,4,my,mx) )
+
+
+       stop = 0;   step = 0;  rkstep = 2;  rflag = 0;  cfl = 1;   dt = 0
+       nshells = max( nx,ny,nz )
+
+                        call define_kspace
+                        call define_field
+                        call enforce_conjugate_symmetry
+						call copy_n_s
+                        call define_shifts
+
+       total_time =  -WALLTIME()      !-- start the clock
+
+       tran_time = 0;       cpu_time = -WALLTIME()
+
+  !(*********************************   begin execution loop   *****************************************)
+
+       do while (stop == 0)
+
+                        call phase1
+            rkstep = 1
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call define_step
+                        call define_shifts
+                        call phase3
+                        call pressure
+		if (oflag /= 0) call spectra
+                        call advance
+                        call phase1
+            rkstep = 2
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call phase3
+                        call advance
+                        call pressure
+        if (rflag /= 0) call remesh
+                        call copy_s_n
+
+                		step = step + 1
+                        time = time + dt
+       end do
+
+  !(*********************************   end execution loop   ***********************************************)
+
+       deallocate ( u, ur, un )
+       deallocate ( bufr_X_Y );    deallocate ( bufr_Y_X )
+       sync all     !-- wait for all images to finish!
+
+       total_time =  total_time + WALLTIME()    !-- stop the clock
+       cpu_time =  cpu_time + WALLTIME()    !-- stop the clock
+       call global_times
+
+       if (my_node == 1 )   write(6,fmt="(3(10X,A,2f7.2))")  &
+                            , "total_time ", min_total_time/step, max_total_time/step &
+                            , "cpu_time ", min_cpu_time/step, max_cpu_time/step &
+                            , "tran_time ", min_tran_time/step, max_tran_time/step
+
+
+       write(6,fmt="(A,i4,3f7.2)")  "image ", my_node, total_time/step, cpu_time/step, tran_time/step
+
+
+contains
+
+ !(***********************************************************************************************************
+ !                          transpose the Y and Z planes
+ !***********************************************************************************************************)
+
+!-----                   u(nz,4,mx,my*num_nodes) [num_nodes]
+!-----                  ur(nz,4,my,mx*num_nodes) [num_nodes]
+!-----                bufr(nz,4,my,mx) or bufr(nz,4,mx,my)
+
+!-------------   out-of-place transpose data_s --> data_r  ----------------------------
+
+ subroutine transpose_X_Y
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    integer(int64) :: i,stage
+
+    cpu_time = cpu_time + WALLTIME()
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (    u(1,1,first_x,1+(my_node-1)*my) &                   !-- intra-node transpose
+                ,  ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- no inter-node transpose needed
+                ,   nz*3, one, one        &                                 !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        bufr_X_Y(:,:,:,:) = u(:,:,:,1+(my_node-1)*my:my_node*my)[i]         !-- inter-node transpose to buffer
+        call copy3 ( bufr_X_Y, ur(1,1,first_y,1+(i-1)*mx)  &                !-- intra-node transpose from buffer
+                        ,   nz*3, one, one        &                             !-- note: only 3 of 4 words needed
+                        ,   mx, nz*4, nz*4*my &
+                        ,   my, nz*4*mx, nz*4 )
+    end do
+
+    sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+    cpu_time = cpu_time - WALLTIME()
+
+ end  subroutine transpose_X_Y
+
+!-------------   out-of-place transpose data_r --> data_s  ----------------------------
+
+subroutine transpose_Y_X
+    use run_size
+    implicit none
+
+    integer(int64) :: i, stage
+
+    cpu_time = cpu_time + WALLTIME()
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (   ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- intra-node transpose
+                ,   u(1,1,first_x,1+(my_node-1)*my) &                   !-- no inter-node transpose needed
+                ,   nz*4, one, one        &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        bufr_Y_X(:,:,:,:) = ur(:,:,:,1+(my_node-1)*mx:my_node*mx)[i]        !-- inter-node transpose to buffer
+        call copy3 ( bufr_Y_X, u(1,1,first_x,1+(i-1)*my)  &                 !-- intra-node transpose from buffer
+                    ,   nz*4, one, one        &
+                    ,   my, nz*4, nz*4*mx &
+                    ,   mx, nz*4*my, nz*4 )
+    end do
+
+    sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+    cpu_time = cpu_time - WALLTIME()
+
+ end  subroutine transpose_Y_X
+
+
+!(*************************************************************************************************************
+!           enforce conjugate symmetry for plane kx=0 of wavespace  (half of this plane is redundant)
+!***************************************************************************************************************)
+
+  subroutine enforce_conjugate_symmetry
+
+    integer(int64) ::  i, x, y, z
+
+!(*------------------------- un( K ) = conjg( un( -K ) ) ---------------------*)
+
+    if (my_node == 1 ) then     !-- x=1 is in node=1
+        x = 1
+        do i = 1, 3
+            z = 1;              y = 1;              un(z,i,x,y) = 0
+            z = 1;              do y = 2, ny/2;     un(z,i,x,y) = conjg( un(z,i,x,ny+2-y) );        end do
+            do z = 2, nz/2;     y = 1;              un(z,i,x,y) = conjg( un(nz+2-z,i,x,y) );        end do
+            do z = 2, nz/2;     do y = 2, ny;       un(z,i,x,y) = conjg( un(nz+2-z,i,x,ny+2-y) );   end do;	end do
+        end do
+    end if
+end  subroutine enforce_conjugate_symmetry
+
+ !(***********************************************************************************************************
+ !                spectra :  accumulate spectra and other statistics over flow field
+ !***********************************************************************************************************)
+
+ subroutine  spectra
+
+      use run_size
+     implicit none
+
+    integer(int64) :: k, x, y, z
+    real :: kk, ww, uw, uu, uv, duu, factor   &
+          , ek(nshells), dk(nshells), hk(nshells), tk(nshells), sample(nshells)
+    real, save, codimension[*] ::  sum_ek, sum_dk, sum_hk, sum_tk
+
+      total_time =  total_time + WALLTIME()     !-- stop the clock!  time/step does not include spectra time
+
+	  oflag = 0
+      ek = 0;       dk = 0;       hk = 0;     tk = 0;   sample = 0
+
+ !(*---------------------   three dimensional spectra  -----------------------*)
+
+      do x = first_x, last_x;   do y = 1, ny;      do  z = 1, nz
+
+            if( mk1(x)+mk2(y)+mk3(z) > 2./9. ) &
+                              then;   factor = 0
+            else if (x == 1)  then;   factor = 1
+                              else;   factor = 2
+            end if
+
+            kk = kx(x)**2 + ky(x,y)**2 + kz(z)**2
+            k = 1 + int( sqrt( kk ) + 0.5 )
+
+			  uu = factor * real( un(z,1,x,y) * conjg( un(z,1,x,y) ) &
+                                + un(z,2,x,y) * conjg( un(z,2,x,y) ) &
+                                + un(z,3,x,y) * conjg( un(z,3,x,y) ) )
+              ww = kk * uu
+              uv = factor * real( un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+              uw = factor * 2 * aimag( kx(x) *   un(z,2,x,y) * conjg( un(z,3,x,y) ) &
+                                     + ky(x,y) * un(z,3,x,y) * conjg( un(z,1,x,y) ) &
+                                     + kz(z) *   un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+			  duu = factor * real( un(z,1,x,y) * conjg( u(z,1,x,y) ) &
+                                 + un(z,2,x,y) * conjg( u(z,2,x,y) ) &
+								 + un(z,3,x,y) * conjg( u(z,3,x,y) ) ) / (dt/2) + shear * uv
+
+              sample(k) = sample(k) + factor        !(*-- shell sample --*)
+              ek(k) = ek(k) + uu                    !(*-- 2 * energy sum --*)
+              dk(k) = dk(k) + ww                    !(*-- enstrophy sum --*)
+              hk(k) = hk(k) + uw                    !(*-- helicity sum --*)
+              tk(k) = tk(k) + duu                   !(*-- transfer sum --*)
+
+      end do;   end do;  end do
+
+ !(************************     finished accumulation :  compute final statistics     *************************)
+
+    sum_ek = 0;     sum_dk = 0;     sum_hk = 0;     sum_tk = 0
+    do k = nshells, 1, -1
+        sum_ek  = sum_ek + ek(k)
+        sum_dk  = sum_dk + dk(k)
+        sum_hk  = sum_hk + hk(k)
+        sum_tk  = sum_tk + tk(k)
+    end do
+
+    sync all
+    if (my_node == 1)  then
+        do k = 2, num_nodes
+            sum_ek = sum_ek + sum_ek[k]
+            sum_dk = sum_dk + sum_dk[k]
+            sum_hk = sum_hk + sum_hk[k]
+            sum_tk = sum_tk + sum_tk[k]
+        end do
+
+        if (step == 0)   write(6,*) "step   time     energy    enstrophy   helicity   transfer"
+        write(6,fmt="(i3, 5e11.3)")  step,  time,    sum_ek,   sum_dk,     sum_hk,    sum_tk
+    end if
+
+     total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end  subroutine  spectra
+
+  !(************************************************************************************************************
+  !        define_field  :   define initial flow field from scratch
+  !************************************************************************************************************)
+
+ subroutine define_field
+
+    use constants_module, only : zero
+    use run_size
+    use random_module
+    implicit none
+
+    real ::     k, k12, f, phi, theta1, theta2
+    complex ::  alpha, beta
+    integer(int64) ::  x, y, z
+    real, parameter :: klo=8, khi=16
+
+   call init_random_seed !(* seed a different pseudo-random number sequence for each image *)
+   time = 0
+
+   do x = first_x, last_x
+       do  y = 1, ny
+            do z = 1, nz
+                 call random_number(theta1 )
+                 call random_number(theta2 )
+                 call random_number(phi    )
+                 k   = sqrt( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                 k12 = sqrt( kx(x)**2 + ky(x,y)**2 )
+
+                 if ( k == 0  .or.  mk1(x)+mk2(y)+mk3(z)>2./9.  .or.  k < klo  .or.  k > khi ) &
+                     then;   f = 0
+                     else;   f = sqrt( 1./(2*pi) ) / k
+                 end if
+
+                 alpha = f * exp( (0,2) * pi * theta1 ) * cos( 2*pi * phi )
+                 beta  = f * exp( (0,2) * pi * theta2 ) * sin( 2*pi * phi )
+
+                 if (k12 == 0) &
+                 then; un(z,1,x,y) = alpha
+                       un(z,2,x,y) = beta
+                       un(z,3,x,y) = 0
+
+                 else; un(z,1,x,y) = ( beta * kz(z) * kx(x)   + alpha * k * ky(x,y) ) / ( k * k12 )
+                       un(z,2,x,y) = ( beta * kz(z) * ky(x,y) - alpha * k * kx(x)   ) / ( k * k12 )
+                       un(z,3,x,y) = - beta * k12 / k
+                 end if
+
+   end do;  end do; end do
+ end  subroutine define_field
+
+ !(***********************************************************************************************************
+ !          define_shifts  :    define coordinate shifts for control of 1-d alias errors
+ ! ***********************************************************************************************************)
+
+    subroutine  define_shifts
+    use constants_module, only : zero
+    use run_size
+    implicit none
+
+           integer(int64) ::  x, y, z
+           integer(int64), save ::  init = 0
+           real :: delta_x, delta_y, delta_z
+           integer :: i,seed_size
+
+           if (init == 0) &     !-- Note: delta's not carried over from previous run
+           then;
+                init = 1
+                call random_seed(size=seed_size)
+                call random_seed(put=[(1234567,i=1,seed_size)])!(* same random numbers for each image! *)
+                do  x = 1, nx/2;  sx(x,3) = exp (  (0,1) * ( pi / nx ) * k1(x) ); end do
+                do  y = 1, ny  ;  sy(y,3) = exp (  (0,1) * ( pi / ny ) * k2(y) ); end do
+                do  z = 1, nz  ;  sz(z,3) = exp (  (0,1) * ( pi / nz ) * k3(z) ); end do
+            else;
+                call random_number(delta_x); delta_x = 2*pi / nx * delta_x
+                do  x = 1, nx/2;  sx(x,1) = sx(x,3)
+                                  sx(x,2) = exp (  (0,1) * delta_x * k1(x) )
+                                  sx(x,3) = exp (  (0,1) * ( delta_x + pi / nx ) * k1(x) ); end do
+
+                call random_number(delta_y); delta_y = 2*pi / ny * delta_y
+                do  y = 1, ny  ;  sy(y,1) = sy(y,3)
+                                  sy(y,2) = exp (  (0,1) * delta_y * k2(y) )
+                                  sy(y,3) = exp (  (0,1) * ( delta_y + pi / ny ) * k2(y) ); end do
+
+                call random_number(delta_z); delta_z = 2*pi / nz * delta_z
+                do  z = 1, nz  ;  sz(z,1) = sz(z,3)
+                                  sz(z,2) = exp (  (0,1) * delta_z * k3(z) )
+                                  sz(z,3) = exp (  (0,1) * ( delta_z + pi / nz ) * k3(z) ); end do
+           end if
+
+ end  subroutine  define_shifts
+
+  !(***********************************************************************************************************
+  !       define_step  :   update time, metric, shifts for the next step
+  !**********************************************************************************************************)
+
+  subroutine  define_step
+      use run_size
+      implicit none
+
+    sync all
+
+    if (cfl /= 0) then
+cpu_time = cpu_time + WALLTIME()
+        call max_velmax
+cpu_time = cpu_time - WALLTIME()
+        dt = cfl / velmax
+    end if
+
+    if (        shear > 0               &
+        .and.  .01*b11*shear*dt < b12   &
+        .and.  b12 <= b11*shear*dt )    then
+            dt = b12 / ( b11 * shear )          !(* limit dt, hit the orthognal mesh *)
+            oflag = 1
+    else if ( mod (step,output_step) == 0 ) then
+            oflag = 1
+    end if
+
+    b12 = b12 - b11 * shear * dt
+
+    if ( b12 < -b22/2 )     rflag = 1                                   !(* remesh at the end of the step? *)
+    if ( step == nsteps )   stop = 1                                    !(* last step? *)
+
+ end   subroutine    define_step
+
+   !(***********************************************************************************************************
+   !      define_kspace  :   define physical wavespace from computational wavespace and metric
+   !**********************************************************************************************************)
+
+  subroutine    define_kspace
+    use run_size
+    implicit none
+
+     integer(int64) ::  x, y, z
+
+       do  x = 1, nx/2   ;   k1(x) = x - 1;     end do
+       do  y = 1, ny/2+1 ;   k2(y) = y - 1;     end do
+       do  z = 1, nz/2+1 ;   k3(z) = z - 1;     end do
+
+       do  y = ny/2+2, ny;   k2(y) = y - 1 - ny;    end do
+       do  z = nz/2+2, nz;   k3(z) = z - 1 - nz;    end do
+
+       do  x = 1, nx/2 ;     mk1(x) = (k1(x)/nx)**2;    kx(x) =   b11 * k1(x);  end do
+       do  z = 1, nz   ;     mk3(z) = (k3(z)/nz)**2;    kz(z) =   b33 * k3(z);  end do
+       do  y = 1, ny   ;     mk2(y) = (k2(y)/ny)**2
+                             do  x = 1, nx/2 ;    ky(x,y) = b22 * k2(y) + b12 * k1(x);  end do; end do
+
+end   subroutine    define_kspace
+
+  !(***********************************************************************************************************
+  !   phase 1 :  on entry, data-plane contains velocity in wave space.  interpolate database, shifted mesh,
+  !              and proceed to physical y space .
+  !************************************************************************************************************)
+
+  subroutine  phase1
+
+      use run_size
+      implicit none
+
+    complex :: shift
+    integer(int64) :: i, x, y, z
+
+   	do  x = first_x, last_x
+
+   		do  y = 1, ny;    do  z = 1, nz
+             shift = sz(z,rkstep+1) * sy(y,rkstep+1) * sx(x,rkstep+1)
+             u(z,1,x,y) = shift * u(z,1,x,y)
+             u(z,2,x,y) = shift * u(z,2,x,y)
+             u(z,3,x,y) = shift * u(z,3,x,y)
+        end do; end do
+
+!(*---------------------------   LEAVING FOURIER WAVE SPACE  --------------------------*)
+
+        do i = 1, 3
+            call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, one );    end do
+	end do
+
+ end   subroutine  phase1
+
+ !(**********************************************************************************************************
+ !     phase 2 :  on entry, data-plane contains velocity in physical y space, and wave x,z space on shifted
+ !                mesh.  Proceed to physical x,z space,  form nonlinear terms, and return to wave x,z space.
+ !***********************************************************************************************************)
+
+ subroutine  phase2
+
+     use run_size
+     implicit none
+
+     complex :: s2(nz,nx/2), vs(nz,nx/2)
+     integer(int64) ::  i, x, y, z
+     real :: v2r, v2i, s2r, s2i, u1r, u1i, u2r, u2i, u3r, u3i, u4r, u4i
+
+  velmax = 0
+
+  do  y = first_y, last_y
+
+      do  x = 1, nx/2 ;  do  z = 1, nz ;   vs(z,x) = ur(z,2,y,x);    end do; end do
+
+      do  i = 1, 3
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, one )
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, one )
+      end do
+
+!(*----------------------------  WELCOME TO PHYSICAL SPACE  --------------------------*)
+
+      do  x = 1, nx/2;  do  z = 1, nz
+           u1r = real(ur(z,1,y,x));  u1i = aimag(ur(z,1,y,x))
+           u2r = real(ur(z,2,y,x));  u2i = aimag(ur(z,2,y,x))
+           u3r = real(ur(z,3,y,x));  u3i = aimag(ur(z,3,y,x))
+
+      if ( rkstep == 1 ) velmax = max( velmax &
+                                     , b11*nx*abs(u1r) + b22*ny*abs(u2r) + b33*nz*abs(u3r) &
+                                     , b11*nx*abs(u1i) + b22*ny*abs(u2i) + b33*nz*abs(u3i) )
+
+           v2r = u2r * u2r;             v2i = u2i * u2i
+           s2r = u1r * u3r;             s2i = u1i * u3i
+           u4r = u2r * u3r;             u4i = u2i * u3i
+           u3r = u3r * u3r  -  v2r;     u3i = u3i * u3i  -  v2i
+           u2r = u1r * u2r;             u2i = u1i * u2i
+           u1r = u1r * u1r  -  v2r;     u1i = u1i * u1i  -  v2i
+
+           s2(z,x)     = cmplx(s2r, s2i)
+           ur(z,1,y,x) = cmplx(u1r, u1i)
+           ur(z,2,y,x) = cmplx(u2r, u2i)
+           ur(z,3,y,x) = cmplx(u3r, u3i)
+           ur(z,4,y,x) = cmplx(u4r, u4i)
+      end do;   end do
+
+!(*----------------------------  LEAVING PHYSICAL SPACE  --------------------------*)
+
+      do  i = 1,  4
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, -one )
+           do  z = 1, nz ;   ur(z,i,y,1) = cmplx(real(ur(z,i,y,1)),0);   end do
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, -one )
+      end do
+
+      call rfft ( nx, nz, s2, nz, one, trigx, -one )
+      do  z = 1, nz ;   s2(z,1) = cmplx(real(s2(z,1)),0);    end do
+      call cfft ( nz, nx/2, s2, one, nz, trigz, -one )
+
+      do  x = 1, nx/2;  do  z = 1, nz
+          ur(z,1,y,x) = kx(x) * ur(z,1,y,x) + kz(z) * s2(z,x)   - (0,1) * 2*nx*nz*shear * vs(z,x)
+          ur(z,3,y,x) = kx(x) * s2(z,x)     + kz(z) * ur(z,3,y,x)
+      end do;   end do
+  end do
+
+ end  subroutine  phase2
+
+  !(***********************************************************************************************************
+  !     phase 3 :  on entry, the data-plane contains the four stresses on a shifted mesh in physical y space,
+  !                wave x,z space.   Return to y  wave space on unshifted mesh and complete time derivative of
+  !                velocity ( not divergence free yet )
+  !***********************************************************************************************************)
+
+  subroutine  phase3
+
+      use run_size
+      implicit none
+
+    integer(int64) :: i, x, y, z
+    complex :: shift
+
+    do  x = first_x, last_x
+
+       do i = 1, 4
+           call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, -one )
+       end do
+
+!(*---------------------------   WELCOME TO FOURIER WAVE SPACE  --------------------------*)
+
+       do  y = 1, ny ;   do  z = 1, nz
+               shift = -dt / (4*nx*ny*nz) * (0,1)*conjg( sy(y,rkstep) * sz(z,rkstep) * sx(x,rkstep) )
+               u(z,1,x,y) = shift * (         u(z,1,x,y) + ky(x,y) * u(z,2,x,y) )
+               u(z,2,x,y) = shift * ( kx(x) * u(z,2,x,y) + kz(z)   * u(z,4,x,y) )
+               u(z,3,x,y) = shift * (         u(z,3,x,y) + ky(x,y) * u(z,4,x,y) )
+       end do;  end do
+    end do
+
+ end   subroutine  phase3
+
+  !(***********************************************************************************************************
+  !   pressure :  add the gradient of a scalar, enforce continuity ( zero divergence )
+  !***********************************************************************************************************)
+
+  subroutine  pressure
+
+      use run_size
+      implicit none
+
+     complex :: psi
+     integer(int64) :: x, y, z
+
+      do x = first_x, last_x ;     do  y = 1, ny
+
+            if ( x /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( kx(x) * u(z,1,x,y) + ky(x,y) * u(z,2,x,y) + kz(z) * u(z,3,x,y) ) &
+                              / ( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                        u(z,1,x,y) = u(z,1,x,y) - kx(x) * psi
+                        u(z,2,x,y) = u(z,2,x,y) - ky(x,y) * psi
+                        u(z,3,x,y) = u(z,3,x,y) - kz(z) * psi
+                   end do
+			else if ( y /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( ky(1,y) * u(z,2,1,y) + kz(z) * u(z,3,1,y) ) &
+                              / ( ky(1,y)**2 + kz(z)**2 )
+                        u(z,2,1,y) = u(z,2,1,y) - ky(1,y) * psi
+                        u(z,3,1,y) = u(z,3,1,y) - kz(z) * psi
+                  end do
+            else
+                  do  z = 1, nz ;    u(z,3,1,1) = 0;     end do
+            end if
+	 end do;    end do
+
+end   subroutine  pressure
+
+!(*****************************************************************************************************************
+!                                remesh  :  remesh the sheared coordinate system
+!*****************************************************************************************************************)
+
+subroutine   remesh
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    complex :: u2(nx+ny,nz), shift(nx+ny)
+    integer(int64) :: i, x, y, z
+
+    write(6,fmt="(A,i4)") "remesh image ", my_node
+
+    total_time =  total_time + WALLTIME()     !-- stop the clock!
+
+    do x = first_x, last_x
+
+        do  y = 1, nx+ny ;   shift(y) =  exp( (0,-2) * pi / (nx+ny) * k1(x) * (y - 1) ) / (nx+ny);    end do
+
+        do  i = 1,  3
+            do  z = 1, nz
+                do  y = 1, ny/2           ;   u2(y,z) = u(z,i,x,y);     end do
+                do  y = ny/2+1, nx+ny/2+1 ;   u2(y,z) = 0;              end do
+                do  y = nx+ny/2+2, nx+ny  ;   u2(y,z) = u(z,i,x,y-nx);  end do
+            end do
+
+            call cfft ( nx+ny, nz, u2, one , nx+ny, trigxy, one )
+
+            do  z = 1, nz ;  do  y = 1, nx+ny ;     u2(y,z) = u2(y,z) * shift(y);  end do;  end do
+
+            call cfft ( nx+ny, nz, u2, one, nx+ny, trigxy, -one )
+
+            do  z = 1, nz
+                do  y = 1, ny/2
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y,z)
+                    end if
+                end do
+                do  y = ny/2+1, ny
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y+nx,z)
+                    end if
+                end do
+            end do
+        end do
+
+        do  y = 1, ny ;    ky(x,y) = ky(x,y) + b22 * k1(x);     end do           !(* update ky for this x *)
+
+    end do
+
+    b12 = b12 + b22;     rflag = 0                                               !(*   update metric, account for remesh    *)
+
+    total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end subroutine   remesh
+
+ !(***********************************************************************************************************
+ !         copy_n_s,   copy_s_n :  copy data between data_s and data_n
+ !***********************************************************************************************************)
+
+ subroutine  copy_n_s
+
+     use run_size
+     implicit none
+
+	integer(int64) ::  x, y, z
+
+	do y = 1, ny;    do x = first_x, last_x;   do z = 1, nz
+		u(z,1,x,y) = un(z,1,x,y)
+		u(z,2,x,y) = un(z,2,x,y)
+    	u(z,3,x,y) = un(z,3,x,y)
+	end do; end do; end do
+ end  subroutine  copy_n_s
+
+ subroutine  copy_s_n
+
+     use run_size
+     implicit none
+
+	integer(int64) ::  x, y, z
+
+	do y = 1, ny;	do x = first_x, last_x;     do z = 1, nz
+		un(z,1,x,y) = u(z,1,x,y)
+		un(z,2,x,y) = u(z,2,x,y)
+		un(z,3,x,y) = u(z,3,x,y)
+    end do; end do; end do
+
+ end  subroutine  copy_s_n
+
+ !(***********************************************************************************************************
+ !                         advance :     second-order runge-kutta time step algorithm
+ !***********************************************************************************************************)
+
+ subroutine  advance
+
+     use run_size
+     implicit none
+
+    integer(int64) ::  x, y, z
+    real :: factor, xyfac, zfac(nz)	  !(* viscous integrating factors *)
+
+    if (rkstep == 1) then
+        do z = 1, nz;     zfac(z) = exp( - viscos * dt * kz(z)**2 ); end do
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                ky_(x,y) = ky(x,y)
+                ky(x,y) = b22 * k2(y) + b12 * k1(x)
+
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        factor = zfac(z) * exp( - viscos * dt * ( kx(x)**2 + ( ky_(x,y)**2 + ky_(x,y)*ky(x,y) + ky(x,y)**2 )/3 ) )
+
+                        un(z,1,x,y) = factor * ( un(z,1,x,y) + u(z,1,x,y) )
+                        u(z,1,x,y)  = un(z,1,x,y) + factor * u(z,1,x,y)
+
+                        un(z,2,x,y) = factor * ( un(z,2,x,y) + u(z,2,x,y) )
+                        u(z,2,x,y)  = un(z,2,x,y) + factor * u(z,2,x,y)
+
+                        un(z,3,x,y) = factor * ( un(z,3,x,y) + u(z,3,x,y) )
+                        u(z,3,x,y)  = un(z,3,x,y) + factor * u(z,3,x,y)
+                    end if
+        end do;  end do; end do
+
+    else if (rkstep == 2) then
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        u(z,1,x,y) = un(z,1,x,y) + u(z,1,x,y)
+                        u(z,2,x,y) = un(z,2,x,y) + u(z,2,x,y)
+                        u(z,3,x,y) = un(z,3,x,y) + u(z,3,x,y)
+                    end if
+        end do; end do; end do
+
+    end if
+
+ end  subroutine  advance
+
+ end   subroutine solve_navier_stokes
diff --git a/src/tests/integration/pde_solvers/navier-stokes/coarray-shear_coll.F90 b/src/tests/integration/pde_solvers/navier-stokes/coarray-shear_coll.F90
new file mode 100644
index 0000000..86ffe67
--- /dev/null
+++ b/src/tests/integration/pde_solvers/navier-stokes/coarray-shear_coll.F90
@@ -0,0 +1,1046 @@
+! Coarray 3D Navier-Stokes Solver Test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+
+!(*----------------------------------------------------------------------------------------------------------------------
+!         basic in-core shear code ( 7 words/node, not threaded, in-core, no file read/write )
+!------------------------------------------------------------------------------------------------------------------------*)
+
+! Define universal constants:
+! In the case of exactly representable numbers, the definitions are useful
+! to ensure subprogram argument type/kind/rank matching without having to
+! repeat kind specifiers everywhere.
+module constants_module
+  use iso_fortran_env, only : int64
+  implicit none
+  private
+  public :: one,zero
+  integer(int64), parameter :: one=1_int64,zero=0_int64
+end module
+
+! Initialize the random seed with a varying seed to ensure a different
+! random number sequence for each invocation of subroutine, e.g. for
+! invocations on different images of a coarray parallel program.
+! Setting any seed values to zero is deprecated because it can result
+! in low-quality random number sequences.
+! (Source: https://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html)
+module random_module
+  implicit none
+  private
+  public :: init_random_seed
+contains
+  subroutine init_random_seed()
+            use iso_fortran_env, only: int64
+            implicit none
+            integer, allocatable :: seed(:)
+            integer :: i, n, un, istat, dt(8), pid
+            integer(int64) :: t
+
+            call random_seed(size = n)
+            allocate(seed(n))
+            ! First try if the OS provides a random number generator
+            open(newunit=un, file="/dev/urandom", access="stream", &
+                 form="unformatted", action="read", status="old", iostat=istat)
+            if (istat == 0) then
+               if (this_image()==1) print *,"OS provides random number generator"
+               read(un) seed
+               close(un)
+            else
+               if (this_image()==1) print *,"OS does not provide random number generator"
+               ! Fallback to XOR:ing the current time and pid. The PID is
+               ! useful in case one launches multiple instances of the same
+               ! program in parallel.
+               call system_clock(t)
+               if (t == 0) then
+                  call date_and_time(values=dt)
+                  t = (dt(1) - 1970) * 365_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(2) * 31_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(3) * 24_int64 * 60 * 60 * 1000 &
+                       + dt(5) * 60 * 60 * 1000 &
+                       + dt(6) * 60 * 1000 + dt(7) * 1000 &
+                       + dt(8)
+               end if
+               pid = getpid()
+               t = ieor(t, int(pid, kind(t)))
+               do i = 1, n
+                  seed(i) = lcg(t)
+               end do
+            end if
+            call random_seed(put=seed)
+          contains
+            ! This simple PRNG might not be good enough for real work, but is
+            ! sufficient for seeding a better PRNG.
+            function lcg(s)
+              integer :: lcg
+              integer(int64) :: s
+              if (s == 0) then
+                 s = 104729
+              else
+                 s = mod(s, 4294967296_int64)
+              end if
+              s = mod(s * 279470273_int64, 4294967291_int64)
+              lcg = int(mod(s, int(huge(0), int64)), kind(0))
+            end function lcg
+          end subroutine init_random_seed
+end module random_module
+
+module run_size
+    use iso_fortran_env, only : int64,real64 ! 64-bit integer and real kind parameters
+    use constants_module, only : one         ! 64-bit unit to ensure argument kind match
+#ifndef HAVE_WALLTIME
+    use MPI, only : WALLTIME=>MPI_WTIME
+#endif
+    implicit none
+        real, codimension[*] ::  viscos, shear, b11, b22, b33, b12, velmax
+        integer(int64), codimension[*] ::  nx, ny, nz, nsteps, output_step
+        integer(int64), codimension[*] :: my, mx, first_y, last_y, first_x, last_x
+        real(real64), codimension[*] :: cpu_time_, tran_time, sync_time, total_time
+        real(real64), codimension[*] :: max_cpu_time, max_tran_time, max_sync_time, max_total_time
+        real(real64), codimension[*] :: min_cpu_time, min_tran_time, min_sync_time, min_total_time
+
+        real ::  time, cfl, dt
+        integer(int64) :: my_node, num_nodes
+        real, parameter :: pi = 3.141592653589793
+
+contains
+
+    subroutine max_velmax()
+        integer(int64) :: i
+
+        ! sync all
+        ! if( my_node == 1) then
+        !     do i = 2, num_nodes;     velmax = max( velmax, velmax[i] );    end do
+        ! end if
+        ! sync all
+        call co_max(velmax,1)
+        if (my_node>1) velmax = velmax[1]
+        sync all
+    end subroutine max_velmax
+
+    subroutine global_times()
+        integer(int64) :: i, stage
+
+        max_cpu_time = cpu_time_
+        max_tran_time = tran_time
+        max_total_time = sync_time
+        max_total_time = total_time
+        min_cpu_time = cpu_time_
+        min_tran_time = tran_time
+        min_total_time = sync_time
+        min_total_time = total_time
+
+        do stage = 1, num_nodes-1
+            i = 1 + mod( my_node-1+stage, num_nodes )
+            max_cpu_time = max( max_cpu_time, cpu_time_[i] )
+            min_cpu_time = min( min_cpu_time, cpu_time_[i] )
+            max_tran_time = max( max_tran_time, tran_time[i] )
+            min_tran_time = min( min_tran_time, tran_time[i] )
+            max_sync_time = max( max_sync_time, sync_time[i] )
+            min_sync_time = min( min_sync_time, sync_time[i] )
+            max_total_time = max( max_total_time, total_time[i] )
+            min_total_time = min( min_total_time, total_time[i] )
+        end do
+        sync all
+    end subroutine global_times
+
+subroutine copy3( A,B, n1, sA1, sB1, n2, sA2, sB2, n3, sA3, sB3 )
+  implicit none
+  complex, intent(in)  :: A(0:*)
+  complex, intent(out) :: B(0:*)
+  integer(int64), intent(in) :: n1, sA1, sB1
+  integer(int64), intent(in) :: n2, sA2, sB2
+  integer(int64), intent(in) :: n3, sA3, sB3
+  integer(int64) i,j,k
+
+  do k=0,n3-1
+     do j=0,n2-1
+        do i=0,n1-1
+           B(i*sB1+j*sB2+k*sB3) = A(i*sA1+j*sA2+k*sA3)
+        end do
+     end do
+  end do
+end subroutine copy3
+
+end module run_size
+
+program cshear
+
+  !(***********************************************************************************************************
+  !                   m a i n   p r o g r a m
+  !***********************************************************************************************************)
+      use iso_fortran_env, only : int64,real64 ! 64-bit integer and real kind parameters
+      use run_size
+      implicit none
+
+      interface
+        subroutine solve_navier_stokes
+        end subroutine solve_navier_stokes
+      end interface
+
+      num_nodes = num_images()
+      my_node = this_image()
+
+      nx=128; ny=128; nz=128
+      viscos=0.; shear=0.
+      b11=1.; b22=1.; b33=1.; b12=0.
+      nsteps=5; output_step=1
+
+      if( my_node == 1 ) then
+
+       !write(6,*) "nx,ny,nz : ";               read(5,*) nx, ny, nz
+        if ( mod(nx/2,num_nodes) /= 0) then;    write(6,*) "nx/2 not multiple of num_nodes";    stop;   end if
+        if ( mod(ny,num_nodes) /= 0) then;      write(6,*) " ny not multiple of num_nodes";     stop;   end if
+
+       !write(6,*) "viscos, shear : ";          read(5,*) viscos, shear
+       !write(6,*) "b11 b22 b33 b12 : ";        read(5,*) b11, b22, b33, b12
+       !write(6,*) "nsteps, output_step : ";    read(5,*) nsteps, output_step
+
+        write(6,fmt="(3(A,i4))") "nx =",nx,   "   ny =",ny,   "   nz =",nz
+        write(6,fmt="(2(A,f7.3))") "viscos = ", viscos, "      shear = ", shear
+        write(6,fmt="(A,4f7.3)") "b11 b22 b33 b12 = ", b11, b22, b33, b12
+        write(6,fmt="(2(A,i6))") "nsteps = ", nsteps, "       output_step = ", output_step
+
+        write(6,fmt="(A,i4,A)") "----------------- running on ", num_nodes, " images -------------------"
+
+      end if
+
+      sync all  !--- images > 1 wait on inputs from image = 1 !
+
+      if( my_node > 1 ) then
+        nx = nx[1];     ny = ny[1];     nz = nz[1]
+        viscos = viscos[1];     shear = shear[1]
+        b11 = b11[1];   b22 = b22[1];   b33 = b33[1];   b12 = b12[1]
+        nsteps = nsteps[1];     output_step = output_step[1]
+      end if
+
+        mx = nx/2 / num_nodes;  first_x = (my_node-1)*mx + 1;   last_x  = (my_node-1)*mx + mx
+        my = ny / num_nodes;    first_y = (my_node-1)*my + 1;   last_y  = (my_node-1)*my + my
+
+      if(my_node == 1 ) write(6,fmt="(A, f6.2)") "message size (MB) = ", real(nz*4*mx*my*8)/real(1024*1024)
+
+    call solve_navier_stokes
+
+    if (this_image()==1) print *,"Test passed."
+
+end program cshear
+
+!  (***********************************************************************************************************
+!             n a v i e r - s t o k e s   s o l v e r
+!  ************************************************************************************************************)
+
+  subroutine solve_navier_stokes
+      use run_size
+      implicit none
+
+  !(*****************************   declarations     ****************************************)
+
+       integer(int64) ::  stop, rflag, oflag, step, rkstep, nshells
+       real ::  k1(nx/2), k2(ny), k3(nz), mk1(nx/2), mk2(ny), mk3(nz) &
+              , kx(nx/2), ky_(nx/2,ny), ky(nx/2,ny), kz(nz)
+       complex :: sx(nx/2,3), sy(ny,3), sz(nz,3)
+       integer(int64) :: trigx, trigy, trigz, trigxy
+
+       complex, allocatable ::  u(:,:,:,:)[:]    ! u(nz,4,first_x:last_x,ny)[*]    !(*-- x-y planes --*)
+       complex, allocatable ::  ur(:,:,:,:)[:]   !ur(nz,4,first_y:last_y,nx/2)[*]  !(*-- x-z planes --*)
+       complex, allocatable ::  un(:,:,:,:)      !un(nz,3,first_x:last_x,ny)[*]    !(*-- x-y planes --*)
+       complex, allocatable :: bufr_X_Y(:,:,:,:)
+       complex, allocatable :: bufr_Y_X(:,:,:,:)
+       real :: t_start,t_end
+
+interface
+!--------    note: integer(int64)'s required for FFT's and other assembly-coded externals   ------
+
+   function ctrig( len ) bind(C)               !(*-- define complex FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in) :: len
+        integer(int64) :: ctrig     !-- C pointer!
+   end function ctrig
+
+   function rtrig( len ) bind(C)              !(*-- define real FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in):: len
+        integer(int64) :: rtrig     !-- C pointer!
+   end function rtrig
+
+   subroutine cfft( len, lot, data, inc, jmp, ctrig, isign ) bind(C)    !(*-- complex FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, ctrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine cfft
+
+   subroutine rfft( len, lot, data, inc, jmp, rtrig, isign ) bind(C)   !(*-- real FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, rtrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine rfft
+
+#ifdef HAVE_WALLTIME
+   function WALLTIME() bind(C, name = "WALLTIME")
+       import real64
+       real(real64) :: WALLTIME
+   end function WALLTIME
+#endif
+
+
+end interface
+
+       trigx = rtrig( nx )
+       trigy = ctrig( ny )
+       trigz = ctrig( nz )
+       trigxy = ctrig( nx+ny )
+
+       allocate (  u(nz , 4 , first_x:last_x , ny)[*] )     !(*-- y-z planes --*)
+       allocate ( ur(nz , 4 , first_y:last_y , nx/2)[*] )   !(*-- x-z planes --*)
+       allocate ( un(nz , 3 , first_x:last_x , ny) )        !(*-- y-z planes --*)
+       allocate ( bufr_X_Y(nz,4,mx,my) )
+       allocate ( bufr_Y_X(nz,4,my,mx) )
+
+
+       stop = 0;   step = 0;  rkstep = 2;  rflag = 0;  cfl = 1;   dt = 0
+       nshells = max( nx,ny,nz )
+
+                        call define_kspace
+                        call define_field
+                        call enforce_conjugate_symmetry
+                        call copy_n_s
+                        call define_shifts
+
+       total_time =  -WALLTIME()      !-- start the clock
+       tran_time = 0;       cpu_time_ = -WALLTIME()
+
+  !(*********************************   begin execution loop   *****************************************)
+
+       do while (stop == 0)
+
+                        call phase1
+            rkstep = 1
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call define_step
+                        call define_shifts
+                        call phase3
+                        call pressure
+        if (oflag /= 0) call spectra
+                        call advance
+                        call phase1
+            rkstep = 2
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call phase3
+                        call advance
+                        call pressure
+        if (rflag /= 0) call remesh
+                        call copy_s_n
+
+                        step = step + 1
+                        time = time + dt
+       end do
+
+  !(*********************************   end execution loop   ***********************************************)
+
+       deallocate ( u, ur, un )
+       deallocate ( bufr_X_Y );    deallocate ( bufr_Y_X )
+       sync all     !-- wait for all images to finish!
+
+       total_time =  total_time + WALLTIME()    !-- stop the clock
+       cpu_time_ =  cpu_time_ + WALLTIME()    !-- stop the clock
+       call global_times
+
+       if (my_node == 1 )   write(6,fmt="(3(10X,A,2f7.2))")  &
+                              "total_time ", min_total_time/step, max_total_time/step &
+                            , "cpu_time_ ", min_cpu_time/step, max_cpu_time/step &
+                            , "tran_time ", min_tran_time/step, max_tran_time/step
+
+
+       write(6,fmt="(A,i4,3f7.2)")  "image ", my_node, total_time/step, cpu_time_/step, tran_time/step
+
+
+contains
+
+ !(***********************************************************************************************************
+ !                          transpose the Y and Z planes
+ !***********************************************************************************************************)
+
+!-----                   u(nz,4,mx,my*num_nodes) [num_nodes]
+!-----                  ur(nz,4,my,mx*num_nodes) [num_nodes]
+!-----                bufr(nz,4,my,mx) or bufr(nz,4,mx,my)
+
+!-------------   out-of-place transpose data_s --> data_r  ----------------------------
+
+ subroutine transpose_X_Y
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    integer(int64) :: i,stage
+    real :: t_start,t_end
+
+    cpu_time_ = cpu_time_ + WALLTIME()
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (    u(1,1,first_x,1+(my_node-1)*my) &                   !-- intra-node transpose
+                ,  ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- no inter-node transpose needed
+                ,   nz*3, one, one        &                                 !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        bufr_X_Y(:,:,:,:) = u(:,:,:,1+(my_node-1)*my:my_node*my)[i]         !-- inter-node transpose to buffer
+        call copy3 ( bufr_X_Y, ur(1,1,first_y,1+(i-1)*mx)  &                !-- intra-node transpose from buffer
+                        ,   nz*3, one, one        &                             !-- note: only 3 of 4 words needed
+                        ,   mx, nz*4, nz*4*my &
+                        ,   my, nz*4*mx, nz*4 )
+    end do
+
+    sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+    cpu_time_ = cpu_time_ - WALLTIME()
+
+ end  subroutine transpose_X_Y
+
+!-------------   out-of-place transpose data_r --> data_s  ----------------------------
+
+subroutine transpose_Y_X
+    use run_size
+    implicit none
+
+    integer(int64) :: i, stage
+
+    cpu_time_ = cpu_time_ + WALLTIME()
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (   ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- intra-node transpose
+                ,   u(1,1,first_x,1+(my_node-1)*my) &                   !-- no inter-node transpose needed
+                ,   nz*4, one, one        &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        bufr_Y_X(:,:,:,:) = ur(:,:,:,1+(my_node-1)*mx:my_node*mx)[i]        !-- inter-node transpose to buffer
+        call copy3 ( bufr_Y_X, u(1,1,first_x,1+(i-1)*my)  &                 !-- intra-node transpose from buffer
+                    ,   nz*4, one, one        &
+                    ,   my, nz*4, nz*4*mx &
+                    ,   mx, nz*4*my, nz*4 )
+    end do
+
+    sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+    cpu_time_ = cpu_time_ - WALLTIME()
+
+ end  subroutine transpose_Y_X
+
+
+!(*************************************************************************************************************
+!           enforce conjugate symmetry for plane kx=0 of wavespace  (half of this plane is redundant)
+!***************************************************************************************************************)
+
+  subroutine enforce_conjugate_symmetry
+
+    integer(int64) ::  i, x, y, z
+
+!(*------------------------- un( K ) = conjg( un( -K ) ) ---------------------*)
+
+    if (my_node == 1 ) then     !-- x=1 is in node=1
+        x = 1
+        do i = 1, 3
+            z = 1;              y = 1;              un(z,i,x,y) = 0
+            z = 1;              do y = 2, ny/2;     un(z,i,x,y) = conjg( un(z,i,x,ny+2-y) );        end do
+            do z = 2, nz/2;     y = 1;              un(z,i,x,y) = conjg( un(nz+2-z,i,x,y) );        end do
+            do z = 2, nz/2;     do y = 2, ny;       un(z,i,x,y) = conjg( un(nz+2-z,i,x,ny+2-y) );   end do; end do
+        end do
+    end if
+end  subroutine enforce_conjugate_symmetry
+
+ !(***********************************************************************************************************
+ !                spectra :  accumulate spectra and other statistics over flow field
+ !***********************************************************************************************************)
+
+ subroutine  spectra
+
+      use run_size
+     implicit none
+
+    integer(int64) :: k, x, y, z
+    real :: kk, ww, uw, uu, uv, duu, factor   &
+          , ek(nshells), dk(nshells), hk(nshells), tk(nshells), sample(nshells)
+    real, save, codimension[*] ::  sum_ek, sum_dk, sum_hk, sum_tk
+    real, save ::  sum_ek_initial, sum_dk_initial, sum_hk_initial, sum_tk_initial
+    logical, save :: first_call=.true.
+    real, parameter :: tolerance=0.01,negligible=0.001
+
+      total_time =  total_time + WALLTIME()     !-- stop the clock!  time/step does not include spectra time
+
+      oflag = 0
+      ek = 0;       dk = 0;       hk = 0;     tk = 0;   sample = 0
+
+ !(*---------------------   three dimensional spectra  -----------------------*)
+
+      do x = first_x, last_x;   do y = 1, ny;      do  z = 1, nz
+
+            if( mk1(x)+mk2(y)+mk3(z) > 2./9. ) &
+                              then;   factor = 0
+            else if (x == 1)  then;   factor = 1
+                              else;   factor = 2
+            end if
+
+            kk = kx(x)**2 + ky(x,y)**2 + kz(z)**2
+            k = 1 + int( sqrt( kk ) + 0.5 )
+
+              uu = factor * real( un(z,1,x,y) * conjg( un(z,1,x,y) ) &
+                                + un(z,2,x,y) * conjg( un(z,2,x,y) ) &
+                                + un(z,3,x,y) * conjg( un(z,3,x,y) ) )
+              ww = kk * uu
+              uv = factor * real( un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+              uw = factor * 2 * aimag( kx(x) *   un(z,2,x,y) * conjg( un(z,3,x,y) ) &
+                                     + ky(x,y) * un(z,3,x,y) * conjg( un(z,1,x,y) ) &
+                                     + kz(z) *   un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+              duu = factor * real( un(z,1,x,y) * conjg( u(z,1,x,y) ) &
+                                 + un(z,2,x,y) * conjg( u(z,2,x,y) ) &
+                                 + un(z,3,x,y) * conjg( u(z,3,x,y) ) ) / (dt/2) + shear * uv
+
+              sample(k) = sample(k) + factor        !(*-- shell sample --*)
+              ek(k) = ek(k) + uu                    !(*-- 2 * energy sum --*)
+              dk(k) = dk(k) + ww                    !(*-- enstrophy sum --*)
+              hk(k) = hk(k) + uw                    !(*-- helicity sum --*)
+              tk(k) = tk(k) + duu                   !(*-- transfer sum --*)
+
+      end do;   end do;  end do
+
+ !(************************     finished accumulation :  compute final statistics     *************************)
+
+    sum_ek = 0;     sum_dk = 0;     sum_hk = 0;     sum_tk = 0
+    do k = nshells, 1, -1
+        sum_ek  = sum_ek + ek(k)
+        sum_dk  = sum_dk + dk(k)
+        sum_hk  = sum_hk + hk(k)
+        sum_tk  = sum_tk + tk(k)
+    end do
+
+    sync all
+    call co_sum(sum_ek,1)
+    call co_sum(sum_dk,1)
+    call co_sum(sum_hk,1)
+    call co_sum(sum_tk,1)
+    if (my_node == 1)  then
+        ! do k = 2, num_nodes
+        !     sum_ek = sum_ek + sum_ek[k]
+        !     sum_dk = sum_dk + sum_dk[k]
+        !     sum_hk = sum_hk + sum_hk[k]
+        !     sum_tk = sum_tk + sum_tk[k]
+        ! end do
+
+        if (step == 0)   write(6,*) "step   time     energy    enstrophy   helicity   transfer"
+        write(6,fmt="(i3, 5e11.3)")  step,  time,    sum_ek,   sum_dk,     sum_hk,    sum_tk
+        if (first_call) then
+          first_call = .false.
+          sum_ek_initial = sum_ek
+          sum_dk_initial = sum_dk
+          sum_hk_initial = sum_hk
+          sum_tk_initial = sum_tk
+        else
+          if (abs((sum_ek_initial-sum_ek)/sum_ek)>tolerance) error stop "Test failed"
+          if (abs((sum_dk_initial-sum_dk)/sum_dk)>tolerance) error stop "Test failed"
+          if (abs((sum_hk_initial-sum_hk)/sum_hk)>tolerance) error stop "Test failed"
+          if (abs((sum_tk_initial-sum_tk)/sum_ek)>negligible) error stop "Test failed"
+        end if
+    end if
+
+     total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end  subroutine  spectra
+
+  !(************************************************************************************************************
+  !        define_field  :   define initial flow field from scratch
+  !************************************************************************************************************)
+
+ subroutine define_field
+
+    use constants_module, only : zero
+    use run_size
+    use random_module
+    implicit none
+
+    real ::     k, k12, f, phi, theta1, theta2
+    complex ::  alpha, beta
+    integer(int64) ::  x, y, z
+    real, parameter :: klo=8, khi=16
+
+   call init_random_seed !(* seed a different pseudo-random number sequence for each image *)
+   time = 0
+
+   do x = first_x, last_x
+       do  y = 1, ny
+            do z = 1, nz
+                 call random_number(theta1 )
+                 call random_number(theta2 )
+                 call random_number(phi    )
+                 k   = sqrt( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                 k12 = sqrt( kx(x)**2 + ky(x,y)**2 )
+
+                 if ( k == 0  .or.  mk1(x)+mk2(y)+mk3(z)>2./9.  .or.  k < klo  .or.  k > khi ) &
+                     then;   f = 0
+                     else;   f = sqrt( 1./(2*pi) ) / k
+                 end if
+
+                 alpha = f * exp( (0,2) * pi * theta1 ) * cos( 2*pi * phi )
+                 beta  = f * exp( (0,2) * pi * theta2 ) * sin( 2*pi * phi )
+
+                 if (k12 == 0) &
+                 then; un(z,1,x,y) = alpha
+                       un(z,2,x,y) = beta
+                       un(z,3,x,y) = 0
+
+                 else; un(z,1,x,y) = ( beta * kz(z) * kx(x)   + alpha * k * ky(x,y) ) / ( k * k12 )
+                       un(z,2,x,y) = ( beta * kz(z) * ky(x,y) - alpha * k * kx(x)   ) / ( k * k12 )
+                       un(z,3,x,y) = - beta * k12 / k
+                 end if
+
+   end do;  end do; end do
+ end  subroutine define_field
+
+ !(***********************************************************************************************************
+ !          define_shifts  :    define coordinate shifts for control of 1-d alias errors
+ ! ***********************************************************************************************************)
+
+    subroutine  define_shifts
+    use constants_module, only : zero
+    use run_size
+    implicit none
+
+           integer(int64) ::  x, y, z
+           integer(int64), save ::  init = 0
+           real :: delta_x, delta_y, delta_z
+           integer :: i,seed_size
+
+           if (init == 0) &     !-- Note: delta's not carried over from previous run
+           then;
+                init = 1
+                call random_seed(size=seed_size)
+                call random_seed(put=[(1234567,i=1,seed_size)])!(* same random numbers for each image! *)
+                do  x = 1, nx/2;  sx(x,3) = exp (  (0,1) * ( pi / nx ) * k1(x) ); end do
+                do  y = 1, ny  ;  sy(y,3) = exp (  (0,1) * ( pi / ny ) * k2(y) ); end do
+                do  z = 1, nz  ;  sz(z,3) = exp (  (0,1) * ( pi / nz ) * k3(z) ); end do
+            else;
+                call random_number(delta_x); delta_x = 2*pi / nx * delta_x
+                do  x = 1, nx/2;  sx(x,1) = sx(x,3)
+                                  sx(x,2) = exp (  (0,1) * delta_x * k1(x) )
+                                  sx(x,3) = exp (  (0,1) * ( delta_x + pi / nx ) * k1(x) ); end do
+
+                call random_number(delta_y); delta_y = 2*pi / ny * delta_y
+                do  y = 1, ny  ;  sy(y,1) = sy(y,3)
+                                  sy(y,2) = exp (  (0,1) * delta_y * k2(y) )
+                                  sy(y,3) = exp (  (0,1) * ( delta_y + pi / ny ) * k2(y) ); end do
+
+                call random_number(delta_z); delta_z = 2*pi / nz * delta_z
+                do  z = 1, nz  ;  sz(z,1) = sz(z,3)
+                                  sz(z,2) = exp (  (0,1) * delta_z * k3(z) )
+                                  sz(z,3) = exp (  (0,1) * ( delta_z + pi / nz ) * k3(z) ); end do
+           end if
+
+ end  subroutine  define_shifts
+
+  !(***********************************************************************************************************
+  !       define_step  :   update time, metric, shifts for the next step
+  !**********************************************************************************************************)
+
+  subroutine  define_step
+      use run_size
+      implicit none
+
+    sync all
+
+    if (cfl /= 0) then
+cpu_time_ = cpu_time_ + WALLTIME()
+        call max_velmax
+cpu_time_ = cpu_time_ - WALLTIME()
+        dt = cfl / velmax
+    end if
+
+    if (        shear > 0               &
+        .and.  .01*b11*shear*dt < b12   &
+        .and.  b12 <= b11*shear*dt )    then
+            dt = b12 / ( b11 * shear )          !(* limit dt, hit the orthognal mesh *)
+            oflag = 1
+    else if ( mod (step,output_step) == 0 ) then
+            oflag = 1
+    end if
+
+    b12 = b12 - b11 * shear * dt
+
+    if ( b12 < -b22/2 )     rflag = 1                                   !(* remesh at the end of the step? *)
+    if ( step == nsteps )   stop = 1                                    !(* last step? *)
+
+ end   subroutine    define_step
+
+   !(***********************************************************************************************************
+   !      define_kspace  :   define physical wavespace from computational wavespace and metric
+   !**********************************************************************************************************)
+
+  subroutine    define_kspace
+    use run_size
+    implicit none
+
+     integer(int64) ::  x, y, z
+
+       do  x = 1, nx/2   ;   k1(x) = x - 1;     end do
+       do  y = 1, ny/2+1 ;   k2(y) = y - 1;     end do
+       do  z = 1, nz/2+1 ;   k3(z) = z - 1;     end do
+
+       do  y = ny/2+2, ny;   k2(y) = y - 1 - ny;    end do
+       do  z = nz/2+2, nz;   k3(z) = z - 1 - nz;    end do
+
+       do  x = 1, nx/2 ;     mk1(x) = (k1(x)/nx)**2;    kx(x) =   b11 * k1(x);  end do
+       do  z = 1, nz   ;     mk3(z) = (k3(z)/nz)**2;    kz(z) =   b33 * k3(z);  end do
+       do  y = 1, ny   ;     mk2(y) = (k2(y)/ny)**2
+                             do  x = 1, nx/2 ;    ky(x,y) = b22 * k2(y) + b12 * k1(x);  end do; end do
+
+end   subroutine    define_kspace
+
+  !(***********************************************************************************************************
+  !   phase 1 :  on entry, data-plane contains velocity in wave space.  interpolate database, shifted mesh,
+  !              and proceed to physical y space .
+  !************************************************************************************************************)
+
+  subroutine  phase1
+
+      use run_size
+      implicit none
+
+    complex :: shift
+    integer(int64) :: i, x, y, z
+
+        do  x = first_x, last_x
+
+          do  y = 1, ny;    do  z = 1, nz
+             shift = sz(z,rkstep+1) * sy(y,rkstep+1) * sx(x,rkstep+1)
+             u(z,1,x,y) = shift * u(z,1,x,y)
+             u(z,2,x,y) = shift * u(z,2,x,y)
+             u(z,3,x,y) = shift * u(z,3,x,y)
+        end do; end do
+
+!(*---------------------------   LEAVING FOURIER WAVE SPACE  --------------------------*)
+
+        do i = 1, 3
+            call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, one );    end do
+        end do
+
+ end   subroutine  phase1
+
+ !(**********************************************************************************************************
+ !     phase 2 :  on entry, data-plane contains velocity in physical y space, and wave x,z space on shifted
+ !                mesh.  Proceed to physical x,z space,  form nonlinear terms, and return to wave x,z space.
+ !***********************************************************************************************************)
+
+ subroutine  phase2
+
+     use run_size
+     implicit none
+
+     complex :: s2(nz,nx/2), vs(nz,nx/2)
+     integer(int64) ::  i, x, y, z
+     real :: v2r, v2i, s2r, s2i, u1r, u1i, u2r, u2i, u3r, u3i, u4r, u4i
+
+  velmax = 0
+
+  do  y = first_y, last_y
+
+      do  x = 1, nx/2 ;  do  z = 1, nz ;   vs(z,x) = ur(z,2,y,x);    end do; end do
+
+      do  i = 1, 3
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, one )
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, one )
+      end do
+
+!(*----------------------------  WELCOME TO PHYSICAL SPACE  --------------------------*)
+
+      do  x = 1, nx/2;  do  z = 1, nz
+           u1r = real(ur(z,1,y,x));  u1i = aimag(ur(z,1,y,x))
+           u2r = real(ur(z,2,y,x));  u2i = aimag(ur(z,2,y,x))
+           u3r = real(ur(z,3,y,x));  u3i = aimag(ur(z,3,y,x))
+
+      if ( rkstep == 1 ) velmax = max( velmax &
+                                     , b11*nx*abs(u1r) + b22*ny*abs(u2r) + b33*nz*abs(u3r) &
+                                     , b11*nx*abs(u1i) + b22*ny*abs(u2i) + b33*nz*abs(u3i) )
+
+           v2r = u2r * u2r;             v2i = u2i * u2i
+           s2r = u1r * u3r;             s2i = u1i * u3i
+           u4r = u2r * u3r;             u4i = u2i * u3i
+           u3r = u3r * u3r  -  v2r;     u3i = u3i * u3i  -  v2i
+           u2r = u1r * u2r;             u2i = u1i * u2i
+           u1r = u1r * u1r  -  v2r;     u1i = u1i * u1i  -  v2i
+
+           s2(z,x)     = cmplx(s2r, s2i)
+           ur(z,1,y,x) = cmplx(u1r, u1i)
+           ur(z,2,y,x) = cmplx(u2r, u2i)
+           ur(z,3,y,x) = cmplx(u3r, u3i)
+           ur(z,4,y,x) = cmplx(u4r, u4i)
+      end do;   end do
+
+!(*----------------------------  LEAVING PHYSICAL SPACE  --------------------------*)
+
+      do  i = 1,  4
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, -one )
+           do  z = 1, nz ;   ur(z,i,y,1) = cmplx(real(ur(z,i,y,1)),0);   end do
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, -one )
+      end do
+
+      call rfft ( nx, nz, s2, nz, one, trigx, -one )
+      do  z = 1, nz ;   s2(z,1) = cmplx(real(s2(z,1)),0);    end do
+      call cfft ( nz, nx/2, s2, one, nz, trigz, -one )
+
+      do  x = 1, nx/2;  do  z = 1, nz
+          ur(z,1,y,x) = kx(x) * ur(z,1,y,x) + kz(z) * s2(z,x)   - (0,1) * 2*nx*nz*shear * vs(z,x)
+          ur(z,3,y,x) = kx(x) * s2(z,x)     + kz(z) * ur(z,3,y,x)
+      end do;   end do
+  end do
+
+ end  subroutine  phase2
+
+  !(***********************************************************************************************************
+  !     phase 3 :  on entry, the data-plane contains the four stresses on a shifted mesh in physical y space,
+  !                wave x,z space.   Return to y  wave space on unshifted mesh and complete time derivative of
+  !                velocity ( not divergence free yet )
+  !***********************************************************************************************************)
+
+  subroutine  phase3
+
+      use run_size
+      implicit none
+
+    integer(int64) :: i, x, y, z
+    complex :: shift
+
+    do  x = first_x, last_x
+
+       do i = 1, 4
+           call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, -one )
+       end do
+
+!(*---------------------------   WELCOME TO FOURIER WAVE SPACE  --------------------------*)
+
+       do  y = 1, ny ;   do  z = 1, nz
+               shift = -dt / (4*nx*ny*nz) * (0,1)*conjg( sy(y,rkstep) * sz(z,rkstep) * sx(x,rkstep) )
+               u(z,1,x,y) = shift * (         u(z,1,x,y) + ky(x,y) * u(z,2,x,y) )
+               u(z,2,x,y) = shift * ( kx(x) * u(z,2,x,y) + kz(z)   * u(z,4,x,y) )
+               u(z,3,x,y) = shift * (         u(z,3,x,y) + ky(x,y) * u(z,4,x,y) )
+       end do;  end do
+    end do
+
+ end   subroutine  phase3
+
+  !(***********************************************************************************************************
+  !   pressure :  add the gradient of a scalar, enforce continuity ( zero divergence )
+  !***********************************************************************************************************)
+
+  subroutine  pressure
+
+      use run_size
+      implicit none
+
+     complex :: psi
+     integer(int64) :: x, y, z
+
+      do x = first_x, last_x ;     do  y = 1, ny
+
+            if ( x /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( kx(x) * u(z,1,x,y) + ky(x,y) * u(z,2,x,y) + kz(z) * u(z,3,x,y) ) &
+                              / ( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                        u(z,1,x,y) = u(z,1,x,y) - kx(x) * psi
+                        u(z,2,x,y) = u(z,2,x,y) - ky(x,y) * psi
+                        u(z,3,x,y) = u(z,3,x,y) - kz(z) * psi
+                   end do
+            else if ( y /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( ky(1,y) * u(z,2,1,y) + kz(z) * u(z,3,1,y) ) &
+                              / ( ky(1,y)**2 + kz(z)**2 )
+                        u(z,2,1,y) = u(z,2,1,y) - ky(1,y) * psi
+                        u(z,3,1,y) = u(z,3,1,y) - kz(z) * psi
+                  end do
+            else
+                  do  z = 1, nz ;    u(z,3,1,1) = 0;     end do
+            end if
+      end do;    end do
+
+end   subroutine  pressure
+
+!(*****************************************************************************************************************
+!                                remesh  :  remesh the sheared coordinate system
+!*****************************************************************************************************************)
+
+subroutine   remesh
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    complex :: u2(nx+ny,nz), shift(nx+ny)
+    integer(int64) :: i, x, y, z
+
+    write(6,fmt="(A,i4)") "remesh image ", my_node
+
+    total_time =  total_time + WALLTIME()     !-- stop the clock!
+
+    do x = first_x, last_x
+
+        do  y = 1, nx+ny ;   shift(y) =  exp( (0,-2) * pi / (nx+ny) * k1(x) * (y - 1) ) / (nx+ny);    end do
+
+        do  i = 1,  3
+            do  z = 1, nz
+                do  y = 1, ny/2           ;   u2(y,z) = u(z,i,x,y);     end do
+                do  y = ny/2+1, nx+ny/2+1 ;   u2(y,z) = 0;              end do
+                do  y = nx+ny/2+2, nx+ny  ;   u2(y,z) = u(z,i,x,y-nx);  end do
+            end do
+
+            call cfft ( nx+ny, nz, u2, one , nx+ny, trigxy, one )
+
+            do  z = 1, nz ;  do  y = 1, nx+ny ;     u2(y,z) = u2(y,z) * shift(y);  end do;  end do
+
+            call cfft ( nx+ny, nz, u2, one, nx+ny, trigxy, -one )
+
+            do  z = 1, nz
+                do  y = 1, ny/2
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y,z)
+                    end if
+                end do
+                do  y = ny/2+1, ny
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y+nx,z)
+                    end if
+                end do
+            end do
+        end do
+
+        do  y = 1, ny ;    ky(x,y) = ky(x,y) + b22 * k1(x);     end do           !(* update ky for this x *)
+
+    end do
+
+    b12 = b12 + b22;     rflag = 0                                               !(*   update metric, account for remesh    *)
+
+    total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end subroutine   remesh
+
+ !(***********************************************************************************************************
+ !         copy_n_s,   copy_s_n :  copy data between data_s and data_n
+ !***********************************************************************************************************)
+
+ subroutine  copy_n_s
+
+     use run_size
+     implicit none
+
+     integer(int64) ::  x, y, z
+
+     do y = 1, ny;    do x = first_x, last_x;   do z = 1, nz
+        u(z,1,x,y) = un(z,1,x,y)
+        u(z,2,x,y) = un(z,2,x,y)
+        u(z,3,x,y) = un(z,3,x,y)
+     end do; end do; end do
+ end  subroutine  copy_n_s
+
+ subroutine  copy_s_n
+
+     use run_size
+     implicit none
+
+     integer(int64) ::  x, y, z
+
+     do y = 1, ny; do x = first_x, last_x;     do z = 1, nz
+       un(z,1,x,y) = u(z,1,x,y)
+       un(z,2,x,y) = u(z,2,x,y)
+       un(z,3,x,y) = u(z,3,x,y)
+    end do; end do; end do
+
+ end  subroutine  copy_s_n
+
+ !(***********************************************************************************************************
+ !                         advance :     second-order runge-kutta time step algorithm
+ !***********************************************************************************************************)
+
+ subroutine  advance
+
+     use run_size
+     implicit none
+
+    integer(int64) ::  x, y, z
+    real :: factor, xyfac, zfac(nz)  !(* viscous integrating factors *)
+
+    if (rkstep == 1) then
+        do z = 1, nz;     zfac(z) = exp( - viscos * dt * kz(z)**2 ); end do
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                ky_(x,y) = ky(x,y)
+                ky(x,y) = b22 * k2(y) + b12 * k1(x)
+
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        factor = zfac(z) * exp( - viscos * dt * ( kx(x)**2 + ( ky_(x,y)**2 + ky_(x,y)*ky(x,y) + ky(x,y)**2 )/3 ) )
+
+                        un(z,1,x,y) = factor * ( un(z,1,x,y) + u(z,1,x,y) )
+                        u(z,1,x,y)  = un(z,1,x,y) + factor * u(z,1,x,y)
+
+                        un(z,2,x,y) = factor * ( un(z,2,x,y) + u(z,2,x,y) )
+                        u(z,2,x,y)  = un(z,2,x,y) + factor * u(z,2,x,y)
+
+                        un(z,3,x,y) = factor * ( un(z,3,x,y) + u(z,3,x,y) )
+                        u(z,3,x,y)  = un(z,3,x,y) + factor * u(z,3,x,y)
+                    end if
+        end do;  end do; end do
+
+    else if (rkstep == 2) then
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        u(z,1,x,y) = un(z,1,x,y) + u(z,1,x,y)
+                        u(z,2,x,y) = un(z,2,x,y) + u(z,2,x,y)
+                        u(z,3,x,y) = un(z,3,x,y) + u(z,3,x,y)
+                    end if
+        end do; end do; end do
+
+    end if
+
+ end  subroutine  advance
+
+ end   subroutine solve_navier_stokes
diff --git a/src/tests/integration/pde_solvers/navier-stokes/coarray-shear_coll_lock.f90 b/src/tests/integration/pde_solvers/navier-stokes/coarray-shear_coll_lock.f90
new file mode 100644
index 0000000..7daefcf
--- /dev/null
+++ b/src/tests/integration/pde_solvers/navier-stokes/coarray-shear_coll_lock.f90
@@ -0,0 +1,1036 @@
+! Coarray 3D Navier-Stokes Solver Test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+
+!(*----------------------------------------------------------------------------------------------------------------------
+!         basic in-core shear code ( 7 words/node, not threaded, in-core, no file read/write )
+!------------------------------------------------------------------------------------------------------------------------*)
+
+! Define universal constants:
+! In the case of exactly representable numbers, the definitions are useful
+! to ensure subprogram argument type/kind/rank matching without having to
+! repeat kind specifiers everywhere.
+module constants_module
+  use iso_fortran_env, only : int64
+  implicit none
+  private
+  public :: one,zero
+  integer(int64), parameter :: one=1_int64,zero=0_int64
+end module
+
+! Initialize the random seed with a varying seed to ensure a different
+! random number sequence for each invocation of subroutine, e.g. for
+! invocations on different images of a coarray parallel program.
+! Setting any seed values to zero is deprecated because it can result
+! in low-quality random number sequences.
+! (Source: https://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html)
+module random_module
+  implicit none
+  private
+  public :: init_random_seed
+contains
+  subroutine init_random_seed()
+            use iso_fortran_env, only: int64
+            implicit none
+            integer, allocatable :: seed(:)
+            integer :: i, n, un, istat, dt(8), pid
+            integer(int64) :: t
+
+            call random_seed(size = n)
+            allocate(seed(n))
+            ! First try if the OS provides a random number generator
+            open(newunit=un, file="/dev/urandom", access="stream", &
+                 form="unformatted", action="read", status="old", iostat=istat)
+            if (istat == 0) then
+               if (this_image()==1) print *,"OS provides random number generator"
+               read(un) seed
+               close(un)
+            else
+               if (this_image()==1) print *,"OS does not provide random number generator"
+               ! Fallback to XOR:ing the current time and pid. The PID is
+               ! useful in case one launches multiple instances of the same
+               ! program in parallel.
+               call system_clock(t)
+               if (t == 0) then
+                  call date_and_time(values=dt)
+                  t = (dt(1) - 1970) * 365_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(2) * 31_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(3) * 24_int64 * 60 * 60 * 1000 &
+                       + dt(5) * 60 * 60 * 1000 &
+                       + dt(6) * 60 * 1000 + dt(7) * 1000 &
+                       + dt(8)
+               end if
+               pid = getpid()
+               t = ieor(t, int(pid, kind(t)))
+               do i = 1, n
+                  seed(i) = lcg(t)
+               end do
+            end if
+            call random_seed(put=seed)
+          contains
+            ! This simple PRNG might not be good enough for real work, but is
+            ! sufficient for seeding a better PRNG.
+            function lcg(s)
+              integer :: lcg
+              integer(int64) :: s
+              if (s == 0) then
+                 s = 104729
+              else
+                 s = mod(s, 4294967296_int64)
+              end if
+              s = mod(s * 279470273_int64, 4294967291_int64)
+              lcg = int(mod(s, int(huge(0), int64)), kind(0))
+            end function lcg
+          end subroutine init_random_seed
+end module random_module
+
+module run_size
+    use iso_fortran_env ! 64-bit integer and real kind parameters
+    use constants_module, only : one         ! 64-bit unit to ensure argument kind match
+    implicit none
+        real, codimension[*] ::  viscos, shear, b11, b22, b33, b12, velmax
+        integer(int64), codimension[*] ::  nx, ny, nz, nsteps, output_step
+        integer(int64), codimension[*] :: my, mx, first_y, last_y, first_x, last_x
+        real(real64), codimension[*] :: cpu_time, tran_time, sync_time, total_time
+        real(real64), codimension[*] :: max_cpu_time, max_tran_time, max_sync_time, max_total_time
+        real(real64), codimension[*] :: min_cpu_time, min_tran_time, min_sync_time, min_total_time
+        type(lock_type),save :: l_xy_buff[*]
+        type(lock_type),save :: l_yx_buff[*]
+        real ::  time, cfl, dt
+        integer(int64) :: my_node, num_nodes
+        real, parameter :: pi = 3.141592653589793
+
+contains
+
+    subroutine max_velmax()
+        integer(int64) :: i
+
+        ! sync all
+        ! if( my_node == 1) then
+        !     do i = 2, num_nodes;     velmax = max( velmax, velmax[i] );    end do
+        ! end if
+        ! sync all
+        call co_max(velmax,1)
+        if (my_node>1) velmax = velmax[1]
+        sync all
+    end subroutine max_velmax
+
+    subroutine global_times()
+        integer(int64) :: i, stage
+
+        max_cpu_time = cpu_time
+        max_tran_time = tran_time
+        max_total_time = sync_time
+        max_total_time = total_time
+        min_cpu_time = cpu_time
+        min_tran_time = tran_time
+        min_total_time = sync_time
+        min_total_time = total_time
+
+        do stage = 1, num_nodes-1
+            i = 1 + mod( my_node-1+stage, num_nodes )
+            max_cpu_time = max( max_cpu_time, cpu_time[i] )
+            min_cpu_time = min( min_cpu_time, cpu_time[i] )
+            max_tran_time = max( max_tran_time, tran_time[i] )
+            min_tran_time = min( min_tran_time, tran_time[i] )
+            max_sync_time = max( max_sync_time, sync_time[i] )
+            min_sync_time = min( min_sync_time, sync_time[i] )
+            max_total_time = max( max_total_time, total_time[i] )
+            min_total_time = min( min_total_time, total_time[i] )
+        end do
+        sync all
+    end subroutine global_times
+
+subroutine copy3( A,B, n1, sA1, sB1, n2, sA2, sB2, n3, sA3, sB3 )
+  implicit none
+  complex, intent(in)  :: A(0:*)
+  complex, intent(out) :: B(0:*)
+  integer(int64), intent(in) :: n1, sA1, sB1
+  integer(int64), intent(in) :: n2, sA2, sB2
+  integer(int64), intent(in) :: n3, sA3, sB3
+  integer(int64) i,j,k
+
+  do k=0,n3-1
+     do j=0,n2-1
+        do i=0,n1-1
+           B(i*sB1+j*sB2+k*sB3) = A(i*sA1+j*sA2+k*sA3)
+        end do
+     end do
+  end do
+end subroutine copy3
+
+end module run_size
+
+program cshear
+
+  !(***********************************************************************************************************
+  !                   m a i n   p r o g r a m
+  !***********************************************************************************************************)
+      use iso_fortran_env, only : int64,real64 ! 64-bit integer and real kind parameters
+      use run_size
+      implicit none
+
+      interface
+        subroutine solve_navier_stokes
+        end subroutine solve_navier_stokes
+      end interface
+
+      num_nodes = num_images()
+      my_node = this_image()
+
+      if( my_node == 1 ) then
+
+        write(6,*) "nx,ny,nz : ";               read(5,*) nx, ny, nz
+        if ( mod(nx/2,num_nodes) /= 0) then;    write(6,*) "nx/2 not multiple of num_nodes";    stop;   end if
+        if ( mod(ny,num_nodes) /= 0) then;      write(6,*) " ny not multiple of num_nodes";     stop;   end if
+
+        write(6,*) "viscos, shear : ";          read(5,*) viscos, shear
+        write(6,*) "b11 b22 b33 b12 : ";        read(5,*) b11, b22, b33, b12
+        write(6,*) "nsteps, output_step : ";    read(5,*) nsteps, output_step
+
+        write(6,fmt="(3(A,i4))") "nx =",nx,   "   ny =",ny,   "   nz =",nz
+        write(6,fmt="(2(A,f7.3))") "viscos = ", viscos, "      shear = ", shear
+        write(6,fmt="(A,4f7.3)") "b11 b22 b33 b12 = ", b11, b22, b33, b12
+        write(6,fmt="(2(A,i6))") "nsteps = ", nsteps, "       output_step = ", output_step
+
+        write(6,fmt="(A,i4,A)") "----------------- running on ", num_nodes, " images -------------------"
+
+      end if
+
+      sync all  !--- images > 1 wait on inputs from image = 1 !
+
+      if( my_node > 1 ) then
+        nx = nx[1];     ny = ny[1];     nz = nz[1]
+        viscos = viscos[1];     shear = shear[1]
+        b11 = b11[1];   b22 = b22[1];   b33 = b33[1];   b12 = b12[1]
+        nsteps = nsteps[1];     output_step = output_step[1]
+      end if
+
+ 	    mx = nx/2 / num_nodes;  first_x = (my_node-1)*mx + 1;   last_x  = (my_node-1)*mx + mx
+        my = ny / num_nodes;    first_y = (my_node-1)*my + 1;   last_y  = (my_node-1)*my + my
+
+      if(my_node == 1 ) write(6,fmt="(A, f6.2)") "message size (MB) = ", real(nz*4*mx*my*8)/real(1024*1024)
+
+    call solve_navier_stokes
+
+end program cshear
+
+!  (***********************************************************************************************************
+!             n a v i e r - s t o k e s   s o l v e r
+!  ************************************************************************************************************)
+
+  subroutine solve_navier_stokes
+      use run_size
+      implicit none
+
+  !(*****************************   declarations     ****************************************)
+
+       integer(int64) ::  stop, rflag, oflag, step, rkstep, nshells
+       real ::  k1(nx/2), k2(ny), k3(nz), mk1(nx/2), mk2(ny), mk3(nz) &
+              , kx(nx/2), ky_(nx/2,ny), ky(nx/2,ny), kz(nz)
+       complex :: sx(nx/2,3), sy(ny,3), sz(nz,3)
+       integer(int64) :: trigx, trigy, trigz, trigxy
+
+       complex, allocatable ::  u(:,:,:,:)[:]    ! u(nz,4,first_x:last_x,ny)[*]    !(*-- x-y planes --*)
+       complex, allocatable ::  ur(:,:,:,:)[:]   !ur(nz,4,first_y:last_y,nx/2)[*]  !(*-- x-z planes --*)
+       complex, allocatable ::  un(:,:,:,:)      !un(nz,3,first_x:last_x,ny)[*]    !(*-- x-y planes --*)
+       complex, allocatable :: bufr_X_Y(:,:,:,:)
+       complex, allocatable :: bufr_Y_X(:,:,:,:)
+
+interface
+!--------    note: integer(int64)'s required for FFT's and other assembly-coded externals   ------
+
+   function ctrig( len ) bind(C)               !(*-- define complex FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in) :: len
+        integer(int64) :: ctrig     !-- C pointer!
+   end function ctrig
+
+   function rtrig( len ) bind(C)              !(*-- define real FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in):: len
+        integer(int64) :: rtrig     !-- C pointer!
+   end function rtrig
+
+   subroutine cfft( len, lot, data, inc, jmp, ctrig, isign ) bind(C)    !(*-- complex FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, ctrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine cfft
+
+   subroutine rfft( len, lot, data, inc, jmp, rtrig, isign ) bind(C)   !(*-- real FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, rtrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine rfft
+
+   function WALLTIME() bind(C, name = "WALLTIME")
+       import real64
+       real(real64) :: WALLTIME
+   end function WALLTIME
+
+
+end interface
+
+       trigx = rtrig( nx )
+       trigy = ctrig( ny )
+       trigz = ctrig( nz )
+	   trigxy = ctrig( nx+ny )
+
+       allocate (  u(nz , 4 , first_x:last_x , ny)[*] )     !(*-- y-z planes --*)
+       allocate ( ur(nz , 4 , first_y:last_y , nx/2)[*] )   !(*-- x-z planes --*)
+       allocate ( un(nz , 3 , first_x:last_x , ny) )        !(*-- y-z planes --*)
+       allocate ( bufr_X_Y(nz,4,mx,my) )
+       allocate ( bufr_Y_X(nz,4,my,mx) )
+
+
+       stop = 0;   step = 0;  rkstep = 2;  rflag = 0;  cfl = 1;   dt = 0
+       nshells = max( nx,ny,nz )
+
+                        call define_kspace
+                        call define_field
+                        call enforce_conjugate_symmetry
+						call copy_n_s
+                        call define_shifts
+
+       total_time =  -WALLTIME()      !-- start the clock
+
+       tran_time = 0;       cpu_time = -WALLTIME()
+
+  !(*********************************   begin execution loop   *****************************************)
+
+       do while (stop == 0)
+
+                        call phase1
+            rkstep = 1
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call define_step
+                        call define_shifts
+                        call phase3
+                        call pressure
+		if (oflag /= 0) call spectra
+                        call advance
+                        call phase1
+            rkstep = 2
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call phase3
+                        call advance
+                        call pressure
+        if (rflag /= 0) call remesh
+                        call copy_s_n
+
+                		step = step + 1
+                        time = time + dt
+       end do
+
+  !(*********************************   end execution loop   ***********************************************)
+
+       deallocate ( u, ur, un )
+       deallocate ( bufr_X_Y );    deallocate ( bufr_Y_X )
+       sync all     !-- wait for all images to finish!
+
+       total_time =  total_time + WALLTIME()    !-- stop the clock
+       cpu_time =  cpu_time + WALLTIME()    !-- stop the clock
+       call global_times
+
+       if (my_node == 1 )   write(6,fmt="(3(10X,A,2f7.2))")  &
+                            , "total_time ", min_total_time/step, max_total_time/step &
+                            , "cpu_time ", min_cpu_time/step, max_cpu_time/step &
+                            , "tran_time ", min_tran_time/step, max_tran_time/step
+
+
+       write(6,fmt="(A,i4,3f7.2)")  "image ", my_node, total_time/step, cpu_time/step, tran_time/step
+
+
+contains
+
+ !(***********************************************************************************************************
+ !                          transpose the Y and Z planes
+ !***********************************************************************************************************)
+
+!-----                   u(nz,4,mx,my*num_nodes) [num_nodes]
+!-----                  ur(nz,4,my,mx*num_nodes) [num_nodes]
+!-----                bufr(nz,4,my,mx) or bufr(nz,4,mx,my)
+
+!-------------   out-of-place transpose data_s --> data_r  ----------------------------
+
+ subroutine transpose_X_Y
+
+    use constants_module, only : one
+    use run_size
+    use iso_fortran_env
+    implicit none
+
+    integer(int64) :: i,stage
+!    type(lock_type),save :: l_xy_buff[*]
+
+    cpu_time = cpu_time + WALLTIME()
+    lock(l_xy_buff[this_image()])
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (    u(1,1,first_x,1+(my_node-1)*my) &                   !-- intra-node transpose
+                ,  ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- no inter-node transpose needed
+                ,   nz*3, one, one        &                                 !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+    unlock(l_xy_buff[this_image()])
+!    write(*,*) this_image(), 'unlocks itself xy'
+
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        lock(l_xy_buff[i])
+!	write(*,*) this_image(),'locks xy', i
+        bufr_X_Y(:,:,:,:) = u(:,:,:,1+(my_node-1)*my:my_node*my)[i]         !-- inter-node transpose to buffer
+        unlock(l_xy_buff[i])
+!	write(*,*) this_image(),'unlocks xy',i
+        call copy3 ( bufr_X_Y, ur(1,1,first_y,1+(i-1)*mx)  &                !-- intra-node transpose from buffer
+                        ,   nz*3, one, one        &                             !-- note: only 3 of 4 words needed
+                        ,   mx, nz*4, nz*4*my &
+                        ,   my, nz*4*mx, nz*4 )
+    end do
+
+!    sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+    cpu_time = cpu_time - WALLTIME()
+
+ end  subroutine transpose_X_Y
+
+!-------------   out-of-place transpose data_r --> data_s  ----------------------------
+
+subroutine transpose_Y_X
+    use run_size
+    use iso_fortran_env
+    implicit none
+
+    integer(int64) :: i, stage
+!    type(lock_type),save :: l_yx_buff[*]
+
+    cpu_time = cpu_time + WALLTIME()
+    lock(l_yx_buff[this_image()])
+!    sync images(*)
+    sync all   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+    call copy3 (   ur(1,1,first_y,1+(my_node-1)*mx) &                   !-- intra-node transpose
+                ,   u(1,1,first_x,1+(my_node-1)*my) &                   !-- no inter-node transpose needed
+                ,   nz*4, one, one        &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+    unlock(l_yx_buff[this_image()])
+    do stage = 1, num_nodes-1
+        i = 1 + mod( my_node-1+stage, num_nodes )
+        lock(l_yx_buff[i])
+	!write(*,*) this_image(),'locks yx', i
+        bufr_Y_X(:,:,:,:) = ur(:,:,:,1+(my_node-1)*mx:my_node*mx)[i]        !-- inter-node transpose to buffer
+        unlock(l_yx_buff[i])
+	!write(*,*) this_image(),'unlocks yx', i
+        call copy3 ( bufr_Y_X, u(1,1,first_x,1+(i-1)*my)  &                 !-- intra-node transpose from buffer
+                    ,   nz*4, one, one        &
+                    ,   my, nz*4, nz*4*mx &
+                    ,   mx, nz*4*my, nz*4 )
+    end do
+
+    !sync all     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+    cpu_time = cpu_time - WALLTIME()
+
+ end  subroutine transpose_Y_X
+
+
+!(*************************************************************************************************************
+!           enforce conjugate symmetry for plane kx=0 of wavespace  (half of this plane is redundant)
+!***************************************************************************************************************)
+
+  subroutine enforce_conjugate_symmetry
+
+    integer(int64) ::  i, x, y, z
+
+!(*------------------------- un( K ) = conjg( un( -K ) ) ---------------------*)
+
+    if (my_node == 1 ) then     !-- x=1 is in node=1
+        x = 1
+        do i = 1, 3
+            z = 1;              y = 1;              un(z,i,x,y) = 0
+            z = 1;              do y = 2, ny/2;     un(z,i,x,y) = conjg( un(z,i,x,ny+2-y) );        end do
+            do z = 2, nz/2;     y = 1;              un(z,i,x,y) = conjg( un(nz+2-z,i,x,y) );        end do
+            do z = 2, nz/2;     do y = 2, ny;       un(z,i,x,y) = conjg( un(nz+2-z,i,x,ny+2-y) );   end do;	end do
+        end do
+    end if
+end  subroutine enforce_conjugate_symmetry
+
+ !(***********************************************************************************************************
+ !                spectra :  accumulate spectra and other statistics over flow field
+ !***********************************************************************************************************)
+
+ subroutine  spectra
+
+      use run_size
+     implicit none
+
+    integer(int64) :: k, x, y, z
+    real :: kk, ww, uw, uu, uv, duu, factor   &
+          , ek(nshells), dk(nshells), hk(nshells), tk(nshells), sample(nshells)
+    real, save, codimension[*] ::  sum_ek, sum_dk, sum_hk, sum_tk
+
+      total_time =  total_time + WALLTIME()     !-- stop the clock!  time/step does not include spectra time
+
+	  oflag = 0
+      ek = 0;       dk = 0;       hk = 0;     tk = 0;   sample = 0
+
+ !(*---------------------   three dimensional spectra  -----------------------*)
+
+      do x = first_x, last_x;   do y = 1, ny;      do  z = 1, nz
+
+            if( mk1(x)+mk2(y)+mk3(z) > 2./9. ) &
+                              then;   factor = 0
+            else if (x == 1)  then;   factor = 1
+                              else;   factor = 2
+            end if
+
+            kk = kx(x)**2 + ky(x,y)**2 + kz(z)**2
+            k = 1 + int( sqrt( kk ) + 0.5 )
+
+			  uu = factor * real( un(z,1,x,y) * conjg( un(z,1,x,y) ) &
+                                + un(z,2,x,y) * conjg( un(z,2,x,y) ) &
+                                + un(z,3,x,y) * conjg( un(z,3,x,y) ) )
+              ww = kk * uu
+              uv = factor * real( un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+              uw = factor * 2 * aimag( kx(x) *   un(z,2,x,y) * conjg( un(z,3,x,y) ) &
+                                     + ky(x,y) * un(z,3,x,y) * conjg( un(z,1,x,y) ) &
+                                     + kz(z) *   un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+			  duu = factor * real( un(z,1,x,y) * conjg( u(z,1,x,y) ) &
+                                 + un(z,2,x,y) * conjg( u(z,2,x,y) ) &
+								 + un(z,3,x,y) * conjg( u(z,3,x,y) ) ) / (dt/2) + shear * uv
+
+              sample(k) = sample(k) + factor        !(*-- shell sample --*)
+              ek(k) = ek(k) + uu                    !(*-- 2 * energy sum --*)
+              dk(k) = dk(k) + ww                    !(*-- enstrophy sum --*)
+              hk(k) = hk(k) + uw                    !(*-- helicity sum --*)
+              tk(k) = tk(k) + duu                   !(*-- transfer sum --*)
+
+      end do;   end do;  end do
+
+ !(************************     finished accumulation :  compute final statistics     *************************)
+
+    sum_ek = 0;     sum_dk = 0;     sum_hk = 0;     sum_tk = 0
+    do k = nshells, 1, -1
+        sum_ek  = sum_ek + ek(k)
+        sum_dk  = sum_dk + dk(k)
+        sum_hk  = sum_hk + hk(k)
+        sum_tk  = sum_tk + tk(k)
+    end do
+
+    sync all
+    call co_sum(sum_ek,1)
+    call co_sum(sum_dk,1)
+    call co_sum(sum_hk,1)
+    call co_sum(sum_tk,1)
+    if (my_node == 1)  then
+        ! do k = 2, num_nodes
+        !     sum_ek = sum_ek + sum_ek[k]
+        !     sum_dk = sum_dk + sum_dk[k]
+        !     sum_hk = sum_hk + sum_hk[k]
+        !     sum_tk = sum_tk + sum_tk[k]
+        ! end do
+
+        if (step == 0)   write(6,*) "step   time     energy    enstrophy   helicity   transfer"
+        write(6,fmt="(i3, 5e11.3)")  step,  time,    sum_ek,   sum_dk,     sum_hk,    sum_tk
+    end if
+
+     total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end  subroutine  spectra
+
+  !(************************************************************************************************************
+  !        define_field  :   define initial flow field from scratch
+  !************************************************************************************************************)
+
+ subroutine define_field
+
+    use constants_module, only : zero
+    use run_size
+    use random_module
+    implicit none
+
+    real ::     k, k12, f, phi, theta1, theta2
+    complex ::  alpha, beta
+    integer(int64) ::  x, y, z
+    real, parameter :: klo=8, khi=16
+
+   call init_random_seed !(* seed a different pseudo-random number sequence for each image *)
+   time = 0
+
+   do x = first_x, last_x
+       do  y = 1, ny
+            do z = 1, nz
+                 call random_number(theta1 )
+                 call random_number(theta2 )
+                 call random_number(phi    )
+                 k   = sqrt( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                 k12 = sqrt( kx(x)**2 + ky(x,y)**2 )
+
+                 if ( k == 0  .or.  mk1(x)+mk2(y)+mk3(z)>2./9.  .or.  k < klo  .or.  k > khi ) &
+                     then;   f = 0
+                     else;   f = sqrt( 1./(2*pi) ) / k
+                 end if
+
+                 alpha = f * exp( (0,2) * pi * theta1 ) * cos( 2*pi * phi )
+                 beta  = f * exp( (0,2) * pi * theta2 ) * sin( 2*pi * phi )
+
+                 if (k12 == 0) &
+                 then; un(z,1,x,y) = alpha
+                       un(z,2,x,y) = beta
+                       un(z,3,x,y) = 0
+
+                 else; un(z,1,x,y) = ( beta * kz(z) * kx(x)   + alpha * k * ky(x,y) ) / ( k * k12 )
+                       un(z,2,x,y) = ( beta * kz(z) * ky(x,y) - alpha * k * kx(x)   ) / ( k * k12 )
+                       un(z,3,x,y) = - beta * k12 / k
+                 end if
+
+   end do;  end do; end do
+ end  subroutine define_field
+
+ !(***********************************************************************************************************
+ !          define_shifts  :    define coordinate shifts for control of 1-d alias errors
+ ! ***********************************************************************************************************)
+
+    subroutine  define_shifts
+    use constants_module, only : zero
+    use run_size
+    implicit none
+
+           integer(int64) ::  x, y, z
+           integer(int64), save ::  init = 0
+           real :: delta_x, delta_y, delta_z
+           integer :: i,seed_size
+
+           if (init == 0) &     !-- Note: delta's not carried over from previous run
+           then;
+                init = 1
+                call random_seed(size=seed_size)
+                call random_seed(put=[(1234567,i=1,seed_size)])!(* same random numbers for each image! *)
+                do  x = 1, nx/2;  sx(x,3) = exp (  (0,1) * ( pi / nx ) * k1(x) ); end do
+                do  y = 1, ny  ;  sy(y,3) = exp (  (0,1) * ( pi / ny ) * k2(y) ); end do
+                do  z = 1, nz  ;  sz(z,3) = exp (  (0,1) * ( pi / nz ) * k3(z) ); end do
+            else;
+                call random_number(delta_x); delta_x = 2*pi / nx * delta_x
+                do  x = 1, nx/2;  sx(x,1) = sx(x,3)
+                                  sx(x,2) = exp (  (0,1) * delta_x * k1(x) )
+                                  sx(x,3) = exp (  (0,1) * ( delta_x + pi / nx ) * k1(x) ); end do
+
+                call random_number(delta_y); delta_y = 2*pi / ny * delta_y
+                do  y = 1, ny  ;  sy(y,1) = sy(y,3)
+                                  sy(y,2) = exp (  (0,1) * delta_y * k2(y) )
+                                  sy(y,3) = exp (  (0,1) * ( delta_y + pi / ny ) * k2(y) ); end do
+
+                call random_number(delta_z); delta_z = 2*pi / nz * delta_z
+                do  z = 1, nz  ;  sz(z,1) = sz(z,3)
+                                  sz(z,2) = exp (  (0,1) * delta_z * k3(z) )
+                                  sz(z,3) = exp (  (0,1) * ( delta_z + pi / nz ) * k3(z) ); end do
+           end if
+
+ end  subroutine  define_shifts
+
+  !(***********************************************************************************************************
+  !       define_step  :   update time, metric, shifts for the next step
+  !**********************************************************************************************************)
+
+  subroutine  define_step
+      use run_size
+      implicit none
+
+    sync all
+
+    if (cfl /= 0) then
+cpu_time = cpu_time + WALLTIME()
+        call max_velmax
+cpu_time = cpu_time - WALLTIME()
+        dt = cfl / velmax
+    end if
+
+    if (        shear > 0               &
+        .and.  .01*b11*shear*dt < b12   &
+        .and.  b12 <= b11*shear*dt )    then
+            dt = b12 / ( b11 * shear )          !(* limit dt, hit the orthognal mesh *)
+            oflag = 1
+    else if ( mod (step,output_step) == 0 ) then
+            oflag = 1
+    end if
+
+    b12 = b12 - b11 * shear * dt
+
+    if ( b12 < -b22/2 )     rflag = 1                                   !(* remesh at the end of the step? *)
+    if ( step == nsteps )   stop = 1                                    !(* last step? *)
+
+ end   subroutine    define_step
+
+   !(***********************************************************************************************************
+   !      define_kspace  :   define physical wavespace from computational wavespace and metric
+   !**********************************************************************************************************)
+
+  subroutine    define_kspace
+    use run_size
+    implicit none
+
+     integer(int64) ::  x, y, z
+
+       do  x = 1, nx/2   ;   k1(x) = x - 1;     end do
+       do  y = 1, ny/2+1 ;   k2(y) = y - 1;     end do
+       do  z = 1, nz/2+1 ;   k3(z) = z - 1;     end do
+
+       do  y = ny/2+2, ny;   k2(y) = y - 1 - ny;    end do
+       do  z = nz/2+2, nz;   k3(z) = z - 1 - nz;    end do
+
+       do  x = 1, nx/2 ;     mk1(x) = (k1(x)/nx)**2;    kx(x) =   b11 * k1(x);  end do
+       do  z = 1, nz   ;     mk3(z) = (k3(z)/nz)**2;    kz(z) =   b33 * k3(z);  end do
+       do  y = 1, ny   ;     mk2(y) = (k2(y)/ny)**2
+                             do  x = 1, nx/2 ;    ky(x,y) = b22 * k2(y) + b12 * k1(x);  end do; end do
+
+end   subroutine    define_kspace
+
+  !(***********************************************************************************************************
+  !   phase 1 :  on entry, data-plane contains velocity in wave space.  interpolate database, shifted mesh,
+  !              and proceed to physical y space .
+  !************************************************************************************************************)
+
+  subroutine  phase1
+
+      use run_size
+      implicit none
+
+    complex :: shift
+    integer(int64) :: i, x, y, z
+
+   	do  x = first_x, last_x
+
+   		do  y = 1, ny;    do  z = 1, nz
+             shift = sz(z,rkstep+1) * sy(y,rkstep+1) * sx(x,rkstep+1)
+             u(z,1,x,y) = shift * u(z,1,x,y)
+             u(z,2,x,y) = shift * u(z,2,x,y)
+             u(z,3,x,y) = shift * u(z,3,x,y)
+        end do; end do
+
+!(*---------------------------   LEAVING FOURIER WAVE SPACE  --------------------------*)
+
+        do i = 1, 3
+            call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, one );    end do
+	end do
+
+ end   subroutine  phase1
+
+ !(**********************************************************************************************************
+ !     phase 2 :  on entry, data-plane contains velocity in physical y space, and wave x,z space on shifted
+ !                mesh.  Proceed to physical x,z space,  form nonlinear terms, and return to wave x,z space.
+ !***********************************************************************************************************)
+
+ subroutine  phase2
+
+     use run_size
+     implicit none
+
+     complex :: s2(nz,nx/2), vs(nz,nx/2)
+     integer(int64) ::  i, x, y, z
+     real :: v2r, v2i, s2r, s2i, u1r, u1i, u2r, u2i, u3r, u3i, u4r, u4i
+
+  velmax = 0
+
+  do  y = first_y, last_y
+
+      do  x = 1, nx/2 ;  do  z = 1, nz ;   vs(z,x) = ur(z,2,y,x);    end do; end do
+
+      do  i = 1, 3
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, one )
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, one )
+      end do
+
+!(*----------------------------  WELCOME TO PHYSICAL SPACE  --------------------------*)
+
+      do  x = 1, nx/2;  do  z = 1, nz
+           u1r = real(ur(z,1,y,x));  u1i = aimag(ur(z,1,y,x))
+           u2r = real(ur(z,2,y,x));  u2i = aimag(ur(z,2,y,x))
+           u3r = real(ur(z,3,y,x));  u3i = aimag(ur(z,3,y,x))
+
+      if ( rkstep == 1 ) velmax = max( velmax &
+                                     , b11*nx*abs(u1r) + b22*ny*abs(u2r) + b33*nz*abs(u3r) &
+                                     , b11*nx*abs(u1i) + b22*ny*abs(u2i) + b33*nz*abs(u3i) )
+
+           v2r = u2r * u2r;             v2i = u2i * u2i
+           s2r = u1r * u3r;             s2i = u1i * u3i
+           u4r = u2r * u3r;             u4i = u2i * u3i
+           u3r = u3r * u3r  -  v2r;     u3i = u3i * u3i  -  v2i
+           u2r = u1r * u2r;             u2i = u1i * u2i
+           u1r = u1r * u1r  -  v2r;     u1i = u1i * u1i  -  v2i
+
+           s2(z,x)     = cmplx(s2r, s2i)
+           ur(z,1,y,x) = cmplx(u1r, u1i)
+           ur(z,2,y,x) = cmplx(u2r, u2i)
+           ur(z,3,y,x) = cmplx(u3r, u3i)
+           ur(z,4,y,x) = cmplx(u4r, u4i)
+      end do;   end do
+
+!(*----------------------------  LEAVING PHYSICAL SPACE  --------------------------*)
+
+      do  i = 1,  4
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, -one )
+           do  z = 1, nz ;   ur(z,i,y,1) = cmplx(real(ur(z,i,y,1)),0);   end do
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, -one )
+      end do
+
+      call rfft ( nx, nz, s2, nz, one, trigx, -one )
+      do  z = 1, nz ;   s2(z,1) = cmplx(real(s2(z,1)),0);    end do
+      call cfft ( nz, nx/2, s2, one, nz, trigz, -one )
+
+      do  x = 1, nx/2;  do  z = 1, nz
+          ur(z,1,y,x) = kx(x) * ur(z,1,y,x) + kz(z) * s2(z,x)   - (0,1) * 2*nx*nz*shear * vs(z,x)
+          ur(z,3,y,x) = kx(x) * s2(z,x)     + kz(z) * ur(z,3,y,x)
+      end do;   end do
+  end do
+
+ end  subroutine  phase2
+
+  !(***********************************************************************************************************
+  !     phase 3 :  on entry, the data-plane contains the four stresses on a shifted mesh in physical y space,
+  !                wave x,z space.   Return to y  wave space on unshifted mesh and complete time derivative of
+  !                velocity ( not divergence free yet )
+  !***********************************************************************************************************)
+
+  subroutine  phase3
+
+      use run_size
+      implicit none
+
+    integer(int64) :: i, x, y, z
+    complex :: shift
+
+    do  x = first_x, last_x
+
+       do i = 1, 4
+           call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, -one )
+       end do
+
+!(*---------------------------   WELCOME TO FOURIER WAVE SPACE  --------------------------*)
+
+       do  y = 1, ny ;   do  z = 1, nz
+               shift = -dt / (4*nx*ny*nz) * (0,1)*conjg( sy(y,rkstep) * sz(z,rkstep) * sx(x,rkstep) )
+               u(z,1,x,y) = shift * (         u(z,1,x,y) + ky(x,y) * u(z,2,x,y) )
+               u(z,2,x,y) = shift * ( kx(x) * u(z,2,x,y) + kz(z)   * u(z,4,x,y) )
+               u(z,3,x,y) = shift * (         u(z,3,x,y) + ky(x,y) * u(z,4,x,y) )
+       end do;  end do
+    end do
+
+ end   subroutine  phase3
+
+  !(***********************************************************************************************************
+  !   pressure :  add the gradient of a scalar, enforce continuity ( zero divergence )
+  !***********************************************************************************************************)
+
+  subroutine  pressure
+
+      use run_size
+      implicit none
+
+     complex :: psi
+     integer(int64) :: x, y, z
+
+      do x = first_x, last_x ;     do  y = 1, ny
+
+            if ( x /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( kx(x) * u(z,1,x,y) + ky(x,y) * u(z,2,x,y) + kz(z) * u(z,3,x,y) ) &
+                              / ( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                        u(z,1,x,y) = u(z,1,x,y) - kx(x) * psi
+                        u(z,2,x,y) = u(z,2,x,y) - ky(x,y) * psi
+                        u(z,3,x,y) = u(z,3,x,y) - kz(z) * psi
+                   end do
+			else if ( y /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( ky(1,y) * u(z,2,1,y) + kz(z) * u(z,3,1,y) ) &
+                              / ( ky(1,y)**2 + kz(z)**2 )
+                        u(z,2,1,y) = u(z,2,1,y) - ky(1,y) * psi
+                        u(z,3,1,y) = u(z,3,1,y) - kz(z) * psi
+                  end do
+            else
+                  do  z = 1, nz ;    u(z,3,1,1) = 0;     end do
+            end if
+	 end do;    end do
+
+end   subroutine  pressure
+
+!(*****************************************************************************************************************
+!                                remesh  :  remesh the sheared coordinate system
+!*****************************************************************************************************************)
+
+subroutine   remesh
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    complex :: u2(nx+ny,nz), shift(nx+ny)
+    integer(int64) :: i, x, y, z
+
+    write(6,fmt="(A,i4)") "remesh image ", my_node
+
+    total_time =  total_time + WALLTIME()     !-- stop the clock!
+
+    do x = first_x, last_x
+
+        do  y = 1, nx+ny ;   shift(y) =  exp( (0,-2) * pi / (nx+ny) * k1(x) * (y - 1) ) / (nx+ny);    end do
+
+        do  i = 1,  3
+            do  z = 1, nz
+                do  y = 1, ny/2           ;   u2(y,z) = u(z,i,x,y);     end do
+                do  y = ny/2+1, nx+ny/2+1 ;   u2(y,z) = 0;              end do
+                do  y = nx+ny/2+2, nx+ny  ;   u2(y,z) = u(z,i,x,y-nx);  end do
+            end do
+
+            call cfft ( nx+ny, nz, u2, one , nx+ny, trigxy, one )
+
+            do  z = 1, nz ;  do  y = 1, nx+ny ;     u2(y,z) = u2(y,z) * shift(y);  end do;  end do
+
+            call cfft ( nx+ny, nz, u2, one, nx+ny, trigxy, -one )
+
+            do  z = 1, nz
+                do  y = 1, ny/2
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y,z)
+                    end if
+                end do
+                do  y = ny/2+1, ny
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y+nx,z)
+                    end if
+                end do
+            end do
+        end do
+
+        do  y = 1, ny ;    ky(x,y) = ky(x,y) + b22 * k1(x);     end do           !(* update ky for this x *)
+
+    end do
+
+    b12 = b12 + b22;     rflag = 0                                               !(*   update metric, account for remesh    *)
+
+    total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end subroutine   remesh
+
+ !(***********************************************************************************************************
+ !         copy_n_s,   copy_s_n :  copy data between data_s and data_n
+ !***********************************************************************************************************)
+
+ subroutine  copy_n_s
+
+     use run_size
+     implicit none
+
+	integer(int64) ::  x, y, z
+
+	do y = 1, ny;    do x = first_x, last_x;   do z = 1, nz
+		u(z,1,x,y) = un(z,1,x,y)
+		u(z,2,x,y) = un(z,2,x,y)
+    	u(z,3,x,y) = un(z,3,x,y)
+	end do; end do; end do
+ end  subroutine  copy_n_s
+
+ subroutine  copy_s_n
+
+     use run_size
+     implicit none
+
+	integer(int64) ::  x, y, z
+
+	do y = 1, ny;	do x = first_x, last_x;     do z = 1, nz
+		un(z,1,x,y) = u(z,1,x,y)
+		un(z,2,x,y) = u(z,2,x,y)
+		un(z,3,x,y) = u(z,3,x,y)
+    end do; end do; end do
+
+ end  subroutine  copy_s_n
+
+ !(***********************************************************************************************************
+ !                         advance :     second-order runge-kutta time step algorithm
+ !***********************************************************************************************************)
+
+ subroutine  advance
+
+     use run_size
+     implicit none
+
+    integer(int64) ::  x, y, z
+    real :: factor, xyfac, zfac(nz)	  !(* viscous integrating factors *)
+
+    if (rkstep == 1) then
+        do z = 1, nz;     zfac(z) = exp( - viscos * dt * kz(z)**2 ); end do
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                ky_(x,y) = ky(x,y)
+                ky(x,y) = b22 * k2(y) + b12 * k1(x)
+
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        factor = zfac(z) * exp( - viscos * dt * ( kx(x)**2 + ( ky_(x,y)**2 + ky_(x,y)*ky(x,y) + ky(x,y)**2 )/3 ) )
+
+                        un(z,1,x,y) = factor * ( un(z,1,x,y) + u(z,1,x,y) )
+                        u(z,1,x,y)  = un(z,1,x,y) + factor * u(z,1,x,y)
+
+                        un(z,2,x,y) = factor * ( un(z,2,x,y) + u(z,2,x,y) )
+                        u(z,2,x,y)  = un(z,2,x,y) + factor * u(z,2,x,y)
+
+                        un(z,3,x,y) = factor * ( un(z,3,x,y) + u(z,3,x,y) )
+                        u(z,3,x,y)  = un(z,3,x,y) + factor * u(z,3,x,y)
+                    end if
+        end do;  end do; end do
+
+    else if (rkstep == 2) then
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        u(z,1,x,y) = un(z,1,x,y) + u(z,1,x,y)
+                        u(z,2,x,y) = un(z,2,x,y) + u(z,2,x,y)
+                        u(z,3,x,y) = un(z,3,x,y) + u(z,3,x,y)
+                    end if
+        end do; end do; end do
+
+    end if
+
+ end  subroutine  advance
+
+ end   subroutine solve_navier_stokes
diff --git a/src/tests/integration/pde_solvers/navier-stokes/libfft_avx.a b/src/tests/integration/pde_solvers/navier-stokes/libfft_avx.a
new file mode 100644
index 0000000..b5ebfaf
Binary files /dev/null and b/src/tests/integration/pde_solvers/navier-stokes/libfft_avx.a differ
diff --git a/src/tests/integration/pde_solvers/navier-stokes/libfft_sse.a b/src/tests/integration/pde_solvers/navier-stokes/libfft_sse.a
new file mode 100644
index 0000000..6d7590b
Binary files /dev/null and b/src/tests/integration/pde_solvers/navier-stokes/libfft_sse.a differ
diff --git a/src/tests/integration/pde_solvers/navier-stokes/mpi-shear.f90 b/src/tests/integration/pde_solvers/navier-stokes/mpi-shear.f90
new file mode 100644
index 0000000..e2b602c
--- /dev/null
+++ b/src/tests/integration/pde_solvers/navier-stokes/mpi-shear.f90
@@ -0,0 +1,1034 @@
+! MPI 3D Navier-Stokes Solver Test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+
+!(*----------------------------------------------------------------------------------------------------------------------
+!         basic in-core shear code ( 7 words/node, not threaded, in-core, no file read/write )
+!------------------------------------------------------------------------------------------------------------------------*)
+
+! Define universal constants:
+! In the case of exactly representable numbers, the definitions are useful
+! to ensure subprogram argument type/kind/rank matching without having to
+! repind kind specifiers everywhere.
+module constants_module
+  use iso_fortran_env, only : int64
+  implicit none
+  private
+  public :: one,zero
+  integer(int64), parameter :: one=1_int64,zero=0_int64
+end module
+
+! Initialize the random seed with a varying seed to ensure a different
+! random number sequence for each invocation of subroutine, e.g. for
+! invocations on different images of a coarray parallel program.
+! Setting any seed values to zero is depcretated because it can result
+! in low-quality random number sequences.
+! (Source: https://gcc.gnu.org/onlinedocs/gfortran/RANDOM_005fSEED.html)
+module random_module
+  implicit none
+  private
+  public :: init_random_seed
+contains
+  subroutine init_random_seed()
+            use iso_fortran_env, only: int64
+            implicit none
+            integer, allocatable :: seed(:)
+            integer :: i, n, un, istat, dt(8), pid
+            integer(int64) :: t
+
+            call random_seed(size = n)
+            allocate(seed(n))
+            ! First try if the OS provides a random number generator
+            open(newunit=un, file="/dev/urandom", access="stream", &
+                 form="unformatted", action="read", status="old", iostat=istat)
+            if (istat == 0) then
+               read(un) seed
+               close(un)
+            else
+               ! Fallback to XOR:ing the current time and pid. The PID is
+               ! useful in case one launches multiple instances of the same
+               ! program in parallel.
+               call system_clock(t)
+               if (t == 0) then
+                  call date_and_time(values=dt)
+                  t = (dt(1) - 1970) * 365_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(2) * 31_int64 * 24 * 60 * 60 * 1000 &
+                       + dt(3) * 24_int64 * 60 * 60 * 1000 &
+                       + dt(5) * 60 * 60 * 1000 &
+                       + dt(6) * 60 * 1000 + dt(7) * 1000 &
+                       + dt(8)
+               end if
+               pid = getpid()
+               t = ieor(t, int(pid, kind(t)))
+               do i = 1, n
+                  seed(i) = lcg(t)
+               end do
+            end if
+            call random_seed(put=seed)
+          contains
+            ! This simple PRNG might not be good enough for real work, but is
+            ! sufficient for seeding a better PRNG.
+            function lcg(s)
+              integer :: lcg
+              integer(int64) :: s
+              if (s == 0) then
+                 s = 104729
+              else
+                 s = mod(s, 4294967296_int64)
+              end if
+              s = mod(s * 279470273_int64, 4294967291_int64)
+              lcg = int(mod(s, int(huge(0), int64)), kind(0))
+            end function lcg
+          end subroutine init_random_seed
+end module random_module
+
+module run_size
+    use iso_fortran_env, only : int64,real64
+    implicit none
+      include 'mpif.h'
+        real ::  viscos, shear, b11, b22, b33, b12, velmax, max_velmax
+        integer(int64) ::  nx, ny, nz, nsteps, output_step
+        integer(int64) :: my, mx, first_y, last_y, first_x, last_x
+        integer(int64) :: ierror
+        real(real64) :: cpu_time, tran_time, total_time
+        real(real64) :: max_cpu_time, max_tran_time, max_total_time
+        real(real64) :: min_cpu_time, min_tran_time, min_total_time
+
+        real ::  time, cfl, dt
+        integer(int64) :: my_node, num_nodes
+        real, parameter :: pi = 3.141592653589793
+
+contains
+
+    subroutine global_times()
+        call MPI_REDUCE(total_time, max_total_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD, ierror)
+        call MPI_REDUCE(total_time, min_total_time, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD, ierror)
+        call MPI_REDUCE(tran_time, max_tran_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD, ierror)
+        call MPI_REDUCE(tran_time, min_tran_time, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD, ierror)
+        call MPI_REDUCE(cpu_time, max_cpu_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD, ierror)
+        call MPI_REDUCE(cpu_time, min_cpu_time, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD, ierror)
+    end subroutine global_times
+
+subroutine copy3( A,B, n1, sA1, sB1, n2, sA2, sB2, n3, sA3, sB3 )
+  implicit none
+  complex, intent(in)  :: A(0:*)
+  complex, intent(out) :: B(0:*)
+  integer(int64), intent(in) :: n1, sA1, sB1
+  integer(int64), intent(in) :: n2, sA2, sB2
+  integer(int64), intent(in) :: n3, sA3, sB3
+  integer(int64) i,j,k
+
+  do k=0,n3-1
+     do j=0,n2-1
+        do i=0,n1-1
+           B(i*sB1+j*sB2+k*sB3) = A(i*sA1+j*sA2+k*sA3)
+        end do
+     end do
+  end do
+end subroutine copy3
+
+end module run_size
+
+program mshear
+
+  !(***********************************************************************************************************
+  !                   m a i n   p r o g r a m
+  !***********************************************************************************************************)
+      use run_size
+      implicit none
+
+      interface
+        subroutine solve_navier_stokes
+        end subroutine solve_navier_stokes
+      end interface
+
+  call MPI_INIT(ierror)
+  call MPI_COMM_RANK(MPI_COMM_WORLD, my_node, ierror)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD, num_nodes, ierror)
+
+      if( my_node == 0 ) then
+
+        write(6,*) "nx,ny,nz : ";               read(5,*) nx, ny, nz
+        if ( mod(nx/2,num_nodes) /= 0) then;    write(6,*) "nx/2 not multiple of num_nodes";    stop;   end if
+        if ( mod(ny,num_nodes) /= 0) then;      write(6,*) " ny not multiple of num_nodes";     stop;   end if
+
+        write(6,*) "viscos, shear : ";          read(5,*) viscos, shear
+        write(6,*) "b11 b22 b33 b12 : ";        read(5,*) b11, b22, b33, b12
+        write(6,*) "nsteps, output_step : ";    read(5,*) nsteps, output_step
+
+        write(6,fmt="(3(A,i4))") "nx =",nx,   "   ny =",ny,   "   nz =",nz
+        write(6,fmt="(2(A,f7.3))") "viscos = ", viscos, "      shear = ", shear
+        write(6,fmt="(A,4f7.3)") "b11 b22 b33 b12 = ", b11, b22, b33, b12
+        write(6,fmt="(2(A,i6))") "nsteps = ", nsteps, "       output_step = ", output_step
+
+        write(6,fmt="(A,i4,A)") "----------------- running on ", num_nodes, " images -------------------"
+
+      end if
+
+        call MPI_BCAST( nx, 1, MPI_INT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( ny, 1, MPI_INT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( nz, 1, MPI_INT, 0, MPI_COMM_WORLD, ierror )
+
+        call MPI_BCAST( viscos, 1, MPI_FLOAT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( shear, 1, MPI_FLOAT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( b11, 1, MPI_FLOAT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( b22, 1, MPI_FLOAT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( b33, 1, MPI_FLOAT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( b12, 1, MPI_FLOAT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( nsteps, 1, MPI_INT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( output_step, 1, MPI_INT, 0, MPI_COMM_WORLD, ierror )
+        call MPI_BARRIER(MPI_COMM_WORLD, ierror)
+
+ 	    mx = nx/2 / num_nodes;  first_x = my_node*mx + 1;   last_x  = my_node*mx + mx
+        my = ny / num_nodes;    first_y = my_node*my + 1;   last_y  = my_node*my + my
+
+    call solve_navier_stokes
+
+end program mshear
+
+!  (***********************************************************************************************************
+!             n a v i e r - s t o k e s   s o l v e r
+!  ************************************************************************************************************)
+
+  subroutine solve_navier_stokes
+      use run_size
+      implicit none
+
+  !(*****************************   declarations     ****************************************)
+
+       integer(int64) ::  stop, rflag, oflag, step, rkstep, nshells, msg_size
+       real ::  k1(nx/2), k2(ny), k3(nz), mk1(nx/2), mk2(ny), mk3(nz) &
+              , kx(nx/2), ky_(nx/2,ny), ky(nx/2,ny), kz(nz)
+       complex :: sx(nx/2,3), sy(ny,3), sz(nz,3)
+       integer(int64) :: trigx, trigy, trigz, trigxy
+
+       complex, allocatable ::  u(:,:,:,:)    ! u(nz,4,first_x:last_x,ny)    !(*-- x-y planes --*)
+       complex, allocatable ::  ur(:,:,:,:)   !ur(nz,4,first_y:last_y,nx/2)  !(*-- x-z planes --*)
+       complex, allocatable ::  un(:,:,:,:)   !un(nz,3,first_x:last_x,ny)    !(*-- x-y planes --*)
+       complex, allocatable :: bufr(:)
+
+interface
+!--------    note: integer(int64)'s required for FFT's and other assembly-coded externals   ------
+
+   function ctrig( len ) bind(C)               !(*-- define complex FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in) :: len
+        integer(int64) :: ctrig     !-- C pointer!
+   end function ctrig
+
+   function rtrig( len ) bind(C)              !(*-- define real FFT trig table --*)
+        import int64
+        integer(int64), value, intent(in):: len
+        integer(int64) :: rtrig     !-- C pointer!
+   end function rtrig
+
+   subroutine cfft( len, lot, data, inc, jmp, ctrig, isign ) bind(C)    !(*-- complex FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, ctrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine cfft
+
+   subroutine rfft( len, lot, data, inc, jmp, rtrig, isign ) bind(C)   !(*-- real FFT --*)
+        import int64
+        integer(int64), value, intent(in) :: len, lot, inc, jmp, rtrig, isign
+        complex, dimension(0:0), intent(in) :: data
+   end subroutine rfft
+
+   function WALLTIME() bind(C, name = "WALLTIME")
+       import real64
+       real(real64) :: WALLTIME
+   end function WALLTIME
+
+
+end interface
+
+       trigx = rtrig( nx )
+       trigy = ctrig( ny )
+       trigz = ctrig( nz )
+	   trigxy = ctrig( nx+ny )
+
+       msg_size = nz*4*mx*my     !-- message size (complex data items)
+
+       allocate (  u(nz , 4 , first_x:last_x , ny)   )   !(*-- y-z planes --*)
+       allocate ( ur(nz , 4 , first_y:last_y , nx/2) )   !(*-- x-z planes --*)
+       allocate ( un(nz , 3 , first_x:last_x , ny)   )   !(*-- y-z planes --*)
+       allocate ( bufr(msg_size) )
+
+       stop = 0;   step = 0;  rkstep = 2;  rflag = 0;  cfl = 1;   dt = 0
+       nshells = max( nx,ny,nz )
+
+                        call define_kspace
+                        call define_field
+                        call enforce_conjugate_symmetry
+						call copy_n_s
+                        call define_shifts
+
+       total_time =  -WALLTIME()      !-- start the clock
+
+       tran_time = 0;   cpu_time = -WALLTIME()
+
+  !(*********************************   begin execution loop   *****************************************)
+
+       do while (stop == 0)
+
+                        call phase1
+            rkstep = 1
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call define_step
+                        call define_shifts
+                        call phase3
+                        call pressure
+		if (oflag /= 0) call spectra
+                        call advance
+                        call phase1
+            rkstep = 2
+                        call transpose_X_Y
+                        call phase2
+                        call transpose_Y_X
+                        call phase3
+                        call advance
+                        call pressure
+        if (rflag /= 0) call remesh
+                        call copy_s_n
+
+                		step = step + 1
+                        time = time + dt
+       end do
+
+  !(*********************************   end execution loop   ***********************************************)
+
+       deallocate ( u, ur, un )
+       deallocate ( bufr )
+       call MPI_BARRIER(MPI_COMM_WORLD, ierror)  !-- other nodes wait for broadcast!
+
+       total_time =  total_time + WALLTIME()    !-- stop the clock
+       cpu_time =  cpu_time + WALLTIME()    !-- stop the clock
+       call global_times
+
+       if (my_node == 0 )   write(6,fmt="(3(10X,A,2f7.2))")  &
+                            , "total_time ", min_total_time/step, max_total_time/step &
+                            , "cpu_time ", min_cpu_time/step, max_cpu_time/step &
+                            , "tran_time ", min_tran_time/step, max_tran_time/step
+
+
+!       write(6,fmt="(A,i4,3f7.2)")  "image ", my_node, total_time/step, cpu_time/step, tran_time/step
+
+
+contains
+
+ !(***********************************************************************************************************
+ !                          transpose the Y and Z planes
+ !***********************************************************************************************************)
+
+!-----                   u(nz,4,mx,my*num_nodes) [num_nodes]
+!-----                  ur(nz,4,my,mx*num_nodes) [num_nodes]
+!-----                bufr(nz,4,my,mx) or bufr(nz,4,mx,my)
+
+!-------------   out-of-place transpose data_s --> data_r  ----------------------------
+
+ subroutine transpose_X_Y
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    integer(int64) :: to, from, stage, idr(0:num_nodes-1), ids, send_tag, recv_tag
+    integer(int64) :: send_status(MPI_STATUS_SIZE), recv_status(MPI_STATUS_SIZE)
+
+    cpu_time = cpu_time + WALLTIME()
+ call MPI_BARRIER(MPI_COMM_WORLD, ierror)   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+!--------------   issue all block receives ... tags = 256*dst_image+src_image  ------------------
+
+    do stage = 1, num_nodes-1
+        from = mod( my_node+stage, num_nodes )
+        recv_tag = 256*my_node + from
+        call MPI_IRECV  ( ur(1,1,first_y,1+from*mx) &
+                        ,   msg_size*2,  MPI_REAL,  from, recv_tag,  MPI_COMM_WORLD,  idr(stage),  ierror)
+    end do
+
+!--------------   transpose my image's block (no communication needed)  ------------------
+
+    call copy3 (    u(1,1,first_x,1+my_node*my) &                   !-- intra-node transpose
+                ,  ur(1,1,first_y,1+my_node*mx) &                   !-- no inter-node transpose needed
+                ,   nz*3, one, one        &                                 !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+
+!--------------   issue all block sends ... tags = 256*dst_image+src_image  ------------------
+
+
+    do stage = 1, num_nodes-1   !-- process sends in order
+        to = mod( my_node+stage, num_nodes )
+        send_tag = 256*to + my_node
+        call copy3 ( u(1,1,first_x,1+to*my), bufr &                !-- intra-node transpose from buffer
+                ,   nz*3, one, one        &                             !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+
+        call MPI_SEND ( bufr &
+                        ,   msg_size*2,  MPI_REAL,  to, send_tag,  MPI_COMM_WORLD,  ierror)
+    end do
+
+!--------------   wait on receives   ------------------
+
+    do stage = 1, num_nodes-1
+        call MPI_WAIT( idr(stage),  recv_status,  ierror )
+    end do
+
+call MPI_BARRIER(MPI_COMM_WORLD, ierror)     !--  wait for other nodes to finish transpose (not needed)
+    tran_time = tran_time + WALLTIME()
+    cpu_time = cpu_time - WALLTIME()
+
+ end  subroutine transpose_X_Y
+
+!-------------   out-of-place transpose data_r --> data_s  ----------------------------
+
+ subroutine transpose_Y_X
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    integer(int64) :: to, from, stage, idr(0:num_nodes-1), ids, send_tag, recv_tag
+    integer(int64) :: send_status(MPI_STATUS_SIZE), recv_status(MPI_STATUS_SIZE)
+
+    cpu_time = cpu_time + WALLTIME()
+ call MPI_BARRIER(MPI_COMM_WORLD, ierror)   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+!--------------   issue all block receives ... tags = 256*dst_image+src_image  ------------------
+
+    do stage = 1, num_nodes-1
+        from = mod( my_node+stage, num_nodes )
+        recv_tag = 256*my_node + from
+        call MPI_IRECV  ( u(1,1,first_x,1+from*my) &
+                        ,   msg_size*2,  MPI_REAL,  from, recv_tag,  MPI_COMM_WORLD,  idr(stage),  ierror)
+    end do
+
+!--------------   transpose my image's block (no communication needed)  ------------------
+
+    call copy3 (   ur(1,1,first_y,1+my_node*mx) &                   !-- intra-node transpose
+                ,   u(1,1,first_x,1+my_node*my) &                   !-- no inter-node transpose needed
+                ,   nz*4, one, one    &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+
+!--------------   issue all block sends ... tags = 256*dst_image+src_image  ------------------
+
+
+    do stage = 1, num_nodes-1   !-- process sends in order
+        to = mod( my_node+stage, num_nodes )
+        send_tag = 256*to + my_node
+        call copy3 ( ur(1,1,first_y,1+to*mx), bufr &                !-- intra-node transpose from buffer
+                ,   nz*4, one, one    &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+
+        call MPI_SEND ( bufr &
+                        ,   msg_size*2,  MPI_REAL,  to, send_tag,  MPI_COMM_WORLD,  ierror)
+    end do
+
+!--------------   wait on receives   ------------------
+
+    do stage = 1, num_nodes-1
+        call MPI_WAIT( idr(stage),  recv_status,  ierror )
+    end do
+
+call MPI_BARRIER(MPI_COMM_WORLD, ierror)     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+    cpu_time = cpu_time - WALLTIME()
+
+ end  subroutine transpose_Y_X
+
+
+!(*************************************************************************************************************
+!           enforce conjugate symmetry for plane kx=0 of wavespace  (half of this plane is redundant)
+!***************************************************************************************************************)
+
+  subroutine enforce_conjugate_symmetry
+
+    integer(int64) ::  i, x, y, z
+
+!(*------------------------- un( K ) = conjg( un( -K ) ) ---------------------*)
+
+    if (my_node == 0 ) then     !-- x=1 is in node=1
+        x = 1
+        do i = 1, 3
+            z = 1;              y = 1;              un(z,i,x,y) = 0
+            z = 1;              do y = 2, ny/2;     un(z,i,x,y) = conjg( un(z,i,x,ny+2-y) );        end do
+            do z = 2, nz/2;     y = 1;              un(z,i,x,y) = conjg( un(nz+2-z,i,x,y) );        end do
+            do z = 2, nz/2;     do y = 2, ny;       un(z,i,x,y) = conjg( un(nz+2-z,i,x,ny+2-y) );   end do;	end do
+        end do
+    end if
+end  subroutine enforce_conjugate_symmetry
+
+ !(***********************************************************************************************************
+ !                spectra :  accumulate spectra and other statistics over flow field
+ !***********************************************************************************************************)
+
+ subroutine  spectra
+
+      use run_size
+     implicit none
+
+    integer(int64) :: k, x, y, z
+    real :: kk, ww, uw, uu, uv, duu, factor   &
+          , ek(nshells), dk(nshells), hk(nshells), tk(nshells), sample(nshells)
+    real, save ::  sum_ek, sum_dk, sum_hk, sum_tk, ek_sum, dk_sum, hk_sum, tk_sum
+
+      total_time =  total_time + WALLTIME()     !-- stop the clock!  time/step does not include spectra time
+
+	  oflag = 0
+      ek = 0;       dk = 0;       hk = 0;     tk = 0;   sample = 0
+
+ !(*---------------------   three dimensional spectra  -----------------------*)
+
+      do x = first_x, last_x;   do y = 1, ny;      do  z = 1, nz
+
+            if( mk1(x)+mk2(y)+mk3(z) > 2./9. ) &
+                              then;   factor = 0
+            else if (x == 1)  then;   factor = 1
+                              else;   factor = 2
+            end if
+
+            kk = kx(x)**2 + ky(x,y)**2 + kz(z)**2
+            k = 1 + int( sqrt( kk ) + 0.5 )
+
+			  uu = factor * real( un(z,1,x,y) * conjg( un(z,1,x,y) ) &
+                                + un(z,2,x,y) * conjg( un(z,2,x,y) ) &
+                                + un(z,3,x,y) * conjg( un(z,3,x,y) ) )
+              ww = kk * uu
+              uv = factor * real( un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+              uw = factor * 2 * aimag( kx(x) *   un(z,2,x,y) * conjg( un(z,3,x,y) ) &
+                                     + ky(x,y) * un(z,3,x,y) * conjg( un(z,1,x,y) ) &
+                                     + kz(z) *   un(z,1,x,y) * conjg( un(z,2,x,y) ) )
+
+			  duu = factor * real( un(z,1,x,y) * conjg( u(z,1,x,y) ) &
+                                 + un(z,2,x,y) * conjg( u(z,2,x,y) ) &
+								 + un(z,3,x,y) * conjg( u(z,3,x,y) ) ) / (dt/2) + shear * uv
+
+              sample(k) = sample(k) + factor        !(*-- shell sample --*)
+              ek(k) = ek(k) + uu                    !(*-- 2 * energy sum --*)
+              dk(k) = dk(k) + ww                    !(*-- enstrophy sum --*)
+              hk(k) = hk(k) + uw                    !(*-- helicity sum --*)
+              tk(k) = tk(k) + duu                   !(*-- transfer sum --*)
+
+      end do;   end do;  end do
+
+ !(************************     finished accumulation :  compute final statistics     *************************)
+
+    sum_ek = 0;     sum_dk = 0;     sum_hk = 0;     sum_tk = 0
+    do k = nshells, 1, -1
+        sum_ek  = sum_ek + ek(k)
+        sum_dk  = sum_dk + dk(k)
+        sum_hk  = sum_hk + hk(k)
+        sum_tk  = sum_tk + tk(k)
+    end do
+
+    call MPI_BARRIER(MPI_COMM_WORLD, ierror)
+    call MPI_REDUCE(sum_ek, ek_sum, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD, ierror);  sum_ek = ek_sum
+    call MPI_REDUCE(sum_dk, dk_sum, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD, ierror);  sum_dk = dk_sum
+    call MPI_REDUCE(sum_hk, hk_sum, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD, ierror);  sum_hk = hk_sum
+    call MPI_REDUCE(sum_tk, tk_sum, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD, ierror);  sum_tk = tk_sum
+
+    if (my_node == 0 ) then
+        if (step == 0)   write(6,*) "step   time     energy    enstrophy   helicity   transfer"
+        write(6,fmt="(i3, 5e11.3)")  step,  time,    sum_ek,   sum_dk,     sum_hk,    sum_tk
+    end if
+
+     total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end  subroutine  spectra
+
+  !(************************************************************************************************************
+  !        define_field  :   define initial flow field from scratch
+  !************************************************************************************************************)
+
+ subroutine define_field
+
+    use random_module, only : init_random_seed
+    use run_size
+    implicit none
+
+    real ::     k, k12, f, phi, theta1, theta2
+    complex ::  alpha, beta
+    integer(int64) ::  x, y, z
+    real, parameter :: klo=8, khi=16
+
+   time = 0
+   call init_random_seed
+
+   do x = first_x, last_x
+       do  y = 1, ny
+            do z = 1, nz
+                 call random_number(theta1)
+                 call random_number(theta2)
+                 call random_number(phi   )
+                 k   = sqrt( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                 k12 = sqrt( kx(x)**2 + ky(x,y)**2 )
+
+                 if ( k == 0  .or.  mk1(x)+mk2(y)+mk3(z)>2./9.  .or.  k < klo  .or.  k > khi ) &
+                     then;   f = 0
+                     else;   f = sqrt( 1./(2*pi) ) / k
+                 end if
+
+                 alpha = f * exp( (0,2) * pi * theta1 ) * cos( 2*pi * phi )
+                 beta  = f * exp( (0,2) * pi * theta2 ) * sin( 2*pi * phi )
+
+                 if (k12 == 0) &
+                 then; un(z,1,x,y) = alpha
+                       un(z,2,x,y) = beta
+                       un(z,3,x,y) = 0
+
+                 else; un(z,1,x,y) = ( beta * kz(z) * kx(x)   + alpha * k * ky(x,y) ) / ( k * k12 )
+                       un(z,2,x,y) = ( beta * kz(z) * ky(x,y) - alpha * k * kx(x)   ) / ( k * k12 )
+                       un(z,3,x,y) = - beta * k12 / k
+                 end if
+
+   end do;  end do; end do
+ end  subroutine define_field
+
+ !(***********************************************************************************************************
+ !          define_shifts  :    define coordinate shifts for control of 1-d alias errors
+ ! ***********************************************************************************************************)
+
+    subroutine  define_shifts
+    use run_size
+    implicit none
+
+           integer ::  seed_size
+           integer(int64) ::  x, y, z, i
+           integer(int64), save ::  init = 0
+           real :: delta_x, delta_y, delta_z
+
+           if (init == 0) &     !-- Note: delta's not carried over from previous run
+           then;
+                init = 1
+                call random_seed(size=seed_size)
+                call random_seed(put=[(1234567,i=1,seed_size)])!(* same random numbers for each image! *)
+                do  x = 1, nx/2;  sx(x,3) = exp (  (0,1) * ( pi / nx ) * k1(x) ); end do
+                do  y = 1, ny  ;  sy(y,3) = exp (  (0,1) * ( pi / ny ) * k2(y) ); end do
+                do  z = 1, nz  ;  sz(z,3) = exp (  (0,1) * ( pi / nz ) * k3(z) ); end do
+            else;
+                call random_number(delta_x);delta_x = 2*pi / nx * delta_x
+                do  x = 1, nx/2;  sx(x,1) = sx(x,3)
+                                  sx(x,2) = exp (  (0,1) * delta_x * k1(x) )
+                                  sx(x,3) = exp (  (0,1) * ( delta_x + pi / nx ) * k1(x) ); end do
+
+                call random_number(delta_y);delta_y = 2*pi / ny * delta_y
+                do  y = 1, ny  ;  sy(y,1) = sy(y,3)
+                                  sy(y,2) = exp (  (0,1) * delta_y * k2(y) )
+                                  sy(y,3) = exp (  (0,1) * ( delta_y + pi / ny ) * k2(y) ); end do
+
+                call random_number(delta_z);delta_z = 2*pi / nz * delta_z
+                do  z = 1, nz  ;  sz(z,1) = sz(z,3)
+                                  sz(z,2) = exp (  (0,1) * delta_z * k3(z) )
+                                  sz(z,3) = exp (  (0,1) * ( delta_z + pi / nz ) * k3(z) ); end do
+           end if
+
+ end  subroutine  define_shifts
+
+  !(***********************************************************************************************************
+  !       define_step  :   update time, metric, shifts for the next step
+  !**********************************************************************************************************)
+
+  subroutine  define_step
+      use run_size
+      implicit none
+
+    call MPI_BARRIER(MPI_COMM_WORLD, ierror)
+
+    if (cfl /= 0) then
+cpu_time = cpu_time + WALLTIME()
+        call MPI_ALLREDUCE(velmax, max_velmax, 1, MPI_REAL, MPI_MAX, MPI_COMM_WORLD, ierror)
+        velmax = max_velmax
+cpu_time = cpu_time - WALLTIME()
+        dt = cfl / velmax
+    end if
+
+    if (        shear > 0               &
+        .and.  .01*b11*shear*dt < b12   &
+        .and.  b12 <= b11*shear*dt )    then
+            dt = b12 / ( b11 * shear )          !(* limit dt, hit the orthognal mesh *)
+            oflag = 1
+    else if ( mod (step,output_step) == 0 ) then
+            oflag = 1
+    end if
+
+    b12 = b12 - b11 * shear * dt
+
+    if ( b12 < -b22/2 )     rflag = 1                                   !(* remesh at the end of the step? *)
+    if ( step == nsteps )   stop = 1                                    !(* last step? *)
+
+ end   subroutine    define_step
+
+   !(***********************************************************************************************************
+   !      define_kspace  :   define physical wavespace from computational wavespace and metric
+   !**********************************************************************************************************)
+
+  subroutine    define_kspace
+    use run_size
+    implicit none
+
+     integer(int64) ::  x, y, z
+
+       do  x = 1, nx/2   ;   k1(x) = x - 1;     end do
+       do  y = 1, ny/2+1 ;   k2(y) = y - 1;     end do
+       do  z = 1, nz/2+1 ;   k3(z) = z - 1;     end do
+
+       do  y = ny/2+2, ny;   k2(y) = y - 1 - ny;    end do
+       do  z = nz/2+2, nz;   k3(z) = z - 1 - nz;    end do
+
+       do  x = 1, nx/2 ;     mk1(x) = (k1(x)/nx)**2;    kx(x) =   b11 * k1(x);  end do
+       do  z = 1, nz   ;     mk3(z) = (k3(z)/nz)**2;    kz(z) =   b33 * k3(z);  end do
+       do  y = 1, ny   ;     mk2(y) = (k2(y)/ny)**2
+                             do  x = 1, nx/2 ;    ky(x,y) = b22 * k2(y) + b12 * k1(x);  end do; end do
+
+end   subroutine    define_kspace
+
+  !(***********************************************************************************************************
+  !   phase 1 :  on entry, data-plane contains velocity in wave space.  interpolate database, shifted mesh,
+  !              and proceed to physical y space .
+  !************************************************************************************************************)
+
+  subroutine  phase1
+
+      use constants_module, only : one
+      use run_size
+      implicit none
+
+    complex :: shift
+    integer(int64) :: i, x, y, z
+
+   	do  x = first_x, last_x
+
+   		do  y = 1, ny;    do  z = 1, nz
+             shift = sz(z,rkstep+1) * sy(y,rkstep+1) * sx(x,rkstep+1)
+             u(z,1,x,y) = shift * u(z,1,x,y)
+             u(z,2,x,y) = shift * u(z,2,x,y)
+             u(z,3,x,y) = shift * u(z,3,x,y)
+        end do; end do
+
+!(*---------------------------   LEAVING FOURIER WAVE SPACE  --------------------------*)
+
+        do i = 1, 3
+            call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, one );    end do
+	end do
+
+ end   subroutine  phase1
+
+ !(**********************************************************************************************************
+ !     phase 2 :  on entry, data-plane contains velocity in physical y space, and wave x,z space on shifted
+ !                mesh.  Proceed to physical x,z space,  form nonlinear terms, and return to wave x,z space.
+ !***********************************************************************************************************)
+
+ subroutine  phase2
+
+     use constants_module, only : one
+     use run_size
+     implicit none
+
+     complex :: s2(nz,nx/2), vs(nz,nx/2)
+     integer(int64) ::  i, x, y, z
+     real :: v2r, v2i, s2r, s2i, u1r, u1i, u2r, u2i, u3r, u3i, u4r, u4i
+
+  velmax = 0
+
+  do  y = first_y, last_y
+
+      do  x = 1, nx/2 ;  do  z = 1, nz ;   vs(z,x) = ur(z,2,y,x);    end do; end do
+
+      do  i = 1, 3
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, one )
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, one )
+      end do
+
+!(*----------------------------  WELCOME TO PHYSICAL SPACE  --------------------------*)
+
+      do  x = 1, nx/2;  do  z = 1, nz
+           u1r = real(ur(z,1,y,x));  u1i = aimag(ur(z,1,y,x))
+           u2r = real(ur(z,2,y,x));  u2i = aimag(ur(z,2,y,x))
+           u3r = real(ur(z,3,y,x));  u3i = aimag(ur(z,3,y,x))
+
+      if ( rkstep == 1 ) velmax = max( velmax &
+                                     , b11*nx*abs(u1r) + b22*ny*abs(u2r) + b33*nz*abs(u3r) &
+                                     , b11*nx*abs(u1i) + b22*ny*abs(u2i) + b33*nz*abs(u3i) )
+
+           v2r = u2r * u2r;             v2i = u2i * u2i
+           s2r = u1r * u3r;             s2i = u1i * u3i
+           u4r = u2r * u3r;             u4i = u2i * u3i
+           u3r = u3r * u3r  -  v2r;     u3i = u3i * u3i  -  v2i
+           u2r = u1r * u2r;             u2i = u1i * u2i
+           u1r = u1r * u1r  -  v2r;     u1i = u1i * u1i  -  v2i
+
+           s2(z,x)     = cmplx(s2r, s2i)
+           ur(z,1,y,x) = cmplx(u1r, u1i)
+           ur(z,2,y,x) = cmplx(u2r, u2i)
+           ur(z,3,y,x) = cmplx(u3r, u3i)
+           ur(z,4,y,x) = cmplx(u4r, u4i)
+      end do;   end do
+
+!(*----------------------------  LEAVING PHYSICAL SPACE  --------------------------*)
+
+      do  i = 1,  4
+           call rfft ( nx, nz,   ur(1,i,y,1), nz*4*my, one, trigx, -one )
+           do  z = 1, nz ;   ur(z,i,y,1) = cmplx(real(ur(z,i,y,1)),0);   end do
+           call cfft ( nz, nx/2, ur(1,i,y,1), one, nz*4*my, trigz, -one )
+      end do
+
+      call rfft ( nx, nz, s2, nz, one, trigx, -one )
+      do  z = 1, nz ;   s2(z,1) = cmplx(real(s2(z,1)),0);    end do
+      call cfft ( nz, nx/2, s2, one, nz, trigz, -one )
+
+      do  x = 1, nx/2;  do  z = 1, nz
+          ur(z,1,y,x) = kx(x) * ur(z,1,y,x) + kz(z) * s2(z,x)   - (0,1) * 2*nx*nz*shear * vs(z,x)
+          ur(z,3,y,x) = kx(x) * s2(z,x)     + kz(z) * ur(z,3,y,x)
+      end do;   end do
+  end do
+
+ end  subroutine  phase2
+
+  !(***********************************************************************************************************
+  !     phase 3 :  on entry, the data-plane contains the four stresses on a shifted mesh in physical y space,
+  !                wave x,z space.   Return to y  wave space on unshifted mesh and complete time derivative of
+  !                velocity ( not divergence free yet )
+  !***********************************************************************************************************)
+
+  subroutine  phase3
+
+      use constants_module, only : one
+      use run_size
+      implicit none
+
+    integer(int64) :: i, x, y, z
+    complex :: shift
+
+    do  x = first_x, last_x
+
+       do i = 1, 4
+           call cfft ( ny, nz, u(1,i,x,1), nz*4*mx, one, trigy, -one )
+       end do
+
+!(*---------------------------   WELCOME TO FOURIER WAVE SPACE  --------------------------*)
+
+       do  y = 1, ny ;   do  z = 1, nz
+               shift = -dt / (4*nx*ny*nz) * (0,1)*conjg( sy(y,rkstep) * sz(z,rkstep) * sx(x,rkstep) )
+               u(z,1,x,y) = shift * (         u(z,1,x,y) + ky(x,y) * u(z,2,x,y) )
+               u(z,2,x,y) = shift * ( kx(x) * u(z,2,x,y) + kz(z)   * u(z,4,x,y) )
+               u(z,3,x,y) = shift * (         u(z,3,x,y) + ky(x,y) * u(z,4,x,y) )
+       end do;  end do
+    end do
+
+ end   subroutine  phase3
+
+  !(***********************************************************************************************************
+  !   pressure :  add the gradient of a scalar, enforce continuity ( zero divergence )
+  !***********************************************************************************************************)
+
+  subroutine  pressure
+
+      use run_size
+      implicit none
+
+     complex :: psi
+     integer(int64) :: x, y, z
+
+      do x = first_x, last_x ;     do  y = 1, ny
+
+            if ( x /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( kx(x) * u(z,1,x,y) + ky(x,y) * u(z,2,x,y) + kz(z) * u(z,3,x,y) ) &
+                              / ( kx(x)**2 + ky(x,y)**2 + kz(z)**2 )
+                        u(z,1,x,y) = u(z,1,x,y) - kx(x) * psi
+                        u(z,2,x,y) = u(z,2,x,y) - ky(x,y) * psi
+                        u(z,3,x,y) = u(z,3,x,y) - kz(z) * psi
+                   end do
+			else if ( y /= 1 )  then
+                  do  z = 1, nz
+                        psi = ( ky(1,y) * u(z,2,1,y) + kz(z) * u(z,3,1,y) ) &
+                              / ( ky(1,y)**2 + kz(z)**2 )
+                        u(z,2,1,y) = u(z,2,1,y) - ky(1,y) * psi
+                        u(z,3,1,y) = u(z,3,1,y) - kz(z) * psi
+                  end do
+            else
+                  do  z = 1, nz ;    u(z,3,1,1) = 0;     end do
+            end if
+	 end do;    end do
+
+end   subroutine  pressure
+
+!(*****************************************************************************************************************
+!                                remesh  :  remesh the sheared coordinate system
+!*****************************************************************************************************************)
+
+subroutine   remesh
+
+    use constants_module, only : one
+    use run_size
+    implicit none
+
+    complex :: u2(nx+ny,nz), shift(nx+ny)
+    integer(int64) :: i, x, y, z
+
+    write(6,fmt="(A,i4)") "remesh image ", my_node
+
+    total_time =  total_time + WALLTIME()     !-- stop the clock!
+
+    do x = first_x, last_x
+
+        do  y = 1, nx+ny ;   shift(y) =  exp( (0,-2) * pi / (nx+ny) * k1(x) * (y - 1) ) / (nx+ny);    end do
+
+        do  i = 1,  3
+            do  z = 1, nz
+                do  y = 1, ny/2           ;   u2(y,z) = u(z,i,x,y);     end do
+                do  y = ny/2+1, nx+ny/2+1 ;   u2(y,z) = 0;              end do
+                do  y = nx+ny/2+2, nx+ny  ;   u2(y,z) = u(z,i,x,y-nx);  end do
+            end do
+
+            call cfft ( nx+ny, nz, u2, one, nx+ny, trigxy, one )
+
+            do  z = 1, nz ;  do  y = 1, nx+ny ;     u2(y,z) = u2(y,z) * shift(y);  end do;  end do
+
+            call cfft ( nx+ny, nz, u2, one, nx+ny, trigxy, -one )
+
+            do  z = 1, nz
+                do  y = 1, ny/2
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y,z)
+                    end if
+                end do
+                do  y = ny/2+1, ny
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) &
+                        then;  u(z,i,x,y) = 0
+                        else;  u(z,i,x,y) = u2(y+nx,z)
+                    end if
+                end do
+            end do
+        end do
+
+        do  y = 1, ny ;    ky(x,y) = ky(x,y) + b22 * k1(x);     end do           !(* update ky for this x *)
+
+    end do
+
+    b12 = b12 + b22;     rflag = 0                                               !(*   update metric, account for remesh    *)
+
+    total_time =  total_time - WALLTIME()     !-- restart the clock!
+ end subroutine   remesh
+
+ !(***********************************************************************************************************
+ !         copy_n_s,   copy_s_n :  copy data between data_s and data_n
+ !***********************************************************************************************************)
+
+ subroutine  copy_n_s
+
+     use run_size
+     implicit none
+
+	integer(int64) ::  x, y, z
+
+	do y = 1, ny;    do x = first_x, last_x;   do z = 1, nz
+		u(z,1,x,y) = un(z,1,x,y)
+		u(z,2,x,y) = un(z,2,x,y)
+    	u(z,3,x,y) = un(z,3,x,y)
+	end do; end do; end do
+ end  subroutine  copy_n_s
+
+ subroutine  copy_s_n
+
+     use run_size
+     implicit none
+
+	integer(int64) ::  x, y, z
+
+	do y = 1, ny;	do x = first_x, last_x;     do z = 1, nz
+		un(z,1,x,y) = u(z,1,x,y)
+		un(z,2,x,y) = u(z,2,x,y)
+		un(z,3,x,y) = u(z,3,x,y)
+    end do; end do; end do
+
+ end  subroutine  copy_s_n
+
+ !(***********************************************************************************************************
+ !                         advance :     second-order runge-kutta time step algorithm
+ !***********************************************************************************************************)
+
+ subroutine  advance
+
+     use run_size
+     implicit none
+
+    integer(int64) ::  x, y, z
+    real :: factor, xyfac, zfac(nz)	  !(* viscous integrating factors *)
+
+    if (rkstep == 1) then
+        do z = 1, nz;     zfac(z) = exp( - viscos * dt * kz(z)**2 ); end do
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                ky_(x,y) = ky(x,y)
+                ky(x,y) = b22 * k2(y) + b12 * k1(x)
+
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        factor = zfac(z) * exp( - viscos * dt * ( kx(x)**2 + ( ky_(x,y)**2 + ky_(x,y)*ky(x,y) + ky(x,y)**2 )/3 ) )
+
+                        un(z,1,x,y) = factor * ( un(z,1,x,y) + u(z,1,x,y) )
+                        u(z,1,x,y)  = un(z,1,x,y) + factor * u(z,1,x,y)
+
+                        un(z,2,x,y) = factor * ( un(z,2,x,y) + u(z,2,x,y) )
+                        u(z,2,x,y)  = un(z,2,x,y) + factor * u(z,2,x,y)
+
+                        un(z,3,x,y) = factor * ( un(z,3,x,y) + u(z,3,x,y) )
+                        u(z,3,x,y)  = un(z,3,x,y) + factor * u(z,3,x,y)
+                    end if
+        end do;  end do; end do
+
+    else if (rkstep == 2) then
+
+        do x = first_x, last_x
+            do  y = 1, ny
+                do z = 1, nz
+                    if (mk1(x)+mk2(y)+mk3(z) > 2./9.) then
+                        u(z,1,x,y) = 0;     u(z,2,x,y) = 0;     u(z,3,x,y) = 0
+                    else
+                        u(z,1,x,y) = un(z,1,x,y) + u(z,1,x,y)
+                        u(z,2,x,y) = un(z,2,x,y) + u(z,2,x,y)
+                        u(z,3,x,y) = un(z,3,x,y) + u(z,3,x,y)
+                    end if
+        end do; end do; end do
+
+    end if
+
+ end  subroutine  advance
+
+ end   subroutine solve_navier_stokes
diff --git a/src/tests/integration/pde_solvers/navier-stokes/walltime.o b/src/tests/integration/pde_solvers/navier-stokes/walltime.o
new file mode 100644
index 0000000..b4ad744
Binary files /dev/null and b/src/tests/integration/pde_solvers/navier-stokes/walltime.o differ
diff --git a/src/tests/performance/BurgersMPI/CMakeLists.txt b/src/tests/performance/BurgersMPI/CMakeLists.txt
new file mode 100644
index 0000000..b7c9736
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/CMakeLists.txt
@@ -0,0 +1,33 @@
+set(include_directory ${CMAKE_CURRENT_SOURCE_DIR}/../../integration/pde_solvers/include-files)
+set(library_directory ${CMAKE_CURRENT_SOURCE_DIR}/../../integration/pde_solvers/library)
+set(config_directory  ${CMAKE_CURRENT_BINARY_DIR}/../../integration/pde_solvers/library)
+
+if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Cray")
+  configure_file(${include_directory}/cray_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel")
+  configure_file(${include_directory}/intel_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU")
+  configure_file(${include_directory}/gfortran_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "VisualAge|XL")
+  configure_file(${include_directory}/ibm_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "PGI")
+  configure_file(${include_directory}/portlandgroup_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+elseif ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "NAG")
+  configure_file(${include_directory}/nag_capabilities.txt ${config_directory}/compiler_capabilities.txt COPYONLY)
+else()
+  message ("Unknown Fortran compiler: ${CMAKE_Fortran_COMPILER_ID}")
+endif()
+
+add_executable( mpi_burgers_pde
+  main.F90
+  ${library_directory}/object_interface.F90
+  ${library_directory}/ForTrilinos_assertion_utility.F90
+  ${library_directory}/ForTrilinos_error.F90
+  kind_parameters.F90
+  shared.F90
+  mpi_module.F90
+  mpi_share.F90
+  input_file.F90
+  periodic_2nd_order.F90
+)
+target_include_directories(mpi_burgers_pde PRIVATE ${config_directory})
diff --git a/src/tests/performance/BurgersMPI/Makefile b/src/tests/performance/BurgersMPI/Makefile
new file mode 100644
index 0000000..9c5aadf
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/Makefile
@@ -0,0 +1,100 @@
+mofo_root=..
+library_root=../library
+executable=burgers_mpi
+
+#include TAU_MAKEFILE $TAU_ROOT_DIR/x86_64/lib/selected_make_file
+
+# For benchmark purposes the -DBENCHMARK flag is required.
+# Another option is to add #define BENCHMARK to “compiler_capabilities.txt”
+# but it is not recommended.
+# A quick way to activate the benchmark mode is through make CMDFLAGS=-DBENCHMARK.
+
+# Note: A benchmark purpose is the comparison among this solver and the
+# Coarrays counterpart.
+
+#Cray compiler command
+cray=ftn -ew -h caf
+#GNU compiler command
+gnu=mpif90 -fcoarray=lib -ffree-form -ffree-line-length-none
+
+#Intel compiler command
+intel= tau_f90.sh -DTAU=1 -Bdynamic -standard-semantics -O3 -g -optCompInst
+intel= mpiifort -Bdynamic -standard-semantics -O3 -L/usr/lib64 #-lpmi
+
+#TAU compiler command
+tau=tauf90 -tau:serial,icpc,pdt -DTAU=1 -Bdynamic -standard-semantics -O3 -g -optCompInst
+#TAU compiler command cray
+tau_cray=tau_f90.sh -optCompInst -ew
+
+compile=$(gnu)
+#compile=$(intel)
+#compile=$(cray)
+#compile=$(tau_cray)
+# PLEASE UNCOMMENT THE # TO USE TAU
+
+OPTS=
+objects = periodic_2nd_order.o mpi_module.o shared.o mpi_share.o kind_parameters.o object_interface.o ForTrilinos_assertion_utility.o ForTrilinos_error.o input_file.o
+CFLAGS = -c  $(CMDFLAGS)
+
+$(executable): main.o $(objects) Makefile
+	$(compile) $(OPTS)  main.o $(objects) -o $(executable) -lcaf_mpi
+
+main.o: main.F90 $(objects) Makefile
+	$(compile) $(OPTS) $(CFLAGS) main.F90
+
+periodic_2nd_order.o: periodic_2nd_order.F90 preprocessor_definitions object_interface.o shared.o mpi_share.o kind_parameters.o ForTrilinos_assertion_utility.o ForTrilinos_error.o mpi_module.o input_file.o Makefile
+	$(compile) $(OPTS)$(CFLAGS) periodic_2nd_order.F90
+
+mpi_share.o: mpi_share.F90 Makefile mpi_module.o
+	$(compile) $(OPTS) $(CFLAGS) mpi_share.F90
+
+mpi_module.o: mpi_module.F90 kind_parameters.o object_interface.o shared.o ForTrilinos_assertion_utility.o ForTrilinos_error.o Makefile
+	$(compile) $(OPTS) $(CFLAGS) mpi_module.F90
+
+shared.o: shared.F90 Makefile kind_parameters.o
+	$(compile) $(OPTS) $(CFLAGS) shared.F90
+
+
+kind_parameters.o: kind_parameters.F90 Makefile
+	$(compile) $(OPTS)$(CFLAGS)  kind_parameters.F90
+
+input_file.o: input_file.F90 Makefile
+	$(compile) $(OPTS) $(CFLAGS) input_file.F90
+
+object_interface.o: $(library_root)/object_interface.F90 kind_parameters.o Makefile preprocessor_definitions
+	$(compile) $(OPTS) $(CFLAGS) $(library_root)/object_interface.F90
+
+ForTrilinos_assertion_utility.o: $(library_root)/ForTrilinos_assertion_utility.F90 Makefile
+	$(compile) $(OPTS) $(CFLAGS) $(library_root)/ForTrilinos_assertion_utility.F90
+
+ForTrilinos_error.o: $(library_root)/ForTrilinos_error.F90 Makefile
+	$(compile) $(OPTS)$(CFLAGS) $(library_root)/ForTrilinos_error.F90
+
+
+.PHONY : clean
+clean :
+	-rm -f  *.o *.mod $(executable) core fort.* compiler_capabilities.txt profile* MULT* *.trc *.edf $(library_root)/compiler_capabilities.txt
+
+preprocessor_definitions:
+ifeq ($(compile),$(gnu))
+	cp $(mofo_root)/include-files/gfortran_capabilities.txt compiler_capabilities.txt
+	cp $(mofo_root)/include-files/gfortran_capabilities.txt $(library_root)/compiler_capabilities.txt
+else
+  ifeq ($(compile),$(intel))
+	cp $(mofo_root)/include-files/intel_capabilities.txt compiler_capabilities.txt
+	cp $(mofo_root)/include-files/intel_capabilities.txt $(library_root)/compiler_capabilities.txt
+  else
+    ifeq ($(compile),$(cray))
+	cp $(mofo_root)/include-files/cray_capabilities.txt compiler_capabilities.txt
+	cp $(mofo_root)/include-files/cray_capabilities.txt $(library_root)/compiler_capabilities.txt
+    endif
+  endif
+endif
+ifeq ($(compile),$(tau))
+	cp $(mofo_root)/include-files/intel_capabilities.txt compiler_capabilities.txt
+	cp $(mofo_root)/include-files/intel_capabilities.txt $(library_root)/compiler_capabilities.txt
+endif
+ifeq ($(compile),$(tau_cray))
+	cp $(mofo_root)/include-files/tau_cray_capabilities.txt compiler_capabilities.txt
+	cp $(mofo_root)/include-files/tau_cray_capabilities.txt $(library_root)/compiler_capabilities.txt
+endif
diff --git a/src/tests/performance/BurgersMPI/input_file.F90 b/src/tests/performance/BurgersMPI/input_file.F90
new file mode 100644
index 0000000..1151332
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/input_file.F90
@@ -0,0 +1,33 @@
+! MPI 1D Burgers equation solver: input_file
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+
+module input_file
+  !scalable module for sharing input data about mesh/numerical scheme
+  use kind_parameters, only :ikind
+  implicit none
+ integer(ikind) ,parameter :: grid_resolution=819200 !134217728
+end module
diff --git a/src/tests/performance/BurgersMPI/kind_parameters.F90 b/src/tests/performance/BurgersMPI/kind_parameters.F90
new file mode 100644
index 0000000..04dc872
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/kind_parameters.F90
@@ -0,0 +1,35 @@
+! Copyright (c) 2011, Damian Rouson, Jim Xia, and Xiaofeng Xu.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the names of Damian Rouson, Jim Xia, and Xiaofeng Xu nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL DAMIAN ROUSON, JIM XIA, and XIAOFENG XU BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+module kind_parameters       ! Type kind parameter
+  implicit none
+  private
+  public :: rkind,ikind,ckind
+  integer ,parameter :: digits=8  ! num. digits of kind
+  integer ,parameter :: decades=9 ! num. representable decades
+  integer ,parameter :: rkind = selected_real_kind(digits)
+  integer ,parameter :: ikind = selected_int_kind(decades)
+  integer ,parameter :: ckind = selected_char_kind('default')
+end module
diff --git a/src/tests/performance/BurgersMPI/main.F90 b/src/tests/performance/BurgersMPI/main.F90
new file mode 100644
index 0000000..0784edd
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/main.F90
@@ -0,0 +1,105 @@
+! Copyright (c) 2011, Damian Rouson, Jim Xia, and Xiaofeng Xu.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the names of Damian Rouson, Jim Xia, and Xiaofeng Xu nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL DAMIAN ROUSON, JIM XIA, and XIAOFENG XU BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+module initializer
+  use kind_parameters ,only : rkind
+  implicit none
+contains
+  real(rkind) pure function u_initial(x)
+    real(rkind) ,intent(in) :: x
+    u_initial = 10._rkind*sin(x)
+  end function
+  real(rkind) pure function zero(x)
+    real(rkind) ,intent(in) :: x
+    zero = 0.
+  end function
+end module
+
+program main
+  use iso_fortran_env, only : output_unit
+  use kind_parameters ,only : rkind
+  use periodic_2nd_order_module, only : periodic_2nd_order, initial_field
+  use initializer ,only : u_initial,zero
+  use input_file ,only : grid_resolution !CZC
+  use shared
+  use mpi_share, only : mpi_object !CZC
+  implicit none
+  type(periodic_2nd_order), save :: u,half_uu,u_half
+  real(rkind) :: dt,half=0.5,t=0.,t_final=3.08,nu=1.
+  integer ,parameter :: base_output_unit=output_unit+10
+  integer :: step,iostat, steps, num_steps = 100000
+  character(:), allocatable :: iotype ! Allowable values: ’LISTDIRECTED’,’NAMELIST’, or ’DT’
+  character(:), allocatable :: iomsg
+  integer, allocatable :: v_list(:)
+  procedure(initial_field) ,pointer :: initial
+  real(rkind), parameter :: time_initial=0.
+  real(rkind), allocatable :: u_surface(:,:)
+  real(rkind) :: t_1, t_2, t_3
+
+  ! Test parameters
+  real(rkind), parameter :: pi=acos(-1._rkind),expected_zero_location=pi
+
+  call mpi_object%mpi_begin !initiate MPI functionality, CZC
+  local_grid_resolution=grid_resolution/num_procs !calculate how processors are shared, CZC
+
+#ifdef USING_TAU
+  call TAU_PROFILE_SET_NODE(my_id)
+#endif
+  call cpu_time(t_1)
+  initial => u_initial
+  call u%construct(initial,grid_resolution)
+  initial => zero
+  call half_uu%construct(initial,grid_resolution)
+  call u_half%construct(initial,grid_resolution)
+  call u%set_time(time_initial)
+  step = 1
+  call u%set_time((step-1)*dt)
+  !numerical scheme
+  call cpu_time(t_2)
+  dt = u%runge_kutta_2nd_step(nu ,grid_resolution)
+#ifdef BENCHMARK
+  do steps = 1, num_steps
+#else
+  do while (t<t_final)
+#endif
+    half_uu = u*u*half
+    u_half = u + (u%xx()*nu - half_uu%x())*dt*half ! first substep
+    half_uu = u_half*u_half*half
+    u  = u + (u_half%xx()*nu - half_uu%x())*dt ! second substep
+    t = t + dt
+    step = step + 1
+  end do
+  call cpu_time(t_3)
+  if (my_id == 0) print *, t_2 - t_1, t_3 - t_2, t_3 - t_1
+  !print *, 'this image = ', my_id, ' f_global = ',u%global_f(1:local_grid_resolution)
+  iomsg = "Output result: success."
+  !call u%output(70 + my_id,iotype,v_list,iostat,iomsg)
+   if (u%this_image_contains(expected_zero_location)) then
+     if (.not. u%has_a_zero_at(expected_zero_location)) error stop "Test failed."
+     print *,'Test passed.'
+   end if
+ call mpi_object%mpi_end !end MPI processes
+
+end program
diff --git a/src/tests/performance/BurgersMPI/mpi_module.F90 b/src/tests/performance/BurgersMPI/mpi_module.F90
new file mode 100644
index 0000000..1d70c77
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/mpi_module.F90
@@ -0,0 +1,118 @@
+! MPI 1D Burgers equation solver test: mpi_module
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
+module mpi_module
+!declare what other modules are being used
+use kind_parameters, only: rkind, ikind
+use object_interface, only : object
+use ForTrilinos_assertion_utility, only : assert,error_message
+use shared
+
+implicit none
+!everything in this module aside from mpi_class is private
+private
+public ::mpi_class
+integer(ikind) :: program_status=0 !integer for keeping track of whether mpi has
+
+!started or not
+!extend object class so can make use of assertions
+type, extends(object) :: mpi_class
+
+contains
+procedure :: output !mandatory extension of object
+procedure, nopass :: mpi_begin ! initiate mpi
+procedure, nopass :: mpi_end ! end mpi
+procedure, nopass :: barrier
+procedure, nopass :: oned_message !communicate with neighboors for a 1d pde
+end type
+
+contains
+
+  subroutine output(this,unit,iotype,v_list,iostat,iomsg)
+    class(mpi_class), intent(in) :: this
+    integer, intent(in) :: unit ! Unit on which output happens (negative for internal file)
+    character(*), intent(in) :: iotype ! Allowable values: ’LISTDIRECTED’,’NAMELIST’, or ’DT’
+    integer, intent(in) :: v_list(:)
+    integer, intent(out) :: iostat
+    character(*), intent(inout) :: iomsg
+    integer(ikind) i
+    ! Requires
+    call assert(this%user_defined(),error_message("mpi_object%output recieved unitialized object."))
+      write (unit=unit,iostat=iostat,fmt="(i8,3(f12.4,2x))")
+   end subroutine
+
+  subroutine mpi_begin
+  integer :: dims(1), periods(1), reorder
+   if (program_status .eq. 0) then !prevent accidentally starting mpi when
+   !already has been initiated
+    call MPI_INIT(ierr) !initiate MPI
+    call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr) !retrieve processer count
+    call MPI_COMM_RANK(MPI_COMM_WORLD, my_id, ierr) !retrive processor rank
+    dims = num_procs
+    reorder = 1
+    periods = 1
+    call MPI_CART_CREATE(MPI_COMM_WORLD, 1, dims, periods, reorder, MPI_COMM_CART, ierr)
+    call MPI_COMM_RANK(MPI_COMM_CART, my_id, ierr)
+    call MPI_CART_SHIFT(MPI_COMM_CART, 0, 1, left_id, right_id, ierr)
+    program_status=1
+   endif
+  end subroutine
+
+  subroutine mpi_end
+    if(program_status==1) then !prevent ending mpi if it is not running
+     call MPI_FINALIZE(ierr)
+     program_status=0
+     endif
+  end subroutine
+
+  subroutine barrier
+    call mpi_barrier(mpi_comm_world, ierr)
+  end subroutine
+
+  subroutine oned_message(periodic,local_grid_resolution,left_sub,right_sub)
+  integer (ikind), intent(in)  :: local_grid_resolution
+  real (rkind), intent(in), dimension(:) :: periodic ! keep track of global f
+  real (rkind), intent(inout) ::left_sub,right_sub !images from nearby processors
+  DOUBLE PRECISION left,right  !intermediate variable for storing messages
+  !  assertions to ensure that proper input provided to subroutine
+    call assert(size(periodic)>= local_grid_resolution,error_message("size of local function too small."))
+    call assert(local_grid_resolution>0,error_message("invalid local grid spacing."))
+
+    if (num_procs >1) then !no need to communicate if only 1 processor
+      call MPI_SENDRECV(periodic(1),1,MPI_DOUBLE_PRECISION,left_id,0, &
+                      right,1,MPI_DOUBLE_PRECISION,right_id,0,MPI_COMM_CART,status,ierr)
+      call MPI_SENDRECV(periodic(local_grid_resolution),1,MPI_DOUBLE_PRECISION,right_id,0, &
+                      left,1,MPI_DOUBLE_PRECISION,left_id,0,MPI_COMM_CART,status,ierr)
+      left_sub = left
+      right_sub = right
+    else!incase only one processor
+      left_sub = periodic(local_grid_resolution)
+      right_sub = periodic(1)
+    endif
+
+end subroutine
+
+end module
diff --git a/src/tests/performance/BurgersMPI/mpi_share.F90 b/src/tests/performance/BurgersMPI/mpi_share.F90
new file mode 100644
index 0000000..dddf8c2
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/mpi_share.F90
@@ -0,0 +1,31 @@
+! MPI 1D Burgers equation solver test: mpi_share
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
+module mpi_share
+  !module exclusively for initiating and sharing an mpi_object
+  use mpi_module, only :mpi_class
+  type (mpi_class) :: mpi_object
+end module
diff --git a/src/tests/performance/BurgersMPI/periodic_2nd_order.F90 b/src/tests/performance/BurgersMPI/periodic_2nd_order.F90
new file mode 100644
index 0000000..99a0611
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/periodic_2nd_order.F90
@@ -0,0 +1,341 @@
+! Copyright (c) 2011, Damian Rouson, Jim Xia, and Xiaofeng Xu.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the names of Damian Rouson, Jim Xia, and Xiaofeng Xu nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL DAMIAN ROUSON, JIM XIA, and XIAOFENG XU BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+module periodic_2nd_order_module
+  !note co-object and field modules no longer necessary so they are not used
+  use kind_parameters ,only : rkind, ikind
+  use ForTrilinos_assertion_utility, only : assert,error_message
+  use object_interface, only : object
+  use input_file, only:grid_resolution !added input file so that multiple modules/main can access grid_resolution,CZC
+  use mpi_module, only :mpi_class !CZC
+  use mpi_share, only:mpi_object !CZC
+  use shared !CZC
+  implicit none
+  private
+  public :: periodic_2nd_order, initial_field
+
+  type, extends(object) :: periodic_2nd_order
+  !   private
+     !Make arrays larger than necessary as they must have an explicit intialization
+     real(rkind), allocatable :: global_f(:) !object MPI variable for storing function, CZC
+
+  contains
+    procedure :: construct
+    procedure :: assign_field
+    procedure :: add           => add_field
+    procedure :: multiply      => multiply_field
+    procedure :: multiply_real  !CZC
+    procedure :: subtract       !CZC
+    procedure :: x             => df_dx
+    procedure :: xx            => d2f_dx2
+    procedure :: runge_kutta_2nd_step => rk2_dt
+    procedure, nopass :: this_image_contains
+    procedure :: has_a_zero_at
+    procedure :: local_state
+    procedure, nopass :: set_time
+    procedure, nopass :: get_time
+    generic   :: assignment(=) => assign_field
+    generic   :: operator(+)   => add
+    generic   :: operator(*)   => multiply
+    generic   :: operator(*)    => multiply_real!added functionality, taken from field class, CZC
+    generic   :: operator(-)    => subtract !added functionality, taken from field class, CZC
+    procedure :: output
+   !generic :: write=>output ! Fortran 2003 derived-type output
+  end type
+
+  real(rkind) ,parameter :: pi=acos(-1._rkind)
+  real(rkind), allocatable :: local_grid(:)
+  real(rkind) :: time=0.
+
+  abstract interface
+    real(rkind) pure function initial_field(x)
+      import :: rkind
+      real(rkind) ,intent(in) :: x
+    end function
+  end interface
+
+contains
+
+  pure function local_state(this) result(local_state_vector)
+     class(periodic_2nd_order), intent(in) :: this
+     real(rkind), allocatable :: local_state_vector(:)
+     integer(ikind) :: i
+     local_state_vector = this%global_f
+  end function
+
+  subroutine output(this,unit,iotype,v_list,iostat,iomsg)
+    class(periodic_2nd_order), intent(in) :: this
+    integer, intent(in) :: unit ! Unit on which output happens (negative for internal file)
+    character(*), intent(in) :: iotype ! Allowable values: ’LISTDIRECTED’,’NAMELIST’, or ’DT’
+    integer, intent(in) :: v_list(:)
+    integer, intent(out) :: iostat
+    character(*), intent(inout) :: iomsg
+    integer(ikind) i
+    ! Requires
+    call assert(this%user_defined(),error_message("periodic_2nd_order%output recieved unitialized object."))
+
+   do i = 1, local_grid_resolution
+      write (unit=unit,iostat=iostat,fmt="(i8,3(f12.4,2x))") &
+      (my_id)*local_grid_resolution + i, local_grid(i),time,this%global_f(i) !modified earlier code to work with MPI, CZC
+    end do
+  end subroutine
+
+  subroutine set_time(time_stamp)
+    real(rkind), intent(in) :: time_stamp
+    time = time_stamp
+  end subroutine
+
+  pure function get_time() result(t)
+    real(rkind) :: t
+    t = time
+  end function
+
+ pure function has_a_zero_at(this, expected_location) result(zero_at_expected_location)
+    class(periodic_2nd_order) ,intent(in) :: this
+    real(rkind) ,intent(in) :: expected_location
+    real(rkind), parameter :: tolerance = 1.0E-06_rkind
+    integer :: nearest_grid_point
+    logical :: zero_at_expected_location
+    ! Requires
+    if (this%user_defined()) then
+      nearest_grid_point = minloc(abs(local_grid-expected_location),dim=1)
+      zero_at_expected_location = merge(.true.,.false., abs(this%global_f(nearest_grid_point)) < tolerance  )
+    end if
+  end function
+
+  pure function this_image_contains(location) result(within_bounds)
+     implicit none
+    real(rkind), intent(in) :: location
+    logical within_bounds
+    within_bounds = merge(.true.,.false., (location>=minval(local_grid) .and. location<=maxval(local_grid)) )
+  end function
+
+  subroutine construct (this, initial,num_grid_pts)
+     implicit none
+    class(periodic_2nd_order), intent(inout) :: this
+    procedure(initial_field) ,pointer, intent(in) :: initial
+    integer(ikind) ,intent(in) :: num_grid_pts
+    integer :: i
+    !local variables for storing nearby nodes
+    DOUBLE PRECISION left,right !CZC
+    DOUBLE PRECISION left_sub,right_sub!CZC
+    ! Requires
+    call assert(mod(num_grid_pts, num_procs)==0,error_message("periodic_2nd_order%construct: invalid number of grid points."))
+    local_grid = grid()
+    allocate(this%global_f(local_grid_resolution))
+    do concurrent (i=1:local_grid_resolution)
+      this%global_f(i) = initial(local_grid(i))
+    end do
+  ! Ensures
+
+    call mpi_object%oned_message(this%global_f(1:local_grid_resolution),local_grid_resolution,left_sub,right_sub)!communicate with neighbors,
+
+    call this%mark_as_defined
+  contains
+    pure function grid()
+     implicit none
+      integer(ikind) :: i
+      real(rkind) ,dimension(:) ,allocatable :: grid
+      allocate(grid(local_grid_resolution))
+      do concurrent (i=1:local_grid_resolution)
+        grid(i)  = 2.*pi*(local_grid_resolution*(my_id)+i-1) &
+                   /real(num_grid_pts,rkind)
+      end do
+    end function
+  end subroutine
+
+  real(rkind) function rk2_dt(this,nu,num_grid_pts)
+     implicit none
+    class(periodic_2nd_order) ,intent(in) :: this
+    real(rkind) ,intent(in) :: nu
+    integer(ikind) ,intent(in) :: num_grid_pts
+    real(rkind)             :: dx, CFL, k_max
+    ! Requires
+    if (this%user_defined()) then
+      dx=2.0*pi/num_grid_pts
+      k_max=num_grid_pts/2.0_rkind
+      CFL=1.0/(1.0-cos(k_max*dx))
+      rk2_dt = CFL*dx**2/nu
+    end if
+  end function
+
+  ! this is the assignment
+  subroutine assign_field(lhs,rhs)
+    implicit none
+    class(periodic_2nd_order) ,intent(inout) :: lhs
+    type(periodic_2nd_order) ,intent(in) :: rhs
+    DOUBLE PRECISION left,right !CZC
+    DOUBLE PRECISION left_sub,right_sub !CZC
+
+    ! Requires
+    call assert(rhs%user_defined(),error_message("periodic_2nd_order%copy received undefind RHS."))
+    ! update global field
+    lhs%global_f = rhs%global_f !CZC
+    ! Ensures
+    call lhs%mark_as_defined
+
+  end subroutine
+
+  function add_field (this, rhs)
+     implicit none
+    class(periodic_2nd_order), intent(in) :: this
+     class(periodic_2nd_order), intent(in) :: rhs
+    type(periodic_2nd_order) :: add_field
+   ! Requires
+    allocate(add_field%global_f(local_grid_resolution))
+    if (rhs%user_defined() .and. this%user_defined()) then
+      add_field%global_f(1:local_grid_resolution) = rhs%global_f(1:local_grid_resolution)+this%global_f(1:local_grid_resolution)
+      ! Ensures
+      call add_field%mark_as_defined
+    end if
+  end function
+
+  function multiply_field (this, rhs)
+     implicit none
+    class(periodic_2nd_order), intent(in) :: this, rhs
+    type(periodic_2nd_order) :: multiply_field
+
+     ! Requires
+    allocate(multiply_field%global_f(local_grid_resolution))
+    if (this%user_defined() .and. rhs%user_defined()) then
+       multiply_field%global_f(1:local_grid_resolution)=this%global_f(1:local_grid_resolution)*rhs%global_f(1:local_grid_resolution)
+      ! Ensures
+      call multiply_field%mark_as_defined
+    end if
+  end function
+
+!New procedure, functionality taken from field, CZC
+  function multiply_real(lhs,rhs) result(product_)
+    class(periodic_2nd_order) ,intent(in) :: lhs
+    real(rkind) ,intent(in)  :: rhs
+    type(periodic_2nd_order) :: product_
+    ! Requires
+    allocate(product_%global_f(local_grid_resolution))
+    if (lhs%user_defined()) then
+      product_%global_f(1:local_grid_resolution) = lhs%global_f(1:local_grid_resolution) * rhs !multiply array with scalar
+      ! Ensures
+      call product_%mark_as_defined
+    end if
+  end function
+   !new procedure, functionality taken from field, CZC
+  pure function subtract(lhs,rhs) result(difference)
+    class(periodic_2nd_order) ,intent(in) :: lhs
+    class(periodic_2nd_order) ,intent(in)  :: rhs
+    type(periodic_2nd_order) :: difference
+    ! Requires
+    allocate(difference%global_f(local_grid_resolution))
+    if (lhs%user_defined() .and. rhs%user_defined()) then
+      difference%global_f(1:local_grid_resolution) = lhs%global_f(1:local_grid_resolution) - rhs%global_f(1:local_grid_resolution) !subtract arrays
+      ! Ensures
+      call difference%mark_as_defined
+    end if
+  end function
+
+  function df_dx(this)
+     implicit none
+    class(periodic_2nd_order), intent(in) :: this
+    type(periodic_2nd_order)  :: df_dx
+    integer(ikind) :: i,nx
+    real(rkind) :: dx, left_image, right_image
+    real(rkind), dimension(:), allocatable, save :: tmp_field_array
+    ! Requires
+    if (this%user_defined()) then
+
+        nx = local_grid_resolution
+        if (.not.allocated(tmp_field_array)) allocate(tmp_field_array(nx))
+        dx=2.*pi/(real(nx,rkind)*num_procs)
+
+        if (num_procs > 1) then
+            call MPI_SENDRECV(this%global_f(1),1,MPI_DOUBLE_PRECISION,left_id,0, &
+            right_image,1,MPI_DOUBLE_PRECISION,right_id,0,MPI_COMM_CART,status,ierr)
+            call MPI_SENDRECV(this%global_f(local_grid_resolution),1,MPI_DOUBLE_PRECISION,right_id,0, &
+            left_image,1,MPI_DOUBLE_PRECISION,left_id,0,MPI_COMM_CART,status,ierr)
+        else
+            left_image = this%global_f(nx)
+            right_image = this%global_f(1)
+        end if
+
+      tmp_field_array(1) = &
+         0.5*(this%global_f(2)-left_image)/dx
+
+      tmp_field_array(nx) = &
+         0.5*(right_image-this%global_f(nx-1))/dx
+
+      do concurrent(i=2:nx-1)
+        tmp_field_array(i)=&
+          0.5*(this%global_f(i+1)-this%global_f(i-1))/dx
+      end do
+
+      df_dx%global_f = tmp_field_array
+      ! Ensures
+      call df_dx%mark_as_defined
+    end if
+  end function
+
+  function d2f_dx2(this)
+    implicit none
+    class(periodic_2nd_order), intent(in) :: this
+    type(periodic_2nd_order)  :: d2f_dx2
+    integer(ikind) :: i,nx
+    real(rkind) :: dx, left_image, right_image
+    real(rkind), dimension(:), allocatable, save :: tmp_field_array
+
+    ! Requires
+    if (this%user_defined()) then
+
+        nx = local_grid_resolution
+        if (.not.allocated(tmp_field_array)) allocate(tmp_field_array(nx))
+        dx=2.*pi/(real(nx,rkind)*num_procs)
+
+        if (num_procs > 1) then
+            call MPI_SENDRECV(this%global_f(1),1,MPI_DOUBLE_PRECISION,left_id,0, &
+            right_image,1,MPI_DOUBLE_PRECISION,right_id,0,MPI_COMM_CART,status,ierr)
+            call MPI_SENDRECV(this%global_f(local_grid_resolution),1,MPI_DOUBLE_PRECISION,right_id,0, &
+            left_image,1,MPI_DOUBLE_PRECISION,left_id,0,MPI_COMM_CART,status,ierr)
+        else
+            left_image = this%global_f(nx)
+            right_image = this%global_f(1)
+        end if
+
+        tmp_field_array(1) = &
+         (this%global_f(2)-2.0*this%global_f(1)+left_image)&
+         /dx**2
+
+      tmp_field_array(nx) =&
+         (right_image-2.0*this%global_f(nx)+this%global_f(nx-1))&
+         /dx**2
+
+      do concurrent (i=2:nx-1)
+        tmp_field_array(i)=&
+          (this%global_f(i+1)-2.0*this%global_f(i)+this%global_f(i-1))&
+          /dx**2
+      end do
+
+      d2f_dx2%global_f = tmp_field_array
+      ! Ensures
+      call d2f_dx2%mark_as_defined
+    end if
+  end function
+end module
diff --git a/src/tests/performance/BurgersMPI/shared.F90 b/src/tests/performance/BurgersMPI/shared.F90
new file mode 100644
index 0000000..d35d7ef
--- /dev/null
+++ b/src/tests/performance/BurgersMPI/shared.F90
@@ -0,0 +1,36 @@
+! MPI 1D Burgers equation solver test: shared
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
+module shared
+  !module for sharing mpi functionality/variables with other modules/main
+  use mpi !non-native mpi functionality
+  integer :: tag, status(MPI_STATUS_SIZE)
+  integer :: MPI_COMM_CART
+  integer :: my_id, num_procs, ierr, local_grid_resolution, left_id, right_id
+  integer, parameter :: send_data_tag = 2001, return_data_tag = 2002 !tags for sending information
+  integer, parameter :: root_process = 0
+  integer, parameter :: max_local_resolution = 10000
+end module
diff --git a/src/tests/performance/CMakeLists.txt b/src/tests/performance/CMakeLists.txt
new file mode 100644
index 0000000..b7d67d2
--- /dev/null
+++ b/src/tests/performance/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_subdirectory(psnap)
+add_subdirectory(mpi_dist_transpose)
+add_subdirectory(BurgersMPI)
diff --git a/src/tests/performance/mpi_dist_transpose/CMakeLists.txt b/src/tests/performance/mpi_dist_transpose/CMakeLists.txt
new file mode 100644
index 0000000..34c5bec
--- /dev/null
+++ b/src/tests/performance/mpi_dist_transpose/CMakeLists.txt
@@ -0,0 +1,16 @@
+IF (("${CMAKE_SYSTEM_NAME}" MATCHES "Linux" ) AND (${HIGH_RESOLUTION_TIMER}))
+  add_definitions(-DHAVE_WALLTIME)
+  message("\nUsing assembly-language timers that tick once per clock cycle.")
+  message("WARNING: walltime.o is designed for an x86 CPU with a 3.6 GHz clock.")
+  message("Because modern processors can throttle their clock spees, do not ")
+  message("rely on this timer for an absolute value, but it can be useful for ")
+  message("comparison or relative execution execution times on the same platform.\n")
+  add_executable(mpi_distributed_transpose
+    mpi_distributed_transpose.F90
+    walltime.o
+  )
+else()
+  add_executable(mpi_distributed_transpose
+    mpi_distributed_transpose.F90
+  )
+endif()
diff --git a/src/tests/performance/mpi_dist_transpose/mpi_distributed_transpose.F90 b/src/tests/performance/mpi_dist_transpose/mpi_distributed_transpose.F90
new file mode 100644
index 0000000..1a1cd0a
--- /dev/null
+++ b/src/tests/performance/mpi_dist_transpose/mpi_distributed_transpose.F90
@@ -0,0 +1,381 @@
+! MPI Distributed Transpose Test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+! Robodoc header:
+!****m* dist_transpose/mpi_run_size
+! NAME
+!   mpi_run_size
+! SYNOPSIS
+!   Encapsulate the problem state, wall-clock timer interface, integer broadcasts, and a data copy
+!   for a distributed tranpsose kernel extracted from a program for the Fourier-spectral simulation
+!   of statistically homogeneous.
+!******
+!==================  test transposes with integer x,y,z values  ===============================
+module mpi_run_size
+    use iso_fortran_env
+#ifndef HAVE_WALLTIME
+    use MPI, only : WALLTIME=>MPI_WTIME
+#endif
+    implicit none
+        integer(int64) :: nx, ny, nz
+        integer(int64) :: my, mx, first_y, last_y, first_x, last_x
+        integer(int64) :: my_node, num_nodes
+        real(real64) :: tran_time
+
+#ifdef HAVE_WALLTIME
+interface
+   function WALLTIME() bind(C, name = "WALLTIME")
+   use iso_fortran_env
+       real(real64) :: WALLTIME
+   end function WALLTIME
+end interface
+#endif
+
+contains
+
+subroutine mpi_copy3( A,B, n1, sA1, sB1, n2, sA2, sB2, n3, sA3, sB3 )
+  use iso_fortran_env
+  implicit none
+  complex, intent(in)  :: A(0:*)
+  complex, intent(out) :: B(0:*)
+  integer(int64), intent(in) :: n1, sA1, sB1
+  integer(int64), intent(in) :: n2, sA2, sB2
+  integer(int64), intent(in) :: n3, sA3, sB3
+  integer(int64) i,j,k
+
+  do k=0,n3-1
+     do j=0,n2-1
+        do i=0,n1-1
+           B(i*sB1+j*sB2+k*sB3) = A(i*sA1+j*sA2+k*sA3)
+        end do
+     end do
+  end do
+end subroutine mpi_copy3
+
+end module mpi_run_size
+
+!****e* dist_transose/mpi_distributed_transpose
+! NAME
+!   mpi_distributed_transpose
+! SYNOPSIS
+!   This program is the MPI analogue of coarray_distributed_transpose. It tests the transpose routines used
+!   in Fourier-spectral simulations of homogeneous turbulence.
+!******
+
+program mpi_distributed_transpose
+  !(***********************************************************************************************************
+  !                   m a i n   p r o g r a m
+  !***********************************************************************************************************)
+      use mpi_run_size
+      implicit none
+      include 'mpif.h'
+
+      complex, allocatable ::  u(:,:,:,:)    ! u(nz,4,first_x:last_x,ny)    !(*-- ny = my * num_nodes --*)
+      complex, allocatable ::  ur(:,:,:,:)   !ur(nz,4,first_y:last_y,nx/2)  !(*-- nx/2 = mx * num_nodes --*)
+      complex, allocatable :: bufr(:)
+
+      integer(int64) :: x, y, z, msg_size, iter
+      integer(int64) :: ierror
+
+  call MPI_INIT(ierror)
+  call MPI_COMM_RANK(MPI_COMM_WORLD, my_node, ierror)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD, num_nodes, ierror)
+
+      if( my_node == 0 ) then
+       !write(6,fmt="(A)") "nx,ny,nz : "
+       !read(5,*) nx, ny, nz
+       nx=32; ny=32; nz=32
+      end if
+
+        call MPI_BARRIER(MPI_COMM_WORLD, ierror)  !-- other nodes wait for broadcast!
+        call MPI_BCAST( nx, 1, MPI_INT, 0,MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( ny, 1, MPI_INT, 0,MPI_COMM_WORLD, ierror )
+        call MPI_BCAST( nz, 1, MPI_INT, 0,MPI_COMM_WORLD, ierror )
+
+
+ 	  if ( mod(ny,num_nodes) == 0)  then;   my = ny / num_nodes
+                                    else;   write(6,*) "node ", my_node, " ny not multiple of num_nodes";     stop
+      end if
+
+      if ( mod(nx/2,num_nodes) == 0)  then;   mx = nx/2 / num_nodes
+                                    else;   write(6,*) "node ", my_node, "nx/2 not multiple of num_nodes";    stop
+      end if
+
+      first_y = my_node*my + 1;   last_y  = my_node*my + my
+      first_x = my_node*mx + 1;   last_x  = my_node*mx + mx
+
+      msg_size = nz*4*mx*my     !-- message size (complex data items
+
+      allocate (  u(nz , 4 , first_x:last_x , ny)   )   !(*-- y-z planes --*)
+      allocate ( ur(nz , 4 , first_y:last_y , nx/2) )   !(*-- x-z planes --*)
+      allocate ( bufr(msg_size) )
+
+
+!---------  initialize data u (mx y-z planes per image) ----------
+
+        do x = first_x, last_x
+            do y = 1, ny
+                do z = 1, nz
+                    u(z,1,x,y) = x
+                    u(z,2,x,y) = y
+                    u(z,3,x,y) = z
+                end do
+            end do
+        end do
+
+    tran_time = 0
+    do iter = 1, 2  !--- 2 transform pairs per second-order time step
+
+!---------  transpose data u -> ur (mx y-z planes to my x-z planes per image)  --------
+
+      ur = 0
+      call transpose_X_Y
+
+!--------- test data ur (my x-z planes per image) ----------
+
+         do x = 1, nx/2
+            do y = first_y, last_y
+                do z = 1, nz
+                    if ( real(ur(z,1,y,x)) /= x .or. real(ur(z,2,y,x)) /= y .or. real(ur(z,3,y,x)) /= z )then
+                         write(6,fmt="(A,i3,3(6X,A,f7.3,i4))") "transpose_X_Y failed:  image ", my_node &
+                            , " X ",real(ur(z,1,y,x)),x, "  Y ",real(ur(z,2,y,x)),y, "  Z ", real(ur(z,3,y,x)),z
+                        stop
+                    end if
+                end do
+            end do
+        end do
+
+!---------  transpose data ur -> u (my x-z planes to mx y-z planes per image)  --------
+
+      u = 0
+      call transpose_Y_X
+
+!--------- test data u (mx y-z planes per image) ----------
+
+         do x = first_x, last_x
+            do y = 1, ny
+                do z = 1, nz
+                    if ( real(u(z,1,x,y)) /= x .or. real(u(z,2,x,y)) /= y .or. real(u(z,3,x,y)) /= z )then
+                         write(6,fmt="(A,i3,3(6X,A,f7.3,i4))") "transpose_Y_X failed:  image ", my_node &
+                            , " X ",real(u(z,1,y,x)),x, "  Y ",real(u(z,2,y,x)),y, "  Z ", real(u(z,3,y,x)),z
+                        stop
+                    end if
+                end do
+            end do
+        end do
+    end do
+
+call MPI_BARRIER(MPI_COMM_WORLD, ierror)
+
+        if( my_node == 0 )  write(6,fmt="(A,f8.3)")  "test passed:  tran_time ", tran_time
+
+    deallocate ( bufr, ur, u)
+
+!=========================   end of main executable  =============================
+
+contains
+
+!-------------   out-of-place transpose data_s --> data_r  ----------------------------
+
+ subroutine transpose_X_Y
+
+    use mpi_run_size
+    implicit none
+
+    integer(int64) :: to, from, send_tag, recv_tag
+    integer :: stage, idr(0:num_nodes-1), ids(0:num_nodes-1)
+    integer(int64) :: send_status(MPI_STATUS_SIZE), recv_status(MPI_STATUS_SIZE)
+    character*(MPI_MAX_ERROR_STRING) errs
+
+    call MPI_BARRIER(MPI_COMM_WORLD, ierror)   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+!--------------   transpose my image's block (no communication needed)  ------------------
+
+    call mpi_copy3 (    u(1,1,first_x,1+my_node*my) &                   !-- intra-node transpose
+                ,  ur(1,1,first_y,1+my_node*mx) &                   !-- no inter-node transpose needed
+                ,   nz*3, 1_8, 1_8        &                                 !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+
+#define RECEIVE
+#ifdef RECEIVE
+
+!--------------   issue all block sends ... tags = 256*dst_image+src_image  ------------------
+
+    do stage = 1, num_nodes-1
+        to = mod( my_node+stage, num_nodes )
+        send_tag = 256*to + my_node
+        call MPI_ISSEND ( u(1,1,first_x,1+to*my) &
+                        ,   msg_size*2,  MPI_REAL,  to, send_tag,  MPI_COMM_WORLD,  ids(stage),  ierror)
+    end do
+
+!--------------   receive and transpose other image's block  ------------------
+
+    do stage = 1, num_nodes-1   !-- process receives in order
+        from = mod( my_node+stage, num_nodes )
+        recv_tag = 256*my_node + from
+        call MPI_RECV  ( bufr &
+                        ,   msg_size*2,  MPI_REAL,  from, recv_tag,  MPI_COMM_WORLD,  recv_status,  ierror)
+
+        call mpi_copy3 ( bufr, ur(1,1,first_y,1+from*mx)  &                !-- intra-node transpose from buffer
+                        ,   nz*3, 1_8, 1_8        &                             !-- note: only 3 of 4 words needed
+                        ,   mx, nz*4, nz*4*my &
+                        ,   my, nz*4*mx, nz*4 )
+    end do
+
+#else
+
+!--------------   issue all block receives ... tags = 256*dst_image+src_image  ------------------
+
+    do stage = 1, num_nodes-1
+        from = mod( my_node+stage, num_nodes )
+        recv_tag = 256*my_node + from
+        call MPI_IRECV  ( ur(1,1,first_y,1+from*mx) &
+                        ,   msg_size*2,  MPI_REAL,  from, recv_tag,  MPI_COMM_WORLD,  idr(stage),  ierror)
+    end do
+
+!--------------   issue all block sends ... tags = 256*dst_image+src_image  ------------------
+
+
+    do stage = 1, num_nodes-1   !-- process sends in order
+        to = mod( my_node+stage, num_nodes )
+        send_tag = 256*to + my_node
+        call mpi_copy3 ( u(1,1,first_x,1+to*my), bufr &                !-- intra-node transpose from buffer
+                ,   nz*3, 1_8, 1_8        &                             !-- note: only 3 of 4 words needed
+                ,   mx, nz*4, nz*4*my &
+                ,   my, nz*4*mx, nz*4 )
+
+        call MPI_SEND ( bufr &
+                        ,   msg_size*2,  MPI_REAL,  to, send_tag,  MPI_COMM_WORLD,  ierror)
+    end do
+
+!--------------   wait on receives   ------------------
+
+    do stage = 1, num_nodes-1
+        call MPI_WAIT( idr(stage),  recv_status,  ierror )
+    end do
+
+#endif
+
+call MPI_BARRIER(MPI_COMM_WORLD, ierror)     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+
+! deallocate(ids,idr)
+
+ end  subroutine transpose_X_Y
+
+!-------------   out-of-place transpose data_r --> data_s  ----------------------------
+
+ subroutine transpose_Y_X
+
+    use mpi_run_size
+    implicit none
+
+    integer(int64) :: to, from, send_tag, recv_tag
+    integer :: stage, idr(0:num_nodes-1), ids(0:num_nodes-1)
+    character*(MPI_MAX_ERROR_STRING) errs
+    integer(int64) :: send_status(MPI_STATUS_SIZE), recv_status(MPI_STATUS_SIZE)
+
+    call MPI_BARRIER(MPI_COMM_WORLD, ierror)   !--  wait for other nodes to finish compute
+    tran_time = tran_time - WALLTIME()
+
+!--------------   transpose my image's block (no communication needed)  ------------------
+
+    call mpi_copy3 (   ur(1,1,first_y,1+my_node*mx) &                   !-- intra-node transpose
+                ,   u(1,1,first_x,1+my_node*my) &                   !-- no inter-node transpose needed
+                ,   nz*4, 1_8, 1_8        &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+
+#define RECEIVE
+#ifdef RECEIVE
+
+!--------------   issue all block sends ... tags = 256*dst_image+src_image  ------------------
+
+    do stage = 1, num_nodes-1
+        to = mod( my_node+stage, num_nodes )
+        send_tag = 256*to + my_node
+        call MPI_ISSEND ( ur(1,1,first_y,1+to*mx)    &
+                        ,   msg_size*2,  MPI_REAL,  to, send_tag,  MPI_COMM_WORLD,  ids(stage),  ierror)
+
+    end do
+
+!--------------   transpose other image's block (get block then transpose it)  ------------------
+
+    do stage = 1, num_nodes-1   !-- process receives in order
+        from = mod( my_node+stage, num_nodes )
+        recv_tag = 256*my_node + from
+        call MPI_RECV  ( bufr  &
+                        ,   msg_size*2,  MPI_REAL,  from, recv_tag,  MPI_COMM_WORLD,  recv_status,  ierror)
+
+        call mpi_copy3 ( bufr, u(1,1,first_x,1+from*my)  &                 !-- intra-node transpose from buffer
+                    ,   nz*4, 1_8, 1_8        &
+                    ,   my, nz*4, nz*4*mx &
+                    ,   mx, nz*4*my, nz*4 )
+    end do
+
+#else
+
+!--------------   issue all block receives ... tags = 256*dst_image+src_image  ------------------
+
+    do stage = 1, num_nodes-1
+        from = mod( my_node+stage, num_nodes )
+        recv_tag = 256*my_node + from
+        call MPI_IRECV  ( u(1,1,first_x,1+from*my) &
+                        ,   msg_size*2,  MPI_REAL,  from, recv_tag,  MPI_COMM_WORLD,  idr(stage),  ierror)
+    end do
+
+!--------------   issue all block sends ... tags = 256*dst_image+src_image  ------------------
+
+
+    do stage = 1, num_nodes-1   !-- process sends in order
+        to = mod( my_node+stage, num_nodes )
+        send_tag = 256*to + my_node
+        call mpi_copy3 ( ur(1,1,first_y,1+to*mx), bufr &                !-- intra-node transpose from buffer
+                ,   nz*4, 1_8, 1_8        &                                 !-- note: all 4 words needed
+                ,   my, nz*4, nz*4*mx &
+                ,   mx, nz*4*my, nz*4 )
+
+        call MPI_SEND ( bufr &
+                        ,   msg_size*2,  MPI_REAL,  to, send_tag,  MPI_COMM_WORLD,  ierror)
+    end do
+
+!--------------   wait on receives   ------------------
+
+    do stage = 1, num_nodes-1
+        call MPI_WAIT( idr(stage),  recv_status,  ierror )
+    end do
+
+#endif
+
+    call MPI_BARRIER(MPI_COMM_WORLD, ierror)     !--  wait for other nodes to finish transpose
+    tran_time = tran_time + WALLTIME()
+ !   deallocate(ids,idr)
+ end  subroutine transpose_Y_X
+
+end program mpi_distributed_transpose
diff --git a/src/tests/performance/mpi_dist_transpose/walltime.o b/src/tests/performance/mpi_dist_transpose/walltime.o
new file mode 100644
index 0000000..33016a9
Binary files /dev/null and b/src/tests/performance/mpi_dist_transpose/walltime.o differ
diff --git a/src/tests/performance/mpi_dist_transpose/walltime.x64 b/src/tests/performance/mpi_dist_transpose/walltime.x64
new file mode 100644
index 0000000..dd89ad2
--- /dev/null
+++ b/src/tests/performance/mpi_dist_transpose/walltime.x64
@@ -0,0 +1,20 @@
+	MHz := 3600.e6         !--- clock frequency of i7-3820
+
+	program
+
+	time :	double = 1./MHz     !-- time/tick
+
+	entry "WALLTIME"
+	entry "_WALLTIME"
+
+	RAX,RDX = TSC       !-- read time-stamp counter ticks
+	RDX &= $000fffff    !-- convert tricks to 64-bit float
+	RDX |= $43300000
+	RDX <<= 32
+	RDX |= RAX
+
+	[RSP-8] = RDX       !-- multiply ticks by time/tick
+    LO XM0 = [RSP-8]
+    XM0.v1 *= time
+
+	RET
diff --git a/src/tests/performance/psnap/CMakeLists.txt b/src/tests/performance/psnap/CMakeLists.txt
new file mode 100644
index 0000000..b0a5d2f
--- /dev/null
+++ b/src/tests/performance/psnap/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(caf_psnap cafpsnap.f90 timemeasure.c)
+target_link_libraries(caf_psnap OpenCoarrays)
diff --git a/src/tests/performance/psnap/cafpsnap.f90 b/src/tests/performance/psnap/cafpsnap.f90
new file mode 100644
index 0000000..70dde56
--- /dev/null
+++ b/src/tests/performance/psnap/cafpsnap.f90
@@ -0,0 +1,707 @@
+! P-SNAP test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
+!  Fortran translation of
+
+!/*
+! * P-SNAP v1.2 -- PAL System Noise Activity Program -- LA-CC-06-025
+! *        <http://www.c3.lanl.gov/pal/software/psnap/>
+! *
+! * Copyright (C) 2006, The Regents of the University of California
+! *
+! *                PAL -- Performance and Architecture Laboratory
+! *                  <http://www.c3.lanl.gov/pal/>
+! *                Los Alamos National Laboratory
+! *                  <http://www.lanl.gov/>
+! */
+
+!  by Dan Nagle
+
+! * This program is free software; you can redistribute it and/or modify
+! * it under the terms of the GNU General Public License as published by
+! * the Free Software Foundation; either version 2 of the License, or
+! * (at your option) any later version.
+! *
+! * This program is distributed in the hope that it will be useful,
+! * but WITHOUT ANY WARRANTY; without even the implied warranty of
+! * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! * GNU General Public License for more details.
+! *
+! * You should have received a copy of the GNU General Public License
+! * along with this program; if not, write to the Free Software
+! * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+! * 02110-1301 USA.
+
+program psnap
+
+!  #define VERSION_STR "v1.2"
+
+use, intrinsic :: iso_fortran_env, only: error_unit, output_unit, int64, real64, int32
+
+use, intrinsic :: iso_c_binding
+
+character( len= *), parameter :: psnap_rcs_id = &
+   '$Id$'
+
+character( len= *), parameter :: string_fmt = '( a)'
+
+type, bind( c) :: counters
+
+   integer( c_long) :: val
+   integer( c_int) :: index
+
+end type counters
+
+!  int rank, np; // globals
+
+integer :: rank
+integer :: np
+integer :: np_half
+
+!integer( int64), save :: n = 100000
+integer( int64), codimension[ *], save :: n = 1000
+integer( int64), codimension[ *], save :: w = 1000
+integer( int64), dimension( :), allocatable :: r
+integer( int64) :: i
+integer( int64), codimension[ *], save :: iteration_count = 0
+integer( int64), codimension[ *] :: localmax, globalmax
+integer( int64), codimension[ *] :: localsum
+integer( int64), dimension( :), allocatable :: sum_all
+integer( int64), dimension( :), allocatable, codimension[ :] :: localhist
+integer( int64), codimension[ *], save :: granularity = 1000
+integer( int64), codimension[ *], save :: barrier = 0
+
+!character( kind= c_char, len= 1024), codimension[ *] :: hostname
+
+interface
+  subroutine start_timer() bind(C, name="start_timer")
+    use iso_c_binding
+  end subroutine
+  subroutine stop_timer() bind(C, name="stop_timer")
+    use iso_c_binding
+  end subroutine
+  function elapsed_time() bind(c,name="elapsed_time") result(res)
+    use iso_c_binding
+    !use, intrinsic :: iso_fortran_env, only: int64
+    integer(c_int) :: res
+  end function
+end interface
+
+type( counters), codimension[ *] :: count_loc, globalminloc, globalmaxloc
+
+   integer( int64) :: j
+   integer :: astat
+
+   character( len= 32) :: cl_arg
+
+! ----------------------------------------------------------------------
+
+!  psnap text
+
+continue
+
+   rank = this_image()
+   np = num_images()
+   np_half = np / 2
+
+   j = 0
+
+   if( rank == 1 )then
+
+      cl_args: do
+
+         j = j + 1
+
+         if( j >= command_argument_count() ) exit cl_args
+
+         call get_command_argument( number= int( j, int32), value= cl_arg)
+
+         which_arg: select case( cl_arg( 2: 2))
+
+         case( 'b')
+
+            j = j + 1
+            call get_command_argument( number= int( j, int32), value= cl_arg)
+            write( unit= barrier, fmt= *) cl_arg
+            do i = 2, np
+               barrier[ i] = barrier
+            end do
+
+         case( 'n')
+
+            j = j + 1
+            call get_command_argument( number= int( j, int32), value= cl_arg)
+            write( unit= n, fmt= *) cl_arg
+            w = n / 10
+            do i = 2, np
+               n[ i] = n
+               w[ i] = w
+            end do
+
+         case( 'w')
+
+            j = j + 1
+            call get_command_argument( number= int( j, int32), value= cl_arg)
+            write( unit= w, fmt= *) cl_arg
+            do i = 2, np
+               w[ i] = w
+            end do
+
+         case( 'c')
+
+            j = j + 1
+            call get_command_argument( number= int( j, int32), value= cl_arg)
+            write( unit= iteration_count, fmt= *) cl_arg
+            do i = 2, np
+               iteration_count[ i] = iteration_count
+            end do
+
+         case( 'g')
+
+            j = j + 1
+            call get_command_argument( number= int( j, int32), value= cl_arg)
+            write( unit= granularity, fmt= *) cl_arg
+            do i = 2, np
+               granularity[ i] = granularity
+            end do
+
+         case( 'h')
+
+            call usage()
+            stop 'normal exit in psnap'
+
+         case default
+
+            call usage()
+            stop 'normal exit in psnap'
+
+         end select which_arg
+
+      end do cl_args
+
+   end if
+
+!  distribute sizes before allocating
+
+   sync all
+
+   !call get_environment_variable( name= 'HOSTNAME', value= hostname)
+
+   allocate( r( 1: n + w), stat= astat)
+
+   alloc_r_error: if( astat > 0 )then
+
+      stop 'error allocating r'
+
+   end if alloc_r_error
+
+   allocate( sum_all( 1: np), stat= astat)
+
+   alloc_sum_error: if( astat > 0 )then
+
+      stop 'error allocating sum_all'
+
+   end if alloc_sum_error
+
+   only_rank_0: if( rank == 1 )then
+
+      call print_banner()
+
+   end if only_rank_0
+
+! ----------------------------------------------------------------------
+
+!  warmup loop here; calibration follows
+
+   if( w > 0 ) call warmup_loop( w)
+
+   if( iteration_count == 0 )then
+
+      iteration_count = calibrate_loop( granularity)
+
+	!write(*,*) 'Iteration after calibrate', iteration_count,'proc',rank
+
+      count_loc% val = iteration_count
+      count_loc% index = rank
+
+      globalminloc = count_loc
+      globalmaxloc = count_loc
+
+!  compute global counts before communicating them
+
+      sync all
+
+      do i = 2, np
+         if( globalminloc[ i]% val < globalminloc% val )then
+            globalminloc = globalminloc[ i]
+         end if
+         if( globalmaxloc[ i]% val > globalmaxloc% val )then
+            globalmaxloc = globalmaxloc[ i]
+         end if
+      end do
+
+      if( rank == 1 )then
+         write( unit= output_unit, fmt= '( a, i0/ a, i0, a, i0/ a, i0, a, i0)') "my_count= ", iteration_count, &
+                                    "global_min= ", globalminloc% val, " min_loc= ", globalminloc% index, &
+                                    "global_max= ", globalmaxloc% val, " max_loc= ", globalmaxloc% index
+         write( unit= output_unit, fmt= string_fmt) "Using Global max for calibration"
+      end if
+
+      iteration_count = globalmaxloc% val
+
+   end if
+
+   r = 0
+
+! ----------------------------------------------------------------------
+
+!  measurement loop
+
+   sync all
+
+   do i = 1, n + w
+      r( i) = loop( iteration_count)
+      if( barrier /= 0 )then
+         if(  mod( i, barrier) == 0 ) sync all
+      end if
+   end do
+
+   sync all
+
+! ----------------------------------------------------------------------
+
+!   build histograms
+
+   localsum = sum( r( w+1: ) )
+
+   if( rank == 1 )then
+
+      sum_all( 1) = localsum
+
+      do i = 2, np
+         sync images( i)
+         sum_all( i) = localsum[ i]
+      end do
+
+   else
+      sync images( 1)
+   end if
+
+   localmax = maxval( r( w+1: ))
+
+   if( rank == 1 )then
+
+      globalmax = localmax
+
+      do i = 2, np
+         sync images( i)
+         globalmax = max( globalmax, localmax[ i])
+      end do
+
+   else
+      sync images( 1)
+   end if
+
+   if( rank == 1 )then
+
+      do i = 2, np
+         globalmax[ i] = globalmax
+      end do
+
+   end if
+
+   sync all
+
+   allocate( localhist( 0: globalmax)[ *], stat= astat )
+
+   alloc_localhist_error: if( astat > 0 )then
+
+      stop 'error allocating localhist'
+
+   end if alloc_localhist_error
+
+   localhist = 0
+
+   make_hist: do i = 1+w, n+w
+
+      localhist( r( i)) = localhist( r( i)) + 1
+
+   end do make_hist
+
+   final_print: if( rank == 1 )then
+
+!  print rank 0's histogram
+
+      if( n > 0 )then
+         write( unit= output_unit, fmt= '(a, i9, 9x, i9, 3x)') "#", 1, sum_all( 1)!, trim( hostname)
+         do i = 0, globalmax
+            if( localhist( i) > 0 )then
+               write( unit= output_unit, fmt= '(1x, i9, i9, i9, 3x)') rank, i, localhist( i)!, trim( hostname)
+            end if
+         end do
+
+      end if
+
+!  print rank i's histogram
+
+      do i = 2, np
+
+         sync images( i)
+         !localhist(:) = localhist(:)[ i]
+         do j=lbound(localhist,dim=1),ubound(localhist,dim=1)
+           localhist(j) = localhist(j)[ i]
+         end do
+
+         !hostname = hostname[ i]
+         if( n > 0 )then
+            write( unit= output_unit, fmt= '(a, i9, 9x, i9, 3x)') "#", i, sum_all( i)!, trim( hostname)
+            do j = 0, globalmax
+               if( localhist( j) > 0 )then
+                  write( unit= output_unit, fmt= '(1x, i9, i9, i9, 3x)') i, j, localhist( j)!, trim( hostname)
+               end if
+            end do
+
+         end if
+      end do
+
+   else final_print
+
+      sync images( 1)
+
+   end if final_print
+
+stop 'normal exit in psnap'
+
+contains
+
+! ---------------------------------------------------------------------
+
+function get_usecs() result( usecs)
+!integer( int64) :: usecs
+
+!  usec per sec
+
+!integer( int64), parameter :: c = 1000000
+
+!   integer( int64) :: t
+
+!   integer( int64) :: r
+
+integer(kind=8) :: t,r
+real(real64) :: usecs
+integer(kind=8),parameter :: c = 1000000
+
+continue
+
+   !call system_clock( count= t, count_rate= r)
+   call cpu_time(usecs)
+   usecs = usecs*1.d6
+   !if( r /= c ) usecs = int( real( t, 8) / real( r, 8) * real( c, 8))
+   !if( r /= c ) usecs = int( t/r * c)
+
+return
+
+end function get_usecs
+
+! ---------------------------------------------------------------------
+
+function loop( iterations) result( dt)
+integer(int64) :: dt
+integer( int64), intent( in) :: iterations
+
+   integer( int64) :: i
+
+   integer(int64) :: usecs_init, usecs_final
+
+   integer :: next_rank, prev_rank
+
+   integer, codimension[ *], save :: coarray
+
+continue
+
+!   usecs_init = get_usecs()
+
+   call start_timer()
+
+   next_rank = to_upper_half( rank)
+   prev_rank = from_lower_half( rank)
+
+!   write(*,*) 'Proc',rank,'Next rank',next_rank,'Prev rank',prev_rank
+
+   counter: do i = 1, iterations
+
+      even_odd: if( sending_half( rank) )then
+
+!  send rank to next then fetch rank
+
+         coarray[ next_rank] = coarray
+
+         sync images( next_rank)
+
+      else even_odd
+
+!  stay calm
+
+         coarray[ prev_rank] = coarray
+
+         sync images( prev_rank)
+
+      end if even_odd
+
+   end do counter
+
+!   usecs_final = get_usecs()
+   call stop_timer()
+
+!   write(*,*) 'usec_init',usec_init,'usec_final',usec_final
+
+   dt = elapsed_time()
+
+   !write(*,*) 'usec_init',usec_init,'usec_final',usec_final,'dt',dt
+
+return
+
+end function loop
+
+! ---------------------------------------------------------------------
+
+subroutine warmup_loop( wa)
+integer( int64), intent( in) :: wa
+
+!integer( int64), parameter :: counter = 1000000
+integer( int64), parameter :: counter = 10000
+
+   integer( int64) :: min_time_usecs
+   integer( int64) :: loop_time
+
+   integer :: i
+
+continue
+
+   min_time_usecs = huge( 0_int64)
+
+   reloop: do i = 1, wa
+
+      loop_time = loop( counter)
+
+      min_time_usecs = min( loop_time, min_time_usecs)
+
+   end do reloop
+
+return
+
+end subroutine warmup_loop
+
+! ---------------------------------------------------------------------
+
+function calibrate_loop( usecs) result( cl)
+integer( int64) :: cl
+integer( int64), intent( in) :: usecs
+
+integer( int64), parameter :: calibrate_useconds = 100000000
+!real( real64), parameter :: preset_tolerance = 0.001_real64
+real( real64), parameter :: preset_tolerance = 1.0_real64
+
+integer( int64), parameter :: initial_ntrial = 1000
+
+!integer( int64), parameter :: initial_counter = 1000000
+integer( int64), parameter :: initial_counter = 100000
+
+   integer( int64) :: counter
+   integer( int64) :: min_time_usecs
+   integer( int64) :: tolerance
+   integer( int64) :: difference
+   integer( int64) :: total_time
+
+   integer( int64) :: ntrial
+   integer( int64) :: i
+
+   integer( int64) :: loop_time
+
+continue
+
+   counter = initial_counter
+
+   !write(*,*) 'Counter after initial counter',counter
+
+!  if usecs / granularity is less than 1/preset_tolerance then use zero
+
+   tolerance = int( real( usecs, real64) * preset_tolerance, int64)
+
+   total_time = 0
+
+   trials: do
+
+      ntrial = initial_ntrial
+      min_time_usecs = huge( 0_int64)
+
+      get_min: do i = 1, ntrial
+
+         loop_time = loop( counter)
+	 !write(*,*) 'loop_time',loop_time
+         min_time_usecs = min( min_time_usecs, loop_time)
+
+      end do get_min
+
+!  keep an estimate of total calibration time
+
+      total_time = total_time + min_time_usecs * ntrial
+
+      counter = int( real( counter, real64) * real( usecs, real64) / real( min_time_usecs, real64), int64)
+
+      !write(*,*) 'Counter after assignment',counter
+
+      difference = abs( min_time_usecs - usecs)
+
+      if( difference <= tolerance .or. total_time >= calibrate_useconds ) exit trials
+
+   end do trials
+
+   cl = counter
+
+   write( unit= output_unit, fmt= '( a, i2, a, i10, a, i10, a, i10, a, i10)' ) "#rank= ", rank, &
+                                                                           " count= ", counter, " time= ", min_time_usecs, &
+                                                                           " difference= ", difference, " tolerance= ", tolerance
+
+   time_out: if( total_time > calibrate_useconds )then
+
+      write( unit= output_unit, fmt= '( a, i2, a, f10.4, a, f10.4, a, f10.4, a, i0)' ) "PSNAP: WARNING rank ", rank, &
+                                                                  " didn't converge in 10 seconds tolerance = ",  &
+                                                                  real( difference) / real( usecs), &
+                                                                  " should be ", preset_tolerance, " approx ", &
+                                                                  preset_tolerance * 100.0, " percent, granularity= ", usecs
+
+   end if time_out
+
+return
+
+end function calibrate_loop
+
+! ---------------------------------------------------------------------
+
+subroutine print_banner()
+
+continue
+
+   write( unit= output_unit, fmt= string_fmt) '########'
+   write( unit= output_unit, fmt= string_fmt) '##P-SNAP: PAL System Noise Activity Program'
+   write( unit= output_unit, fmt= string_fmt) '##' // psnap_rcs_id
+   write( unit= output_unit, fmt= string_fmt) '##This is a Fortran translation of P-SNAP v 1.2 from'
+   write( unit= output_unit, fmt= string_fmt) '##http://www.c3.lanl.gov/pal/software/psnap/'
+   write( unit= output_unit, fmt= string_fmt) '##This program is the coarray ping-pong version'
+   write( unit= output_unit, fmt= string_fmt) '########'
+
+return
+
+end subroutine print_banner
+
+! ---------------------------------------------------------------------
+
+subroutine usage()
+
+character( len= *), dimension( 17), parameter :: msg = &
+ [ "Usage: psnap [OPTIONS]                                             ", &
+   "                                                                   ", &
+   "  -n <reps>   number of repetitions                                ", &
+   "                default: 100000                                    ", &
+   "  -w <reps>   number of warm-up repetitions                        ", &
+   "                default: 10%% of the number of reps                ", &
+   "  -c <count>  calibration count                                    ", &
+   "                default: perform a calibration to match granularity", &
+   "  -g <usecs>  granularity of the test in microseconds              ", &
+   "                default: 1000                                      ", &
+   "  -b <N>      perform a barrier between every N loops              ", &
+   "                default: no                                        ", &
+   "  -h          this message                                         ", &
+   "                                                                   ", &
+   "  Example: psnap -n 1000000 -w 10 > psnap.out                      ", &
+   "    runs a test with 1000000 repetitions and 10 warm-up reps.      ", &
+   "                                                                   " ]
+
+   integer :: i
+
+continue
+
+   write( unit= error_unit, fmt= string_fmt) ( trim( msg( i)), i = 1, size( msg, 1))
+
+stop 'normal exit in usage'
+
+end subroutine usage
+
+! ---------------------------------------------------------------------
+
+function sending_half( i) result( l)
+
+integer, intent( in) :: i
+logical :: l
+
+continue
+
+!  this must process np == even values only
+
+   l = i <= np_half
+
+return
+
+end function sending_half
+
+! ---------------------------------------------------------------------
+
+function to_upper_half( i) result( l)
+
+integer, intent( in) :: i
+integer :: l
+
+continue
+
+!  this must process lower half ranks only
+
+   l = i + np_half
+
+return
+
+end function to_upper_half
+
+! ---------------------------------------------------------------------
+
+function from_lower_half( i) result( l)
+
+integer, intent( in) :: i
+integer :: l
+
+continue
+
+!  this must process upper half ranks only
+
+   l = i - np_half
+
+return
+
+end function from_lower_half
+
+! ---------------------------------------------------------------------
+
+end program psnap
diff --git a/src/tests/performance/psnap/timemeasure.c b/src/tests/performance/psnap/timemeasure.c
new file mode 100644
index 0000000..8a89dd3
--- /dev/null
+++ b/src/tests/performance/psnap/timemeasure.c
@@ -0,0 +1,56 @@
+/* PSNAP Test: timemeausure.c
+
+  Copyright (c) 2012-2016, Sourcery, Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+      * Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+      * Neither the name of the Sourcery, Inc., nor the
+        names of its contributors may be used to endorse or promote products
+        derived from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+
+static struct timezone tz;
+static struct timeval start_time, finish_time;
+
+/* Start measuring a time delay */
+void start_timer(void)
+{
+    gettimeofday( &start_time, &tz);
+}
+
+/* Retunrn elapsed time in microseconds */
+int elapsed_time(void)
+{
+    gettimeofday( &finish_time, &tz);
+    return(1000000.0*(finish_time.tv_sec - start_time.tv_sec) +
+           (finish_time.tv_usec - start_time.tv_usec) );
+}
+
+/* Return the stopping time in microseconds */
+double stop_timer(void)
+{
+    gettimeofday( &finish_time, &tz);
+    return(1000000.0*finish_time.tv_sec + finish_time.tv_usec);
+}
diff --git a/src/tests/unit/CMakeLists.txt b/src/tests/unit/CMakeLists.txt
new file mode 100644
index 0000000..017d983
--- /dev/null
+++ b/src/tests/unit/CMakeLists.txt
@@ -0,0 +1,9 @@
+if (${opencoarrays_aware_compiler})
+  add_subdirectory(simple)
+  add_subdirectory(send-get)
+  add_subdirectory(init_register)
+  add_subdirectory(collectives)
+  add_subdirectory(sync)
+else()
+  add_subdirectory(extensions)
+endif()
diff --git a/src/tests/unit/collectives/CMakeLists.txt b/src/tests/unit/collectives/CMakeLists.txt
new file mode 100644
index 0000000..c58127c
--- /dev/null
+++ b/src/tests/unit/collectives/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_executable(co_sum_test co_sum.F90)
+target_link_libraries(co_sum_test OpenCoarrays)
+
+add_executable(co_broadcast_test co_broadcast.F90)
+target_link_libraries(co_broadcast_test OpenCoarrays)
+
+add_executable(co_min_test co_min.F90)
+target_link_libraries(co_min_test OpenCoarrays)
+
+add_executable(co_max_test co_max.F90)
+target_link_libraries(co_max_test OpenCoarrays)
+
+add_executable(co_reduce_test co_reduce.F90)
+target_link_libraries(co_reduce_test OpenCoarrays)
diff --git a/src/tests/unit/collectives/co_broadcast.F90 b/src/tests/unit/collectives/co_broadcast.F90
new file mode 100644
index 0000000..1a74098
--- /dev/null
+++ b/src/tests/unit/collectives/co_broadcast.F90
@@ -0,0 +1,92 @@
+! Copyright (c) 2012-2016, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+! Unit tests for co_broadcast and co_sum
+program main
+  use iso_fortran_env, only : error_unit
+  use iso_c_binding, only : c_int,c_double,c_char
+#ifdef USE_EXTENSIONS
+  use opencoarrays
+#endif
+  implicit none
+  integer(c_int) :: me
+  ! Set test failure as the default result
+  logical :: c_char_test_passes=.false.,c_int_test_passes=.false.,c_double_test_passes=.false.
+
+  ! Store the executing image number
+  me=this_image()
+
+#ifdef USE_EXTENSIONS
+  if (me==1) print *,"Using the extensions from the opencoarrays module."
+#endif
+
+  ! Verify broadcasting of character data from image 1
+  c_char_co_broadcast: block
+    character(kind=c_char,len=14), save :: string_received[*]
+    character(kind=c_char,len=*), parameter :: string_sent=c_char_"Hello, world!"! Character test message
+    if (me==1) string_received=string_sent
+    sync all
+    call co_broadcast(string_received,source_image=1)
+    if (string_received/=string_sent) then
+      write(error_unit,*) "Incorrect co_broadcast(",string_received,") on image",me
+    else
+      c_char_test_passes=.true.
+    end if
+  end block c_char_co_broadcast
+
+  ! Verify broadcasting of integer data from image 1
+  c_int_co_broadcast: block
+    integer(c_int), save :: integer_received[*]
+    integer(c_int), parameter :: integer_sent=12345_c_int ! Integer test message
+    if (me==1) integer_received=integer_sent
+    sync all
+    call co_broadcast(integer_received,source_image=1)
+    if (integer_received/=integer_sent) then
+      write(error_unit,*) "Incorrect co_broadcast(",integer_received,") on image",me
+    else
+      c_int_test_passes=.true.
+    end if
+  end block c_int_co_broadcast
+
+  ! Verify broadcasting of real data from image 1
+  c_double_co_broadcast: block
+    real(c_double), save :: real_received[*]
+    real(c_double), parameter :: real_sent=2.7182818459045_c_double ! Real test message
+    if (me==1) real_received=real_sent
+    sync all
+    call co_broadcast(real_received,source_image=1)
+    if (real_received/=real_sent) then
+      write(error_unit,*) "Incorrect co_broadcast(",real_received,") on image",me
+    else
+      c_double_test_passes=.true.
+    end if
+  end block c_double_co_broadcast
+
+
+  if (.not.all([c_char_test_passes,c_int_test_passes,c_double_test_passes])) error stop
+  ! Wait for everyone to pass the tests
+  sync all
+  if (me==1) print *, "Test passed."
+end program
diff --git a/src/tests/unit/collectives/co_max.F90 b/src/tests/unit/collectives/co_max.F90
new file mode 100644
index 0000000..e6ecaf1
--- /dev/null
+++ b/src/tests/unit/collectives/co_max.F90
@@ -0,0 +1,71 @@
+! Copyright (c) 2012-2016, Sourcery, Inc.
+! All rights reserved.
+!
+! Unit tests for co_max: verify parallel, collective maximum
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+program main
+  use iso_fortran_env, only : error_unit
+  use iso_c_binding, only : c_int,c_double
+#ifdef USE_EXTENSIONS
+  use opencoarrays
+#endif
+  implicit none
+
+#ifdef USE_EXTENSIONS
+  if (this_image()==1) print *,"Using the extensions from the opencoarrays module."
+#endif
+
+  ! Verify that 1 is the lowest image number
+  c_int_co_min: block
+    integer(c_int) :: me
+    me=this_image()
+    sync all
+    call co_max(me)
+    if (me/=num_images()) then
+      write(error_unit,"(2(a,i2))") "Wrong result (",me,") on image",this_image()
+      error stop
+    end if
+    ! Wait for all images to pass the test
+    sync all
+    if (me==1) print *,"Correct integer(c_int) co_min"
+  end block c_int_co_min
+
+  ! Verify that the maximum real conversion of an image number is real(num_images())
+  c_double_co_max: block
+    real(c_double) :: me
+    me=real(this_image(),c_double)
+    sync all
+    call co_max(me)
+    if (me/=real(num_images(),c_double)) then
+      write(error_unit,"(2(a,i2))") "Wrong result (",me,") on image",this_image()
+      error stop
+    end if
+    ! Wait for all images to pass the test
+    sync all
+    if (this_image()==1) print *,"Correct real(c_double) co_max"
+  end block c_double_co_max
+
+  if (this_image()==1) print *, "Test passed."
+end program
diff --git a/src/tests/unit/collectives/co_min.F90 b/src/tests/unit/collectives/co_min.F90
new file mode 100644
index 0000000..6023ad0
--- /dev/null
+++ b/src/tests/unit/collectives/co_min.F90
@@ -0,0 +1,71 @@
+! Copyright (c) 2012-2016, Sourcery, Inc.
+! All rights reserved.
+!
+! Unit tests for co_min: verify parallel, collective minimum
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+program main
+  use iso_fortran_env, only : error_unit
+  use iso_c_binding, only : c_int,c_double
+#ifdef USE_EXTENSIONS
+  use opencoarrays
+#endif
+  implicit none
+  logical :: co_min_c_int_verified=.false.,co_min_c_double_verified=.false.
+
+#ifdef USE_EXTENSIONS
+  if (this_image()==1) print *,"Using the extensions from the opencoarrays module."
+#endif
+
+  ! Verify that 1 is the lowest image number
+  c_int_co_min: block
+    integer(c_int) :: me
+    me=this_image()
+    sync all
+    call co_min(me)
+    if (me==1) then
+      co_min_c_int_verified=.true.
+    else
+      write(error_unit,"(2(a,i2))") "co_min fails for integer(c_int) argument with result (",me,") on image",this_image()
+    end if
+  end block c_int_co_min
+
+  ! Verify that 1.0 is the minimum real conversion of an image number
+  c_double_co_min: block
+    real(c_double) :: me
+    me=real(this_image(),c_double)
+    sync all
+    call co_min(me)
+    if (me==1._c_double) then
+      co_min_c_double_verified=.true.
+    else
+      write(error_unit,"(2(a,i2))") "co_min fails for integer(c_double) argument with result (",me,") on image",this_image()
+    end if
+  end block c_double_co_min
+
+  if (.not. all([co_min_c_int_verified,co_min_c_double_verified])) error stop
+  ! Wait for all images to pass the tests
+  sync all
+  if (this_image()==1) print *, "Test passed."
+end program
diff --git a/src/tests/unit/collectives/co_reduce.F90 b/src/tests/unit/collectives/co_reduce.F90
new file mode 100644
index 0000000..16ba574
--- /dev/null
+++ b/src/tests/unit/collectives/co_reduce.F90
@@ -0,0 +1,130 @@
+! Copyright (c) 2012-2016, Sourcery, Inc.
+! All rights reserved.
+!
+! Unit tests for co_min: verify parallel, collective minimum
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+module co_intrinsics_module
+#ifdef USE_EXTENSIONS
+  use opencoarrays
+#endif
+  implicit none
+
+  private
+  public :: co_all
+  public :: co_product
+
+  interface co_all
+    module procedure co_all_logical
+  end interface
+
+  interface co_product
+    module procedure co_product_c_int,co_product_c_double
+  end interface
+
+contains
+
+  subroutine co_all_logical(a)
+    logical, intent(inout) :: a(:)
+    call co_reduce(a,and)
+  contains
+    pure function and(lhs,rhs) result(lhs_and_rhs) bind(C,name="and")
+      logical, intent(in) :: lhs,rhs
+      logical :: lhs_and_rhs
+      lhs_and_rhs = lhs .and. rhs
+    end function
+  end subroutine
+
+  subroutine co_product_c_int(a)
+    use iso_c_binding, only : c_int
+    integer(c_int), intent(inout) :: a
+    call co_reduce(a,product_)
+  contains
+    pure function product_(lhs,rhs) result(lhs_x_rhs) bind(C,name="product_")
+      integer(c_int), intent(in) :: lhs,rhs
+      integer(c_int) :: lhs_x_rhs
+      lhs_x_rhs = lhs * rhs
+    end function
+  end subroutine
+
+  subroutine co_product_c_double(a)
+    use iso_c_binding, only : c_double
+    real(c_double), intent(inout) :: a
+    call co_reduce(a,product_)
+  contains
+    pure function product_(lhs,rhs) result(lhs_x_rhs)
+      real(c_double), intent(in) :: lhs,rhs
+      real(c_double) :: lhs_x_rhs
+      lhs_x_rhs = lhs * rhs
+    end function
+  end subroutine
+
+end module
+
+program main
+  use iso_fortran_env, only : error_unit
+  use iso_c_binding, only : c_int,c_double
+  use co_intrinsics_module, only : co_all,co_product
+#ifdef USE_EXTENSIONS
+  use opencoarrays
+#endif
+  implicit none
+  logical :: logical_passes=.false.,c_int_passes=.false.
+
+#ifdef USE_EXTENSIONS
+  if (this_image()==1) print *,"Using the extensions from the opencoarrays module."
+#endif
+
+  ! Verify that every image has a "true" variable with the value .true.
+  verify_co_reduce_logical: block
+    logical,dimension(10) :: true=.true.
+    sync all
+    call co_all(true)
+    if (all(true .eqv. .true.)) then
+      logical_passes=.true.
+    else
+      write(error_unit,"(2(a,i2))") "co_reduce fails for logical argument with result (",true,") on image",this_image()
+    end if
+  end block verify_co_reduce_logical
+
+  ! Verify the product of image number
+  verify_co_reduce_c_int: block
+    integer(c_int) :: me,i
+    me=this_image()
+    sync all
+    call co_product(me)
+    if (me==product(int([(i,i=1,num_images())],c_int))) then
+      c_int_passes=.true.
+    else
+      write(error_unit,"(2(a,i2))") "co_reduce fails integer(c_int) argument with result (",me,") on image",this_image()
+    end if
+  end block verify_co_reduce_c_int
+
+  ! Verify that this image's tests passed
+  if (.not.all([logical_passes,c_int_passes])) error stop
+
+  ! Wait for verification that all images to pass the tests
+  sync all
+  if (this_image()==1) print *, "Test passed."
+end program
diff --git a/src/tests/unit/collectives/co_sum.F90 b/src/tests/unit/collectives/co_sum.F90
new file mode 100644
index 0000000..41bc223
--- /dev/null
+++ b/src/tests/unit/collectives/co_sum.F90
@@ -0,0 +1,86 @@
+! Copyright (c) 2012-2016, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of Sourcery, Inc., nor the
+!       names of any other contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE
+! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+! Unit tests for co_sum
+program main
+  use iso_fortran_env, only : error_unit
+  use iso_c_binding, only : c_int,c_double
+#ifdef USE_EXTENSIONS
+  use opencoarrays
+#endif
+  implicit none
+  logical :: co_sum_c_int_verified=.false.,co_sum_c_double_verified=.false.
+
+#ifdef USE_EXTENSIONS
+  if (this_image()==1) print *,"Using the extensions from the opencoarrays module."
+#endif
+
+  ! Verify collective sum of integer data by tallying image numbers
+  c_int_co_sum: block
+    integer(c_int) :: i,me
+    me=this_image()
+    sync all
+    call co_sum(me)
+    if (me==sum([(i,i=1,num_images())])) then
+      co_sum_c_int_verified=.true.
+    else
+      write(error_unit,"(2(a,i2))") "co_broadcast with integer(c_int) argument fails with result (",me,") on image",this_image()
+    end if
+  end block c_int_co_sum
+
+  ! Verify collective sum by calculuating pi
+  c_double_co_sum: block
+    real(c_double), parameter :: four=4._c_double,one=1._c_double,half=0.5_c_double
+    real(c_double), save :: pi
+    integer(c_int) :: i,points_per_image
+    integer(c_int), parameter :: resolution=1024_c_int ! Number of points used in pi calculation
+    integer(c_int) :: me
+    me=this_image()
+    ! Partition the calculation evenly across all images
+    if (mod(resolution,num_images())/=0) then
+      write(error_unit,"(a)") "number of images doesn't evenly divide into number of points"
+      error stop
+    end if
+    points_per_image=resolution/num_images()
+    associate(n=>resolution,my_first=>points_per_image*(me-1)+1,my_last=>points_per_image*me)
+      pi = sum([ (four/(one+((i-half)/n)**2),i=my_first,my_last) ])/n
+    end associate
+    sync all
+    ! Replace pi on each image with the sum of the pi contributions from all images
+    call co_sum(pi)
+    associate (pi_ref=>acos(-1._c_double),allowable_fractional_error=>0.000001_c_double)
+      if (abs((pi-pi_ref)/pi_ref)<=allowable_fractional_error) then
+        co_sum_c_double_verified=.true.
+      else
+        write(error_unit,*) "co_broadcast with real(c_double) argument fails with result (",pi,") result on image ",me
+      end if
+    end associate
+  end block c_double_co_sum
+
+  if (.not. all([co_sum_c_int_verified,co_sum_c_double_verified])) error stop
+  ! Wait for every image to pass
+  sync all
+  if (this_image()==1) print *, "Test passed."
+end program
diff --git a/src/tests/unit/extensions/CMakeLists.txt b/src/tests/unit/extensions/CMakeLists.txt
new file mode 100644
index 0000000..f6fbf5e
--- /dev/null
+++ b/src/tests/unit/extensions/CMakeLists.txt
@@ -0,0 +1,44 @@
+if("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "GNU")
+  set(gfortran_compiler true)
+endif()
+
+if("${gfortran_compiler}" AND (NOT "${opencoarrays_aware_compiler}"))
+  # This applied to gfortran 4.9 and some earlier versions (TODO: find out which)
+  add_definitions(-DCOMPILER_SUPPORTS_CAF_INTRINSICS)
+endif()
+
+function(generate_test_script base_name num_images)
+  set(source ${base_name}.F90)
+  configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/../collectives/${source}
+    ${CMAKE_CURRENT_BINARY_DIR}/${source}
+    COPYONLY
+  )
+  # Now we write the script that compiles and runs the test
+  set(harness "${CMAKE_BINARY_DIR}/bin_staging/test-${base_name}-extension.sh")
+  install(
+      FILES "${harness}"
+      PERMISSIONS WORLD_EXECUTE WORLD_READ WORLD_WRITE OWNER_EXECUTE OWNER_READ OWNER_WRITE GROUP_EXECUTE GROUP_READ GROUP_WRITE
+      DESTINATION ${CMAKE_CURRENT_BINARY_DIR}
+  )
+  file(WRITE  "${harness}" "#!/bin/bash\n")
+  file(APPEND "${harness}" "cd ${CMAKE_CURRENT_BINARY_DIR}\n")
+  set(executable "${base_name}_extension")
+  if (opencoarrays_aware_compiler)
+    # Explicitly include the directory containing the .mod file because the caf script won't pass it
+    # automatically when the script wraps an OpenCoarrays-aware compiler
+    file(APPEND
+      "${harness}"
+      "FC=mpif90 ${CMAKE_INSTALL_PREFIX}/bin/caf ${source} -o ${executable} -I${CMAKE_BINARY_DIR}/mod -DUSE_EXTENSIONS\n"
+    )
+  else()
+    file(APPEND "${harness}" "FC=mpif90 ${CMAKE_INSTALL_PREFIX}/bin/caf ${source} -o ${executable} -DUSE_EXTENSIONS\n")
+  endif()
+  file(APPEND "${harness}" "${CMAKE_INSTALL_PREFIX}/bin/cafrun -np ${num_images} ./${executable}\n")
+endfunction(generate_test_script)
+
+generate_test_script(co_sum 4)
+generate_test_script(co_broadcast 4)
+generate_test_script(co_min 4)
+generate_test_script(co_max 4)
+generate_test_script(co_reduce 4)
diff --git a/src/tests/unit/init_register/CMakeLists.txt b/src/tests/unit/init_register/CMakeLists.txt
new file mode 100644
index 0000000..90fb3c2
--- /dev/null
+++ b/src/tests/unit/init_register/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_executable(initialize_mpi initialize_mpi.f90)
+target_link_libraries(initialize_mpi OpenCoarrays)
+
+add_executable(register register.f90)
+target_link_libraries(register OpenCoarrays)
+
+add_executable(register_rename_me register_rename_me.f90)
+target_link_libraries(register_rename_me OpenCoarrays)
+
+add_executable(register_rename_me_too register_rename_me_too.f90)
+target_link_libraries(register_rename_me_too OpenCoarrays)
+
+add_executable(allocate_as_barrier allocate_as_barrier.f90)
+target_link_libraries(allocate_as_barrier OpenCoarrays)
+
+add_executable(allocate_as_barrier_proc allocate_as_barrier_proc.f90)
+target_link_libraries(allocate_as_barrier_proc OpenCoarrays)
diff --git a/src/tests/unit/init_register/allocate_as_barrier.f90 b/src/tests/unit/init_register/allocate_as_barrier.f90
new file mode 100644
index 0000000..199949f
--- /dev/null
+++ b/src/tests/unit/init_register/allocate_as_barrier.f90
@@ -0,0 +1,24 @@
+! This test checks if allocate on coarray variables acts as a barrier.
+
+program alloc_as_barrier
+  implicit none
+
+  integer :: me[*]
+  integer,allocatable :: a(:)[:]
+
+  me = this_image()
+
+  if(me == 1) call sleep(1)
+
+  allocate(a(10)[*])
+
+  if(me > 1) then
+    a = me[me-1]
+    if(any(a /= me-1)) then
+      write(*,*) "Test failed."
+    else
+      write(*,*) "Test passed."
+    endif
+  endif
+
+end program
diff --git a/src/tests/unit/init_register/allocate_as_barrier_proc.f90 b/src/tests/unit/init_register/allocate_as_barrier_proc.f90
new file mode 100644
index 0000000..c8956ef
--- /dev/null
+++ b/src/tests/unit/init_register/allocate_as_barrier_proc.f90
@@ -0,0 +1,32 @@
+! This test checks if allocate on coarray variables acts as a barrier.
+
+program alloc_as_barrier
+  implicit none
+
+  integer :: me
+
+  call test_alloc(me)
+
+  if(this_image() == 2) then
+    if(me /= 1) then
+      write(*,*) "Test failed.",me
+    else
+      write(*,*) "Test passed."
+    endif
+  endif
+
+contains
+
+  subroutine test_alloc(me)
+    integer,intent(out) :: me
+    integer,allocatable :: a(:)[:]
+
+    me = this_image()
+    if(me == 1) call sleep(1)
+    allocate(a(10)[*],source=me)
+    if(me > 1) me = a(2)[this_image()-1]
+    deallocate(a)
+
+  end subroutine
+
+end program
diff --git a/src/tests/unit/init_register/initialize_mpi.f90 b/src/tests/unit/init_register/initialize_mpi.f90
new file mode 100644
index 0000000..88dba1f
--- /dev/null
+++ b/src/tests/unit/init_register/initialize_mpi.f90
@@ -0,0 +1,46 @@
+! Unit test for initializion of MPI by LIBCAF_MPI.
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+program initialize_mpi
+  use mpi, only : MPI_COMM_SIZE,MPI_COMM_WORLD
+  implicit none
+
+  ! Set invalid default image number and number of ranks
+  integer :: me=-1,np=-1,ierr
+
+  ! Get image number
+  me = this_image()
+
+  ! Get number of ranks (np)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD,np,ierr)
+
+  ! Everybody verifies that they have a valid image number and rank
+  if(me < 1 .or. np < 1) error stop "Test failed."
+
+  ! Image 1 reports test success
+  if(me==1) print *,"Test passed."
+end program
diff --git a/src/tests/unit/init_register/register.f90 b/src/tests/unit/init_register/register.f90
new file mode 100644
index 0000000..2cf59cb
--- /dev/null
+++ b/src/tests/unit/init_register/register.f90
@@ -0,0 +1,47 @@
+! Unit test for register procedure. Testing static coarrays.
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+program register
+  implicit none
+  integer, parameter :: invalid_image_number=-1
+  integer, save :: me[*] = invalid_image_number
+
+  if (num_images()<2) error stop "This test requires at least 2 images."
+
+  me = this_image()
+
+  sync all
+  if(me == 1) then
+    block
+      integer :: image2number
+      image2number = me[2]
+      if (image2number/= 2) error stop "Test failed."
+      print *,"Test passed."
+    end block
+  end if
+
+end program
diff --git a/src/tests/unit/init_register/register_rename_me.f90 b/src/tests/unit/init_register/register_rename_me.f90
new file mode 100644
index 0000000..e1174e8
--- /dev/null
+++ b/src/tests/unit/init_register/register_rename_me.f90
@@ -0,0 +1,43 @@
+! Unit test for register procedure. Testing static arrays coarrays.
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+program register2
+  implicit none
+  integer, parameter :: array_size=10
+  integer :: np, arr1(array_size)[*]
+
+  np = num_images()
+  arr1 = this_image()
+
+  sync all
+
+  if(this_image() == 1) then
+     if(size(arr1) /= array_size) error stop 'Test failed.'
+     print *,'Test passed.'
+  endif
+
+end program
diff --git a/src/tests/unit/init_register/register_rename_me_too.f90 b/src/tests/unit/init_register/register_rename_me_too.f90
new file mode 100644
index 0000000..df9b9c5
--- /dev/null
+++ b/src/tests/unit/init_register/register_rename_me_too.f90
@@ -0,0 +1,49 @@
+! Unit test for register procedure. Testing allocatable arrays coarrays.
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+! SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+program register3
+  implicit none
+  integer, parameter :: invalid_rank=-2
+  integer :: np=invalid_rank,array_size=10
+  integer,allocatable :: array(:)[:]
+
+  np = num_images()
+  allocate(array(array_size)[*],source=this_image())
+
+  block
+    logical :: res = .true.
+    if(this_image() == 1) then
+      if(size(array) /= array_size) error stop "Test failed."
+    endif
+
+    deallocate(array)
+
+    if(allocated(array)) error stop "Test failed."
+    if(this_image() == 1) print *,"Test passed."
+  end block
+
+end program
diff --git a/src/tests/unit/send-get/CMakeLists.txt b/src/tests/unit/send-get/CMakeLists.txt
new file mode 100644
index 0000000..308408f
--- /dev/null
+++ b/src/tests/unit/send-get/CMakeLists.txt
@@ -0,0 +1,20 @@
+add_executable(get_array get_array_test.f90)
+target_link_libraries(get_array OpenCoarrays)
+
+#add_executable(get old_get_array_test.f90)
+#target_link_libraries(is_this_still_needed OpenCoarrays)
+
+add_executable(get_self sameloc.f90)
+target_link_libraries(get_self OpenCoarrays)
+
+add_executable(send_array send_array_test.f90)
+target_link_libraries(send_array OpenCoarrays)
+
+add_executable(get_with_offset_1d get_with_offset_1d.f90)
+target_link_libraries(get_with_offset_1d OpenCoarrays)
+
+add_executable(whole_get_array whole_get_array.f90)
+target_link_libraries(whole_get_array OpenCoarrays)
+
+add_executable(strided_get strided_get.f90)
+target_link_libraries(strided_get OpenCoarrays)
diff --git a/src/tests/unit/send-get/get_array_test.f90 b/src/tests/unit/send-get/get_array_test.f90
new file mode 100644
index 0000000..db4e27a
--- /dev/null
+++ b/src/tests/unit/send-get/get_array_test.f90
@@ -0,0 +1,420 @@
+!
+! This program does a correctness check for
+! ... = ARRAY[idx] and ... = SCALAR[idx]
+!
+program main
+  implicit none
+  integer, parameter :: n = 3
+  integer, parameter :: m = 4
+
+  ! Allocatable coarrays
+  call one(-5, 1)
+  call one(0, 0)
+  call one(1, -5)
+  call one(0, -11)
+
+  ! Static coarrays
+!  call two()
+!  call three()
+  write(*,*) 'Test passed'
+contains
+  subroutine one(lb1, lb2)
+    integer, value :: lb1, lb2
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, allocatable :: caf(:,:)[:]
+    integer, allocatable :: a(:,:), b(:,:), c(:,:)
+
+    allocate(caf(lb1:n+lb1-1, lb2:m+lb2-1)[*], &
+         a(lb1:n+lb1-1, lb2:m+lb2-1), &
+         b(lb1:n+lb1-1, lb2:m+lb2-1), &
+         c(lb1:n+lb1-1, lb2:m+lb2-1))
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    a = b
+    caf = -42
+    c = caf
+    sync all
+    if(this_image() == 1) then
+       a(:,:) = caf(lb1,lb2)[num_images()]
+    end if
+    sync all
+    if(this_image()==1) then
+       if(any (a /= c))   call abort()
+    endif
+
+    ! Whole array: ARRAY = ARRAY
+    caf = -42
+    a = b
+    c = caf
+    if (this_image() == 1) then
+       a(:,:) = caf(:,:)[num_images()]
+    endif
+    sync all
+    if(this_image()==1) then
+       if (any (a /= c)) then
+          print *, 'RES 1:', any (a /= c)
+          print *, a
+          print *, c
+          ! FIXME: Without the print lines above, it always fails. Why?
+          call abort()
+       end if
+    endif
+
+    ! Scalar assignment
+    a = -42
+    caf = -42
+    c = caf
+    sync all
+    do j = lb2, m+lb2-1
+       do i = n+lb1-1, lb1, -2
+          a(i,j) = b(i,j)
+       end do
+    end do
+    do j = lb2, m+lb2-1
+       do i = lb1, n+lb1-1, 2
+          a(i,j) = b(i,j)
+       end do
+    end do
+    sync all
+    if(this_image() == 1) then
+       do j = lb2, m+lb2-1
+          do i = n+lb1-1, lb1, -2
+             a(i,j) = caf(i,j)[num_images()]
+          end do
+       end do
+       do j = lb2, m+lb2-1
+          do i = lb1, n+lb1-1, 2
+             a(i,j) = caf(i,j)[num_images()]
+          end do
+       end do
+    endif
+    sync all
+    if(this_image() == 1) then
+       if (any (a /= c)) then
+          print *, 'RES 2:', any (a /= c)
+          print *, this_image(), ': ', a
+          print *, this_image(), ': ', c
+          ! FIXME: Without the print lines above, it always fails. Why?
+          call abort()
+       end if
+    endif
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+       do i_sgn2 = -1, 1, 2
+          do i=lb1, n+lb1-1
+             do i_e=lb1, n+lb1-1
+                do i_s=1, n
+                   do j=lb2, m+lb2-1
+                      do j_e=lb2, m+lb2-1
+                         do j_s=1, m
+                            ! ARRAY = SCALAR
+                            a = -42
+                            caf = -42
+                            c = a
+                            a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = b(lb1, lb2)
+                            sync all
+                            if (this_image() == 1) then
+                               a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = caf(lb1,lb2)[num_images()]
+                            end if
+                            sync all
+                            if (this_image() == 1) then
+                               if (any (a /= c)) then
+                                  print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+                                       lb2,":",m+lb2-1
+                                  print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+                                       ", ", j,":",j_e,":",j_s*i_sgn2
+                                  print *, i
+                                  print *, a
+                                  print *, c
+                                  print *, a-c
+                                  call abort()
+                               endif
+                            end if
+                            ! ARRAY = ARRAY
+                            caf = -42
+                            a = -42
+                            c = a
+                            a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+                                 = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                            sync all
+                            if (this_image() == 1) then
+                               a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()]
+                            end if
+                            sync all
+
+                            if (this_image() == 1) then
+                               if (any (a /= c)) then
+                                  print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+                                       lb2,":",m+lb2-1
+                                  print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+                                       ", ", j,":",j_e,":",j_s*i_sgn2
+                                  print *, i
+                                  print *, a
+                                  print *, c
+                                  print *, a-c
+                                  call abort()
+                               endif
+                            end if
+                         end do
+                      end do
+                   end do
+                end do
+             end do
+          end do
+       end do
+    end do
+  end subroutine one
+
+  subroutine two()
+    integer, parameter :: lb1 = -5, lb2 = 1
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, save :: caf(lb1:n+lb1-1, lb2:m+lb2-1)[*]
+    integer, save :: a(lb1:n+lb1-1, lb2:m+lb2-1)
+    integer, save :: b(lb1:n+lb1-1, lb2:m+lb2-1)
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    caf = -42
+    a = -42
+    a(:,:) = b(lb1, lb2)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(lb1, lb2)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    caf = -42
+    a = -42
+    a(:,:) = b(:, :)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(:, :)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Scalar assignment
+    caf = -42
+    a = -42
+    do j = lb2, m+lb2-1
+      do i = n+lb1-1, 1, -2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    do j = lb2, m+lb2-1
+      do i = 1, n+lb1-1, 2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    sync all
+    if (this_image() == 1) then
+      do j = lb2, m+lb2-1
+        do i = n+lb1-1, 1, -2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+      do j = lb2, m+lb2-1
+        do i = 1, n+lb1-1, 2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = b(lb1, lb2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(lb1, lb2)
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+                         = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    end if
+                    sync all
+
+                    if (this_image() == num_images()) then
+                      if (any (a /= caf)) then
+                        print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+                             lb2,":",m+lb2-1
+                        print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+                             ", ", j,":",j_e,":",j_s*i_sgn2
+                        print *, i
+                        print *, a
+                        print *, caf
+                        print *, a-caf
+                        call abort()
+                      endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine two
+
+  subroutine three()
+    integer, parameter :: lb1 = 0, lb2 = 0
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, save :: caf(lb1:n+lb1-1, lb2:m+lb2-1)[*]
+    integer, save :: a(lb1:n+lb1-1, lb2:m+lb2-1)
+    integer, save :: b(lb1:n+lb1-1, lb2:m+lb2-1)
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    caf = -42
+    a = -42
+    a(:,:) = b(lb1, lb2)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(lb1, lb2)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    caf = -42
+    a = -42
+    a(:,:) = b(:, :)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(:, :)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Scalar assignment
+    caf = -42
+    a = -42
+    do j = lb2, m+lb2-1
+      do i = n+lb1-1, 1, -2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    do j = lb2, m+lb2-1
+      do i = 1, n+lb1-1, 2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    sync all
+    if (this_image() == 1) then
+      do j = lb2, m+lb2-1
+        do i = n+lb1-1, 1, -2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+      do j = lb2, m+lb2-1
+        do i = 1, n+lb1-1, 2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = b(lb1, lb2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(lb1, lb2)
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+                         = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    end if
+                    sync all
+
+                    if (this_image() == num_images()) then
+                      if (any (a /= caf)) then
+                        print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+                             lb2,":",m+lb2-1
+                        print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+                             ", ", j,":",j_e,":",j_s*i_sgn2
+                        print *, i
+                        print *, a
+                        print *, caf
+                        print *, a-caf
+                        call abort()
+                      endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine three
+end program main
diff --git a/src/tests/unit/send-get/get_with_offset_1d.f90 b/src/tests/unit/send-get/get_with_offset_1d.f90
new file mode 100644
index 0000000..17d4343
--- /dev/null
+++ b/src/tests/unit/send-get/get_with_offset_1d.f90
@@ -0,0 +1,31 @@
+program get_offset_1d
+  implicit none
+
+  integer,allocatable :: a(:)[:],b(:)
+  integer :: me,np,i
+
+  me = this_image()
+  np = num_images()
+
+  allocate(a(100)[*],b(10))
+
+  a = (/ (i, i=1,100) /)
+
+  do i=1,100
+    a(i) = a(i) + me
+  enddo
+
+  sync all
+
+  if(me < np) then
+    b(:) = a(21:30)[me+1]
+  endif
+
+  if(me == 1) then
+    do i=1,10
+      if(b(i) /= 20+i+me+1) call abort()
+    enddo
+    write(*,*) 'Test passed.'
+  endif
+
+end program
diff --git a/src/tests/unit/send-get/old_get_array_test.f90 b/src/tests/unit/send-get/old_get_array_test.f90
new file mode 100644
index 0000000..3493466
--- /dev/null
+++ b/src/tests/unit/send-get/old_get_array_test.f90
@@ -0,0 +1,344 @@
+!
+! This program does a correctness check for
+! ARRAY = SCALAR and ARRAY = ARRAY
+!
+program main
+  implicit none
+  integer, parameter :: n = 3
+  integer, parameter :: m = 4
+
+  ! Allocatable coarrays
+  call one(-5, 1)
+  call one(0, 0)
+  call one(1, -5)
+  call one(0, -11)
+
+  ! Static coarrays
+  call two()
+  call three()
+  write(*,*) 'Test passed'
+contains
+  subroutine one(lb1, lb2)
+    integer, value :: lb1, lb2
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, allocatable :: caf(:,:)[:]
+    integer, allocatable :: a(:,:), b(:,:)
+
+    allocate(caf(lb1:n+lb1-1, lb2:m+lb2-1)[*], &
+         a(lb1:n+lb1-1, lb2:m+lb2-1), &
+         b(lb1:n+lb1-1, lb2:m+lb2-1))
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    a = -42
+    caf = -42
+    if(this_image() == num_images()) then
+       caf = b
+    endif
+    sync all
+    if (this_image() == 1) then
+      a(:,:) = caf(lb1,lb2)[num_images()]
+      print *, this_image(), '//', a, '//', b(lb1,lb2)
+      print *, '>>>', any(a /= b(lb1,lb2))
+      if (any (a /= b(lb1,lb2))) then
+! FIXME: ABORTS UNLESS THERE IS SOME OTHER CODE
+print *, 'HELLO!!!!!!!!!!!!!!!!!'
+        call abort()
+      end if
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    a = -42
+    caf = -42
+    if(this_image() == num_images()) then
+       caf = b
+    endif
+    sync all
+    if (this_image() == 1) then
+      a(:,:) = caf(:,:)[num_images()]
+      if (any (a /= b)) &
+!FIXME
+        print *, a
+        print *, b
+        print *, 'WRONG:', any (a /= b)
+        call abort()
+      end if
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                    a = -12
+       		    caf = -42
+    		    if(this_image() == num_images()) then
+       		       caf = b
+    		    endif
+                    sync all
+                    if (this_image() == 1) then
+		      b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+			   = caf(lb1, lb2)[num_images()]
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                    a = -12
+		    b = -32
+                    if(this_image() == num_images()) then
+               	       caf = a
+              	     else
+                       caf = -42
+               	    endif
+                    sync all
+                    if (this_image() == 1) then
+!		      b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+!			   = caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()]
+                    end if
+                    sync all
+
+                    if (this_image() == 1) then
+ !                     if (any (a /= b)) then
+ !                       print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+ !                            lb2,":",m+lb2-1
+ !                       print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+ !                            ", ", j,":",j_e,":",j_s*i_sgn2
+ !                       print *, i
+ !                       print *, a
+ !                       print *, caf
+ !                       print *, a-caf
+ !                       call abort()
+ !                     endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine one
+
+  subroutine two()
+    integer, parameter :: lb1 = -5, lb2 = 1
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, save :: caf(lb1:n+lb1-1, lb2:m+lb2-1)[*]
+    integer, save :: a(lb1:n+lb1-1, lb2:m+lb2-1)
+    integer, save :: b(lb1:n+lb1-1, lb2:m+lb2-1)
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    a = -12
+    b = -32
+    if(this_image() == num_images()) then
+      caf = a
+    else
+      caf = -42
+    endif
+    sync all
+    if (this_image() == 1) then
+      b(:,:) = caf(lb1,lb2)[num_images()]
+    end if
+    sync all
+    if (this_image() == 1) then
+      if (any (a /= b)) &
+           call abort()
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    a = -12
+    b = -32
+    if(this_image() == num_images()) then
+      caf = a
+    else
+      caf = -42
+    endif
+    sync all
+    if (this_image() == 1) then
+      b(:,:) = caf(:,:)[num_images()]
+    end if
+    sync all
+    if (this_image() == 1) then
+      if (any (a /= b)) &
+           call abort()
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                     a = -12
+                     b = -32
+                     if(this_image() == num_images()) then
+                        caf = a
+                     else
+                        caf = -42
+                     endif
+                    sync all
+                    if (this_image() == 1) then
+                       b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = caf(lb1,lb2)[num_images()]
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                    b = -32
+                    a = -12
+                    if(this_image() == num_images()) then
+                       caf = a
+                    else
+                       caf = -42
+                    endif
+                    sync all
+                    if (this_image() == 1) then
+!                       b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+!                            =caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()]
+                    end if
+                    sync all
+
+                    if (this_image() == 1) then
+ !                     if (any (a /= b)) then
+ !                       print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+ !                            lb2,":",m+lb2-1
+ !                       print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+ !                            ", ", j,":",j_e,":",j_s*i_sgn2
+ !                       print *, i
+ !                       print *, a
+ !                       print *, caf
+ !                       print *, a-caf
+ !                       call abort()
+ !                     endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine two
+
+  subroutine three()
+    integer, parameter :: lb1 = 0, lb2 = 0
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, save :: caf(lb1:n+lb1-1, lb2:m+lb2-1)[*]
+    integer, save :: a(lb1:n+lb1-1, lb2:m+lb2-1)
+    integer, save :: b(lb1:n+lb1-1, lb2:m+lb2-1)
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    a = -12
+    b = -32
+    if(this_image() == num_images()) then
+      caf = a
+    else
+      caf = -42
+    endif
+    sync all
+    if (this_image() == 1) then
+       b(:,:) = caf(lb1,lb2)[num_images()]
+    end if
+    sync all
+    if (this_image() == 1) then
+      if (any (a /= b)) &
+           call abort()
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    a = -12
+    b = -32
+    if(this_image() == num_images()) then
+      caf = a
+    else
+      caf = -42
+    endif
+    sync all
+    if (this_image() == 1) then
+       b(:,:) = caf(:,:)[num_images()]
+    end if
+    sync all
+    if (this_image() == 1) then
+      if (any (a /= b)) &
+           call abort()
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                     a = -12
+                     b = -32
+                     if(this_image() == num_images()) then
+                        caf = a
+                     else
+                        caf = -42
+                     endif
+                    sync all
+                    if (this_image() == 1) then
+                       b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+                            = caf(lb1,lb2)[num_images()]
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                     a = -12
+                     b = -32
+                     if(this_image() == num_images()) then
+                        caf = a
+                     else
+                        caf = -42
+                     endif
+                     sync all
+                    if (this_image() == 1) then
+!                       b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+!                            = caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()]
+                    end if
+                    sync all
+
+                    if (this_image() == 1) then
+!                      if (any (a /= b)) then
+!                        print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+!                             lb2,":",m+lb2-1
+!                        print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+!                             ", ", j,":",j_e,":",j_s*i_sgn2
+!                        print *, i
+!                        print *, a
+!                        print *, caf
+!                        print *, a-caf
+!                        call abort()
+!                      endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine three
+end program main
diff --git a/src/tests/unit/send-get/sameloc.f90 b/src/tests/unit/send-get/sameloc.f90
new file mode 100644
index 0000000..c511c69
--- /dev/null
+++ b/src/tests/unit/send-get/sameloc.f90
@@ -0,0 +1,139 @@
+! This program tests the capability of copying data on the
+! same memory location and within the same image from
+! different memory locations.
+! NOTE:
+! In order to run this test successfully the efficient
+! strided transfer support must be disabled.
+program sameloc
+  implicit none
+
+  integer,codimension[*] :: a
+  integer,dimension(10),codimension[*] :: b,c
+  integer,dimension(9,10),codimension[*] :: m
+  integer,dimension(10) :: t
+  integer :: i,j
+
+  a = 10
+  b(1:5) = 1
+  b(6:10) = -1
+  c(1:5) = 1
+  c(6:10) = -1
+
+  t(:) = b(:)
+  t(1:5) = b(2:6)
+
+  do i=1,9
+    m(i,:) = (/ (j, j = 1, 10) /)
+  enddo
+
+  sync all
+
+  a = a[1]
+  if (this_image() == 1) write(*,*) 'OK',a
+
+  t = (/ (j, j = 1, 10) /)
+
+  if(this_image() == 1) then
+    c = m(1,:)[1]
+    if(any(c(:) /= t(:))) then
+      call abort()
+    else
+      write(*,*) 'ok get row'
+    endif
+  endif
+
+  sync all
+
+  if(this_image() == 1) then
+    do i=1,10
+      if(m(9,i)[1] /= t(i)) then
+	write(*,*) 'pos',i,'value get',m(9,i)[1],'value t',t(i)
+	call abort()
+      endif
+    enddo
+  endif
+
+  if(this_image() == 1) write(*,*) 'Ok get element from matrix'
+
+  sync all
+
+  m(9,:) = 1
+
+  if(this_image() == 1) then
+    do i=1,10
+      m(9,i)[1] = i
+      if(m(9,i)[1] /= t(i)) then
+        write(*,*) 'pos',i,'value get',m(9,i)[1],'value t',t(i)
+        call abort()
+      endif
+    enddo
+  endif
+
+  if(this_image() == 1 ) write(*,*) 'Ok put element from matrix'
+
+  t(:) = b(:)
+  t(1:5) = b(2:6)
+
+  c(1:5) = 1
+  c(6:10) = -1
+
+  sync all
+
+  if(this_image() == 1) then
+    b(1:5)[1] = b(2:6)
+    if(any(b(:) /= t(:))) then
+      call abort()
+    else
+      write(*,*) 'OK put overlapped'
+    endif
+  endif
+
+  b(1:5) = 1
+  b(6:10) = -1
+
+  sync all
+
+  if(this_image() == 1) then
+    b(1:5)[1] = b(2:6)[1]
+    if(any(b(:) /= t(:))) then
+      call abort()
+    else
+      write(*,*) 'OK putget overlapped'
+    endif
+  endif
+
+  t(:) = c(:)
+  t(10:1:-1) = t(:)
+
+  sync all
+
+  if(this_image() == 1) then
+    c(10:1:-1)[1] = c(:)
+    if(any(t(:) /= c(:))) then
+      write(*,*) 'Error in put reversed'
+      write(*,*) c
+      write(*,*) t
+      call abort()
+    else
+      write(*,*) 'OK put reversed'
+    endif
+  endif
+
+  c(1:5) = 1
+  c(6:10) = -1
+
+  t(:) = c(:)
+  t(10:1:-1) = t(:)
+
+  if(this_image() == 1) then
+    c(:) = c(10:1:-1)[1]
+    if(any(t(:) /= c(:))) then
+      write(*,*) c
+      write(*,*) t
+      call abort()
+    else
+      write(*,*) 'OK get reversed'
+    endif
+  endif
+
+end program
diff --git a/src/tests/unit/send-get/send_array_test.f90 b/src/tests/unit/send-get/send_array_test.f90
new file mode 100644
index 0000000..997b7af
--- /dev/null
+++ b/src/tests/unit/send-get/send_array_test.f90
@@ -0,0 +1,398 @@
+!
+! This program does a correctness check for
+! ARRAY[idx] = SCALAR, ARRAY[idx] = ARRAY and SCALAR[idx] = SCALAR
+!
+program main
+  implicit none
+  integer, parameter :: n = 3
+  integer, parameter :: m = 4
+
+  ! Allocatable coarrays
+  call one(-5, 1)
+  call one(0, 0)
+  call one(1, -5)
+  call one(0, -11)
+
+  ! Static coarrays
+  call two()
+  call three()
+  write(*,*) 'Test passed'
+contains
+  subroutine one(lb1, lb2)
+    integer, value :: lb1, lb2
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, allocatable :: caf(:,:)[:]
+    integer, allocatable :: a(:,:), b(:,:)
+
+    allocate(caf(lb1:n+lb1-1, lb2:m+lb2-1)[*], &
+         a(lb1:n+lb1-1, lb2:m+lb2-1), &
+         b(lb1:n+lb1-1, lb2:m+lb2-1))
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    caf = -42
+    a = -42
+    a(:,:) = b(lb1, lb2)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(lb1, lb2)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    caf = -42
+    a = -42
+    a(:,:) = b(:, :)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(:, :)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Scalar assignment
+    caf = -42
+    a = -42
+    do j = lb2, m+lb2-1
+      do i = n+lb1-1, 1, -2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    do j = lb2, m+lb2-1
+      do i = 1, n+lb1-1, 2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    sync all
+    if (this_image() == 1) then
+      do j = lb2, m+lb2-1
+        do i = n+lb1-1, 1, -2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+      do j = lb2, m+lb2-1
+        do i = 1, n+lb1-1, 2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = b(lb1, lb2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(lb1, lb2)
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+                         = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    end if
+                    sync all
+
+                    if (this_image() == num_images()) then
+                      if (any (a /= caf)) then
+                        print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+                             lb2,":",m+lb2-1
+                        print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+                             ", ", j,":",j_e,":",j_s*i_sgn2
+                        print *, i
+                        print *, a
+                        print *, caf
+                        print *, a-caf
+                        call abort()
+                      endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine one
+
+  subroutine two()
+    integer, parameter :: lb1 = -5, lb2 = 1
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, save :: caf(lb1:n+lb1-1, lb2:m+lb2-1)[*]
+    integer, save :: a(lb1:n+lb1-1, lb2:m+lb2-1)
+    integer, save :: b(lb1:n+lb1-1, lb2:m+lb2-1)
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    caf = -42
+    a = -42
+    a(:,:) = b(lb1, lb2)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(lb1, lb2)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    caf = -42
+    a = -42
+    a(:,:) = b(:, :)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(:, :)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Scalar assignment
+    caf = -42
+    a = -42
+    do j = lb2, m+lb2-1
+      do i = n+lb1-1, 1, -2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    do j = lb2, m+lb2-1
+      do i = 1, n+lb1-1, 2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    sync all
+    if (this_image() == 1) then
+      do j = lb2, m+lb2-1
+        do i = n+lb1-1, 1, -2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+      do j = lb2, m+lb2-1
+        do i = 1, n+lb1-1, 2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = b(lb1, lb2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(lb1, lb2)
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+                         = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    end if
+                    sync all
+
+                    if (this_image() == num_images()) then
+                      if (any (a /= caf)) then
+                        print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+                             lb2,":",m+lb2-1
+                        print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+                             ", ", j,":",j_e,":",j_s*i_sgn2
+                        print *, i
+                        print *, a
+                        print *, caf
+                        print *, a-caf
+                        call abort()
+                      endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine two
+
+  subroutine three()
+    integer, parameter :: lb1 = 0, lb2 = 0
+
+    integer :: i_sgn1, i_sgn2, i, i_e, i_s, j, j_e, j_s
+    integer, save :: caf(lb1:n+lb1-1, lb2:m+lb2-1)[*]
+    integer, save :: a(lb1:n+lb1-1, lb2:m+lb2-1)
+    integer, save :: b(lb1:n+lb1-1, lb2:m+lb2-1)
+
+    b = reshape([(i*33, i = 1, size(b))], shape(b))
+
+    ! Whole array: ARRAY = SCALAR
+    caf = -42
+    a = -42
+    a(:,:) = b(lb1, lb2)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(lb1, lb2)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Whole array: ARRAY = ARRAY
+    caf = -42
+    a = -42
+    a(:,:) = b(:, :)
+    sync all
+    if (this_image() == 1) then
+      caf(:,:)[num_images()] = b(:, :)
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Scalar assignment
+    caf = -42
+    a = -42
+    do j = lb2, m+lb2-1
+      do i = n+lb1-1, 1, -2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    do j = lb2, m+lb2-1
+      do i = 1, n+lb1-1, 2
+        a(i,j) = b(i,j)
+      end do
+    end do
+    sync all
+    if (this_image() == 1) then
+      do j = lb2, m+lb2-1
+        do i = n+lb1-1, 1, -2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+      do j = lb2, m+lb2-1
+        do i = 1, n+lb1-1, 2
+          caf(i,j)[num_images()] = b(i, j)
+        end do
+      end do
+    end if
+    sync all
+    if (this_image() == num_images()) then
+      if (any (a /= caf)) &
+           call abort()
+    end if
+
+    ! Array sections with different ranges and pos/neg strides
+    do i_sgn1 = -1, 1, 2
+      do i_sgn2 = -1, 1, 2
+        do i=lb1, n+lb1-1
+          do i_e=lb1, n+lb1-1
+            do i_s=1, n
+              do j=lb2, m+lb2-1
+                do j_e=lb2, m+lb2-1
+                  do j_s=1, m
+                    ! ARRAY = SCALAR
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) = b(lb1, lb2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(lb1, lb2)
+                    end if
+                    sync all
+
+                    ! ARRAY = ARRAY
+                    caf = -42
+                    a = -42
+                    a(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2) &
+                         = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    sync all
+                    if (this_image() == 1) then
+                      caf(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)[num_images()] &
+                           = b(i:i_e:i_s*i_sgn1, j:j_e:j_s*i_sgn2)
+                    end if
+                    sync all
+
+                    if (this_image() == num_images()) then
+                      if (any (a /= caf)) then
+                        print '(*(g0))', "bounds: ", lb1,":",n+lb1-1,", ", &
+                             lb2,":",m+lb2-1
+                        print '(*(g0))', "section: ", i,":",i_e,":",i_s*i_sgn1, &
+                             ", ", j,":",j_e,":",j_s*i_sgn2
+                        print *, i
+                        print *, a
+                        print *, caf
+                        print *, a-caf
+                        call abort()
+                      endif
+                    end if
+                  end do
+                end do
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine three
+end program main
diff --git a/src/tests/unit/send-get/strided_get.f90 b/src/tests/unit/send-get/strided_get.f90
new file mode 100644
index 0000000..6ed732f
--- /dev/null
+++ b/src/tests/unit/send-get/strided_get.f90
@@ -0,0 +1,25 @@
+program strided_get
+  use iso_c_binding, only : DPC=>c_double
+  implicit none
+
+  integer :: i,me,np
+  integer,allocatable :: a(:,:,:,:)[:],b(:,:,:,:)
+  complex(kind=DPC),allocatable :: ac(:,:,:,:)[:],bc(:,:,:,:)
+
+  me = this_image()
+  np = num_images()
+
+  allocate(ac(0:11,-10:-5,-1:0,-1:5)[*],bc(6,6,2,7))
+
+  ac = me
+  bc = me
+
+  sync all
+
+  if(me == 2) then
+    bc(1:2,:,:,:) = ac(0:1,:,:,:)[me-1]
+    if(any(bc(1:2,:,:,:) /= 1)) call abort()
+    write(*,*) 'Test passed.'
+  endif
+
+end program
diff --git a/src/tests/unit/send-get/whole_get_array.f90 b/src/tests/unit/send-get/whole_get_array.f90
new file mode 100644
index 0000000..e90e9bf
--- /dev/null
+++ b/src/tests/unit/send-get/whole_get_array.f90
@@ -0,0 +1,89 @@
+program whole_array_get
+  implicit none
+
+  integer,allocatable :: x1(:)[:],y1(:)
+  integer,allocatable :: x2(:,:)[:],y2(:,:)
+  integer,allocatable :: x3(:,:,:)[:],y3(:,:,:)
+  integer,parameter   :: n = 10
+  integer :: me,np,i,j,k
+
+  me = this_image()
+  np = num_images()
+
+  allocate(x1(n)[*],y1(n))
+
+  x1 = me
+  y1 = 0
+
+  sync all
+
+  if(me == 1) then
+     y1 = x1(:)[me+1]
+     if(any(y1 /= 2)) then
+        write(*,*) 'Test 1 fails'
+        call abort()
+     end if
+  end if
+
+  deallocate(x1)
+  allocate(x2(1:n,0:n-1)[*],y2(n,n))
+
+  x2 = me
+  y1 = 0; y2 = 0
+
+  sync all
+  if(me == 1) then
+     y2 = x2(:,:)[np]
+     if(any(y2 /= np)) then
+        write(*,*) 'Test 2 fails'
+        call abort()
+     end if
+  end if
+
+  sync all
+
+  x2(:,n/2) = x2(:,n/2) + n/2
+
+  sync all
+
+  if(me == 1) then
+     y1 = x2(:,n/2)[me+1]
+     if(any(y1 /= 2+n/2)) then
+        write(*,*) 'Test 3 fails'
+        call abort()
+     end if
+  end if
+
+  deallocate(y1,x2,y2)
+  allocate(x3(0:n-1,1:n,-1:n-2)[*],y3(n,n,n))
+
+  x3 = me; y3 = 0
+
+  sync all
+
+  if(me == 1) then
+     y3 = x3(:,:,:)[me+1]
+     if(any(y3 /= me+1)) then
+        write(*,*) 'Test 4 fails'
+        call abort()
+     end if
+  endif
+
+  sync all
+
+  x3(:,:,n/2) = me + n/2
+  y3 = 0
+
+  sync all
+
+  if(me == 1) then
+     y3(:,n/2,:) = x3(:,:,n/2)[me+1]
+     if(any(y3(:,n/2,:) /= me+1+n/2)) then
+        write(*,*) 'Test 5 fails'
+        call abort()
+     end if
+  endif
+
+  if(me == 1) write(*,*) 'Test passed.'
+
+end program whole_array_get
diff --git a/src/tests/unit/simple/CMakeLists.txt b/src/tests/unit/simple/CMakeLists.txt
new file mode 100644
index 0000000..e216fbb
--- /dev/null
+++ b/src/tests/unit/simple/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Fortran tests
+add_executable(sync_images syncimages.f90)
+target_link_libraries(sync_images OpenCoarrays)
+
+add_executable(is_there_a_better_name syncimages2.f90)
+target_link_libraries(is_there_a_better_name OpenCoarrays)
+
+add_executable(increment_my_neighbor test1Caf.f90)
+target_link_libraries(increment_my_neighbor OpenCoarrays)
+
+add_executable(atomics testAtomics.f90)
+target_link_libraries(atomics OpenCoarrays)
+
+# C tests
+#include(CMakeForceCompiler)
+#CMAKE_FORCE_C_COMPILER(mpicc GNU)
+#add_executable(C_sync_images syncimages2.c ../../../mpi/mpi_caf.c ../../../common/caf_auxiliary.c)
diff --git a/src/tests/unit/simple/Makefile b/src/tests/unit/simple/Makefile
new file mode 100644
index 0000000..2938cf8
--- /dev/null
+++ b/src/tests/unit/simple/Makefile
@@ -0,0 +1,31 @@
+.SUFFIXES: .f90 .armci .mpi .gasnet
+
+TOP=../../..
+include $(TOP)/make.inc
+
+OBJS=increment_my_neighbor.o add_myself_to_my_neighbor.o send_array_test.o
+EXES=$(OBJS:.o=.exe)
+
+
+all: $(OBJS) armci gasnet mpi
+
+
+clean:
+	/bin/rm -fr *.o *.armci *.mpi *.gasnet
+
+
+mpi: $(EXES:.exe=.mpi)
+
+armci: $(EXES:.exe=.armci)
+
+gasnet: $(EXES:.exe=.gasnet)
+
+
+.o.mpi:
+	$(MPFC) -o $@ $< -lcaf_mpi -L$(TOP)/mpi
+.o.armci:
+	$(MPFC)  -o $@ $< -lcaf_armci -L$(TOP)/armci $(ARMCI_LDFLAGS)
+.o.gasnet:
+	$(MPFC)  -o $@ $< -lcaf_gasnet -L$(TOP)/gasnet $(GASNET_LDFLAGS)
+.f90.o:
+	$(FC) -fcoarray=lib $(FFLAGS) -c $< -o $@
diff --git a/src/tests/unit/simple/syncimages.f90 b/src/tests/unit/simple/syncimages.f90
new file mode 100644
index 0000000..33309f8
--- /dev/null
+++ b/src/tests/unit/simple/syncimages.f90
@@ -0,0 +1,58 @@
+! syncimages test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+program syncimages
+  implicit none
+
+  integer :: me,ne,i
+  integer :: p[*] = 0
+  logical :: test[*] = .true.
+
+  me = this_image()
+  ne = num_images()
+
+  if(me == 1) then
+    p = 1
+  else
+    sync images( me-1 )
+    p = p[me-1] +1
+  endif
+
+  if(me<ne) sync images( me+1 )
+
+  if(me /= p) test = .false.
+
+  sync all
+
+  if(me == 1) then
+    do i=1,ne
+      if(test[i].eqv..false.) error stop "Test failed."
+    enddo
+  endif
+
+  if(me==1) print *,"Test passed."
+
+end program
diff --git a/src/tests/unit/simple/syncimages2.c b/src/tests/unit/simple/syncimages2.c
new file mode 100644
index 0000000..8c1b3bd
--- /dev/null
+++ b/src/tests/unit/simple/syncimages2.c
@@ -0,0 +1,114 @@
+/* syncimages2 test program
+
+  Copyright (c) 2012-2014, Sourcery, Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+      * Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+      * Neither the name of the Sourcery, Inc., nor the
+        names of its contributors may be used to endorse or promote products
+        derived from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "libcaf.h"
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#include <stdbool.h>
+
+
+static struct timezone tz;
+static struct timeval start_time, finish_time;
+
+/* Start measuring a time delay */
+void start_timer(void)
+{
+    gettimeofday( &start_time, &tz);
+}
+
+/* Retunrn elapsed time in milliseconds */
+double elapsed_time(void)
+{
+    gettimeofday( &finish_time, &tz);
+    return(1000.0*(finish_time.tv_sec - start_time.tv_sec) +
+           (finish_time.tv_usec - start_time.tv_usec)/1000.0 );
+}
+
+/* Return the stopping time in milliseconds */
+double stop_time(void)
+{
+    gettimeofday( &finish_time, &tz);
+    return(1000.0*finish_time.tv_sec + finish_time.tv_usec/1000.0);
+}
+
+int main(int argc, char **argv)
+{
+  int info = 0, me,np,n=1,i, *images;
+  double *a_d,*d;
+  caf_token_t token;
+  ptrdiff_t size=sizeof(double);
+  char errmsg[255];
+  bool check = true;
+
+  /* if(argc == 1) */
+  /*   { */
+  /*     printf("Please insert message size\n"); */
+  /*     return 1; */
+  /*   } */
+
+  /* sscanf(argv[1],"%d",&n); */
+
+  /* n = (int)n/sizeof(double); */
+
+  /* size = n*sizeof(double); */
+
+  _gfortran_caf_init (&argc, &argv);
+
+  me = _gfortran_caf_this_image (1);
+  np = _gfortran_caf_num_images (1, 1);
+
+  a_d = _gfortran_caf_register(size,CAF_REGTYPE_COARRAY_STATIC,&token,&info,errmsg,255);
+
+  /* start_timer(); */
+  images = calloc(np,sizeof(int));
+
+  if(me==1)
+    {
+      images[0] = -1;
+      _gfortran_caf_sync_images(1,images,&info,errmsg,255);
+    }
+  else
+    {
+      images[0] = 1;
+      _gfortran_caf_sync_images(1,images,&info,errmsg,255);
+
+    }
+
+  printf("proc %d dval: %lf\n",me);
+
+  /* stop_time(); */
+
+  if(info!=0)
+    printf("Error\n");
+
+  _gfortran_caf_finalize();
+
+  return 0;
+}
diff --git a/src/tests/unit/simple/syncimages2.f90 b/src/tests/unit/simple/syncimages2.f90
new file mode 100644
index 0000000..5674181
--- /dev/null
+++ b/src/tests/unit/simple/syncimages2.f90
@@ -0,0 +1,44 @@
+! syncimages2 test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+
+! The test passes if it terminates
+! If the test fails you will get an error or a non-termination.
+
+program syncimages2
+  implicit none
+
+  if(this_image() == 1) then
+    sync images(*)
+  else
+    sync images(1)
+  endif
+
+  sync all
+
+  if(this_image()==1) print *,"Test passed."
+
+end program
diff --git a/src/tests/unit/simple/test1Caf.f90 b/src/tests/unit/simple/test1Caf.f90
new file mode 100644
index 0000000..fea1905
--- /dev/null
+++ b/src/tests/unit/simple/test1Caf.f90
@@ -0,0 +1,63 @@
+! test1caf test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+
+program test1caf
+  implicit none
+  integer, parameter :: num_local_elems=3,a_initial=1,b_initial=2
+  integer :: a(num_local_elems)[*]=a_initial,b(num_local_elems)[*]=b_initial
+  integer :: i,me,np,left,right
+
+  me = this_image()
+  np = num_images()
+
+  left  = merge(np,me-1,me==1)
+  right = merge(1,me+1,me==np)
+
+  if (mod(me,2).eq.0) then
+     a(:)[right] = a(:)[right]+me
+  else
+     b(:)[left] = b(:)[left]+me
+  end if
+
+  if(me==1) then
+     write(*,*) me, a, b
+  else
+     sync images(me-1)
+     write(*,*) me, a, b
+  end if
+
+  if(me < np) sync images(me+1)
+
+  if (mod(me,2).eq.0) then
+    if ( any(a(:)[right]/=a_initial+me)) error stop "Test failed."
+  else
+    if ( any(b(:)[left]/=b_initial+me)) error stop "Test failed."
+  end if
+
+  if (me==1) print *,"Test passed."
+
+end program test1caf
diff --git a/src/tests/unit/simple/testAtomics.f90 b/src/tests/unit/simple/testAtomics.f90
new file mode 100644
index 0000000..ebf00bf
--- /dev/null
+++ b/src/tests/unit/simple/testAtomics.f90
@@ -0,0 +1,28 @@
+program atomic
+use iso_fortran_env
+implicit none
+
+integer :: me,np,res
+integer(atomic_int_kind) :: atom[*]
+
+me = this_image()
+np = num_images()
+
+call atomic_define(atom[1],0)
+
+sync all
+
+call ATOMIC_ADD (atom[1], me)
+
+sync all
+
+if(me == 1) then
+  call atomic_ref(res,atom[1])
+  if(res /= (np*(np+1))/2) then
+    write(*,*) 'res',res
+    call abort()
+  endif
+  write(*,*) 'OK'
+endif
+
+end program
diff --git a/src/tests/unit/sync/CMakeLists.txt b/src/tests/unit/sync/CMakeLists.txt
new file mode 100644
index 0000000..c8ba3bf
--- /dev/null
+++ b/src/tests/unit/sync/CMakeLists.txt
@@ -0,0 +1,23 @@
+add_executable(syncall syncall.f90)
+target_link_libraries(syncall OpenCoarrays)
+
+add_executable(syncimages syncimages.f90)
+target_link_libraries(syncimages OpenCoarrays)
+
+add_executable(duplicate_syncimages duplicate_syncimages.f90)
+target_link_libraries(duplicate_syncimages OpenCoarrays)
+
+#add_executable(syncimages_status syncimages_status.f90)
+#target_link_libraries(syncimages_status OpenCoarrays)
+
+#add_executable(send_array send_array_test.f90)
+#target_link_libraries(send_array OpenCoarrays)
+
+#add_executable(get_with_offset_1d get_with_offset_1d.f90)
+#target_link_libraries(get_with_offset_1d OpenCoarrays)
+
+#add_executable(whole_get_array whole_get_array.f90)
+#target_link_libraries(whole_get_array OpenCoarrays)
+
+#add_executable(strided_get strided_get.f90)
+#target_link_libraries(strided_get OpenCoarrays)
diff --git a/src/tests/unit/sync/duplicate_syncimages.f90 b/src/tests/unit/sync/duplicate_syncimages.f90
new file mode 100644
index 0000000..8e7c2f2
--- /dev/null
+++ b/src/tests/unit/sync/duplicate_syncimages.f90
@@ -0,0 +1,47 @@
+! syncimages test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+program duplicate_syncimages
+  implicit none
+
+  integer :: me,i,array(4)
+  integer :: stat = 0
+
+  me = this_image()
+
+  if(me == 1) then
+     array(1) = 5; array(2) = 6; array(3) = 7; array(4) = 5
+     sync images(array,stat=stat)
+     if(stat == 3) then
+        print *,"Test passed."
+     else
+        print *,"Test failed."
+     endif
+  endif
+
+  sync all
+
+end program
diff --git a/src/tests/unit/sync/syncall.f90 b/src/tests/unit/sync/syncall.f90
new file mode 100644
index 0000000..d1e16d5
--- /dev/null
+++ b/src/tests/unit/sync/syncall.f90
@@ -0,0 +1,54 @@
+! syncall test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+program syncall
+  implicit none
+
+  integer :: me,np,i
+  integer :: scalar[*]
+
+  me = this_image()
+  np = num_images()
+  scalar = -1
+
+  if(me /= 1) call sleep(1)
+
+  scalar = 1
+
+  sync all
+
+  if(me == 1) then
+     do i=1,np
+        if(scalar[i] /= 1) then
+           write(*,*) 'Test failed.'
+           stop
+        endif
+     end do
+  end if
+
+  if(me == 1) print *,'Test passed.'
+
+end program syncall
diff --git a/src/tests/unit/sync/syncimages.f90 b/src/tests/unit/sync/syncimages.f90
new file mode 100644
index 0000000..33309f8
--- /dev/null
+++ b/src/tests/unit/sync/syncimages.f90
@@ -0,0 +1,58 @@
+! syncimages test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+program syncimages
+  implicit none
+
+  integer :: me,ne,i
+  integer :: p[*] = 0
+  logical :: test[*] = .true.
+
+  me = this_image()
+  ne = num_images()
+
+  if(me == 1) then
+    p = 1
+  else
+    sync images( me-1 )
+    p = p[me-1] +1
+  endif
+
+  if(me<ne) sync images( me+1 )
+
+  if(me /= p) test = .false.
+
+  sync all
+
+  if(me == 1) then
+    do i=1,ne
+      if(test[i].eqv..false.) error stop "Test failed."
+    enddo
+  endif
+
+  if(me==1) print *,"Test passed."
+
+end program
diff --git a/src/tests/unit/sync/syncimages2.f90 b/src/tests/unit/sync/syncimages2.f90
new file mode 100644
index 0000000..5674181
--- /dev/null
+++ b/src/tests/unit/sync/syncimages2.f90
@@ -0,0 +1,44 @@
+! syncimages2 test
+!
+! Copyright (c) 2012-2014, Sourcery, Inc.
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions are met:
+!     * Redistributions of source code must retain the above copyright
+!       notice, this list of conditions and the following disclaimer.
+!     * Redistributions in binary form must reproduce the above copyright
+!       notice, this list of conditions and the following disclaimer in the
+!       documentation and/or other materials provided with the distribution.
+!     * Neither the name of the Sourcery, Inc., nor the
+!       names of its contributors may be used to endorse or promote products
+!       derived from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+! ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+! WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+! DISCLAIMED. IN NO EVENT SHALL SOURCERY, INC., BE LIABLE FOR ANY
+! DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+! (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+! ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+!
+
+! The test passes if it terminates
+! If the test fails you will get an error or a non-termination.
+
+program syncimages2
+  implicit none
+
+  if(this_image() == 1) then
+    sync images(*)
+  else
+    sync images(1)
+  endif
+
+  sync all
+
+  if(this_image()==1) print *,"Test passed."
+
+end program
diff --git a/src/tests/unit/sync/syncimages_status.f90 b/src/tests/unit/sync/syncimages_status.f90
new file mode 100644
index 0000000..52a2a4b
--- /dev/null
+++ b/src/tests/unit/sync/syncimages_status.f90
@@ -0,0 +1,22 @@
+! SYNC IMAGES(*) with the STAT=STAT_STOPPED_IMAGE specifier
+! Based on a test taken from UH caf-testsuite
+
+program sync_images_stat
+  use, intrinsic:: iso_fortran_env
+  implicit none
+
+  integer :: stat_var = 0, me
+
+  me = this_image()
+
+  if (me /= 1 ) then
+     call sleep(1)
+     sync images(*,STAT=stat_var)
+     if ( stat_var /= STAT_STOPPED_IMAGE) then
+        print *, "Error:stat_var /= STAT_STOPPED_IMAGE: ", me
+        ERROR STOP 1
+     end if
+     if(me == 2) print *, 'Test passed.'
+  end if
+
+end program sync_images_stat

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/open-coarrays.git



More information about the debian-science-commits mailing list