[mlpack] 52/53: Remove features that are not ready for release.

Barak A. Pearlmutter barak+git at pearlmutter.net
Mon Nov 14 00:46:51 UTC 2016


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit 651ea9bf768b5cb75eb8f1986786932649b3d5cc
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Oct 31 23:39:30 2016 +0900

    Remove features that are not ready for release.
---
 src/mlpack/CMakeLists.txt                          |   1 -
 src/mlpack/bindings/CMakeLists.txt                 |   4 -
 src/mlpack/bindings/matlab/CMakeLists.txt          | 154 ----
 src/mlpack/bindings/matlab/allkfn/CMakeLists.txt   |  19 -
 src/mlpack/bindings/matlab/allkfn/allkfn.cpp       | 194 -----
 src/mlpack/bindings/matlab/allkfn/allkfn.m         |  58 --
 src/mlpack/bindings/matlab/allknn/CMakeLists.txt   |  19 -
 src/mlpack/bindings/matlab/allknn/allknn.cpp       | 279 ------
 src/mlpack/bindings/matlab/allknn/allknn.m         |  60 --
 src/mlpack/bindings/matlab/emst/CMakeLists.txt     |  19 -
 src/mlpack/bindings/matlab/emst/emst.cpp           |  72 --
 src/mlpack/bindings/matlab/emst/emst.m             |  52 --
 src/mlpack/bindings/matlab/gmm/CMakeLists.txt      |  19 -
 src/mlpack/bindings/matlab/gmm/gmm.cpp             | 129 ---
 src/mlpack/bindings/matlab/gmm/gmm.m               |  28 -
 src/mlpack/bindings/matlab/hmm/hmm_generate.cpp    | 373 --------
 src/mlpack/bindings/matlab/hmm/hmm_generate.m      |  28 -
 .../bindings/matlab/kernel_pca/CMakeLists.txt      |  19 -
 .../bindings/matlab/kernel_pca/kernel_pca.cpp      | 136 ---
 src/mlpack/bindings/matlab/kernel_pca/kernel_pca.m |  71 --
 src/mlpack/bindings/matlab/kmeans/CMakeLists.txt   |  19 -
 src/mlpack/bindings/matlab/kmeans/kmeans.cpp       | 175 ----
 src/mlpack/bindings/matlab/kmeans/kmeans.m         |  28 -
 src/mlpack/bindings/matlab/lars/CMakeLists.txt     |  19 -
 src/mlpack/bindings/matlab/lars/lars.cpp           |  58 --
 src/mlpack/bindings/matlab/lars/lars.m             |  48 -
 src/mlpack/bindings/matlab/nca/CMakeLists.txt      |  19 -
 src/mlpack/bindings/matlab/nca/nca.cpp             |  55 --
 src/mlpack/bindings/matlab/nca/nca.m               |  24 -
 src/mlpack/bindings/matlab/nmf/CMakeLists.txt      |  19 -
 src/mlpack/bindings/matlab/nmf/nmf.cpp             | 106 ---
 src/mlpack/bindings/matlab/nmf/nmf.m               |  58 --
 src/mlpack/bindings/matlab/pca/CMakeLists.txt      |  19 -
 src/mlpack/bindings/matlab/pca/pca.cpp             |  62 --
 src/mlpack/bindings/matlab/pca/pca.m               |  33 -
 .../bindings/matlab/range_search/CMakeLists.txt    |  19 -
 .../bindings/matlab/range_search/range_search.cpp  | 325 -------
 .../bindings/matlab/range_search/range_search.m    |  47 -
 src/mlpack/methods/CMakeLists.txt                  |   3 +-
 src/mlpack/methods/ann/CMakeLists.txt              |  16 +-
 .../ann/activation_functions/CMakeLists.txt        |  18 -
 .../ann/activation_functions/identity_function.hpp |  96 --
 .../ann/activation_functions/logistic_function.hpp | 114 ---
 .../activation_functions/rectifier_function.hpp    | 115 ---
 .../ann/activation_functions/softsign_function.hpp | 134 ---
 .../ann/activation_functions/tanh_function.hpp     | 105 ---
 src/mlpack/methods/ann/cnn.hpp                     | 448 ----------
 src/mlpack/methods/ann/cnn_impl.hpp                | 289 -------
 .../methods/ann/convolution_rules/CMakeLists.txt   |  17 -
 .../methods/ann/convolution_rules/border_modes.hpp |  33 -
 .../ann/convolution_rules/fft_convolution.hpp      | 221 -----
 .../ann/convolution_rules/naive_convolution.hpp    | 190 ----
 .../ann/convolution_rules/svd_convolution.hpp      | 199 -----
 src/mlpack/methods/ann/ffn.hpp                     | 447 ----------
 src/mlpack/methods/ann/ffn_impl.hpp                | 296 -------
 src/mlpack/methods/ann/init_rules/CMakeLists.txt   |  18 -
 .../kathirvalavakumar_subavathi_init.hpp           | 121 ---
 .../methods/ann/init_rules/nguyen_widrow_init.hpp  | 117 ---
 src/mlpack/methods/ann/init_rules/oivs_init.hpp    | 130 ---
 .../methods/ann/init_rules/orthogonal_init.hpp     |  82 --
 src/mlpack/methods/ann/init_rules/zero_init.hpp    |  65 --
 src/mlpack/methods/ann/layer/CMakeLists.txt        |  30 -
 src/mlpack/methods/ann/layer/base_layer.hpp        | 223 -----
 src/mlpack/methods/ann/layer/bias_layer.hpp        | 208 -----
 .../ann/layer/binary_classification_layer.hpp      | 106 ---
 src/mlpack/methods/ann/layer/constant_layer.hpp    | 121 ---
 src/mlpack/methods/ann/layer/conv_layer.hpp        | 324 -------
 src/mlpack/methods/ann/layer/dropconnect_layer.hpp | 361 --------
 src/mlpack/methods/ann/layer/dropout_layer.hpp     | 252 ------
 src/mlpack/methods/ann/layer/empty_layer.hpp       | 133 ---
 src/mlpack/methods/ann/layer/glimpse_layer.hpp     | 484 -----------
 src/mlpack/methods/ann/layer/hard_tanh_layer.hpp   | 259 ------
 src/mlpack/methods/ann/layer/layer_traits.hpp      |  91 --
 src/mlpack/methods/ann/layer/leaky_relu_layer.hpp  | 240 -----
 src/mlpack/methods/ann/layer/linear_layer.hpp      | 289 -------
 src/mlpack/methods/ann/layer/log_softmax_layer.hpp | 131 ---
 src/mlpack/methods/ann/layer/lstm_layer.hpp        | 418 ---------
 .../ann/layer/multiclass_classification_layer.hpp  |  98 ---
 .../methods/ann/layer/multiply_constant_layer.hpp  | 113 ---
 .../ann/layer/negative_log_likelihood_layer.hpp    | 127 ---
 src/mlpack/methods/ann/layer/one_hot_layer.hpp     |  96 --
 src/mlpack/methods/ann/layer/pooling_layer.hpp     | 267 ------
 src/mlpack/methods/ann/layer/recurrent_layer.hpp   | 192 ----
 .../methods/ann/layer/reinforce_normal_layer.hpp   | 139 ---
 src/mlpack/methods/ann/layer/softmax_layer.hpp     | 114 ---
 src/mlpack/methods/ann/layer/sparse_bias_layer.hpp | 177 ----
 .../methods/ann/layer/sparse_input_layer.hpp       | 180 ----
 .../methods/ann/layer/sparse_output_layer.hpp      | 227 -----
 .../methods/ann/layer/vr_class_reward_layer.hpp    | 171 ----
 src/mlpack/methods/ann/network_traits.hpp          |  55 --
 src/mlpack/methods/ann/network_util.hpp            | 247 ------
 src/mlpack/methods/ann/network_util_impl.hpp       | 286 ------
 .../ann/performance_functions/CMakeLists.txt       |  17 -
 .../ann/performance_functions/cee_function.hpp     |  74 --
 .../ann/performance_functions/mse_function.hpp     |  61 --
 .../ann/performance_functions/sparse_function.hpp  | 141 ---
 .../ann/performance_functions/sse_function.hpp     |  64 --
 .../methods/ann/pooling_rules/CMakeLists.txt       |  15 -
 .../methods/ann/pooling_rules/max_pooling.hpp      |  56 --
 .../methods/ann/pooling_rules/mean_pooling.hpp     |  56 --
 src/mlpack/methods/ann/rnn.hpp                     | 799 -----------------
 src/mlpack/methods/ann/rnn_impl.hpp                | 357 --------
 src/mlpack/methods/mvu/CMakeLists.txt              |  17 -
 src/mlpack/methods/mvu/mvu.cpp                     | 112 ---
 src/mlpack/methods/mvu/mvu.hpp                     |  48 -
 src/mlpack/methods/mvu/mvu_main.cpp                |  80 --
 src/mlpack/methods/rmva/CMakeLists.txt             |  17 -
 src/mlpack/methods/rmva/rmva.hpp                   | 963 ---------------------
 src/mlpack/methods/rmva/rmva_impl.hpp              | 740 ----------------
 src/mlpack/methods/rmva/rmva_main.cpp              | 295 -------
 src/mlpack/tests/CMakeLists.txt                    |  10 -
 src/mlpack/tests/activation_functions_test.cpp     | 328 -------
 src/mlpack/tests/ada_delta_test.cpp                | 110 ---
 src/mlpack/tests/adam_test.cpp                     | 109 ---
 src/mlpack/tests/convolution_test.cpp              | 373 --------
 src/mlpack/tests/convolutional_network_test.cpp    | 146 ----
 src/mlpack/tests/feedforward_network_test.cpp      | 509 -----------
 src/mlpack/tests/init_rules_test.cpp               | 126 ---
 src/mlpack/tests/layer_traits_test.cpp             |  69 --
 src/mlpack/tests/lstm_peephole_test.cpp            |  92 --
 src/mlpack/tests/network_util_test.cpp             | 149 ----
 src/mlpack/tests/pooling_rules_test.cpp            |  80 --
 src/mlpack/tests/recurrent_network_test.cpp        | 604 -------------
 src/mlpack/tests/rmsprop_test.cpp                  | 162 ----
 124 files changed, 2 insertions(+), 19077 deletions(-)

diff --git a/src/mlpack/CMakeLists.txt b/src/mlpack/CMakeLists.txt
index c5caca1..1232813 100644
--- a/src/mlpack/CMakeLists.txt
+++ b/src/mlpack/CMakeLists.txt
@@ -6,7 +6,6 @@ set(MLPACK_SRCS ${MLPACK_SRCS} "${CMAKE_CURRENT_SOURCE_DIR}/core.hpp")
 
 ## Recurse into both core/ and methods/.
 set(DIRS
-  bindings
   core
   methods
 )
diff --git a/src/mlpack/bindings/CMakeLists.txt b/src/mlpack/bindings/CMakeLists.txt
deleted file mode 100644
index 19aad5c..0000000
--- a/src/mlpack/bindings/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# Recurse into individual binding subdirectories, if we are supposed to.
-if(MATLAB_BINDINGS)
-  add_subdirectory(matlab)
-endif()
diff --git a/src/mlpack/bindings/matlab/CMakeLists.txt b/src/mlpack/bindings/matlab/CMakeLists.txt
deleted file mode 100644
index 24ddbde..0000000
--- a/src/mlpack/bindings/matlab/CMakeLists.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-# Build rules for the MATLAB bindings for MLPACK.  These may not work well on
-# non-Linux systems.
-
-# We need the mex compiler for this to work.
-find_package(MatlabMex REQUIRED)
-
-# If the mex compiler is wrapping an "unsupported" version, warn the user that
-# they may have issues with the produced bindings for a multitude of reasons.
-# We can only reasonably check this on a UNIX-like system.
-if(UNIX)
-  # The file test.cpp does not exist, but mex will still print a warning if it's
-  # using a weird version.
-  execute_process(COMMAND "${MATLAB_MEX}" test.cpp
-                  RESULT_VARIABLE MEX_RESULT_TRASH
-                  OUTPUT_VARIABLE MEX_OUTPUT
-                  ERROR_VARIABLE MEX_ERROR_TRASH)
-
-  string(REGEX MATCH "Warning: You are using" MEX_WARNING "${MEX_OUTPUT}")
-
-  if(MEX_WARNING)
-    # We have to find the old compiler version and the new compiler version; if
-    # the MATLAB version is newer, then we don't need to worry.  If this step
-    # fails somehow, we will just issue the warning anyway (just in case).
-    string(REGEX REPLACE
-        ".*using [a-zA-Z]* version \"([0-9.]*)[^\"]*\".*"
-        "\\1" OTHER_COMPILER_VERSION "${MEX_OUTPUT}")
-    string(REGEX REPLACE
-        ".*currently supported with MEX is \"([0-9.]*)[^\"]*\".*"
-        "\\1" MEX_COMPILER_VERSION "${MEX_OUTPUT}")
-
-    # If MEX_COMPILER_VERSION is greater than OTHER_COMPILER_VERSION, we don't
-    # need to issue a warning.
-    set(NEED_TO_WARN 1)
-    if(MEX_COMPILER_VERSION AND OTHER_COMPILER_VERSION)
-      # We seem to have read two valid version strings.  So we can compare
-      # them, and maybe we don't need to issue the warning.
-      if(NOT ("${MEX_COMPILER_VERSION}" VERSION_LESS
-          "${OTHER_COMPILER_VERSION}"))
-        # The mex compiler is newer than our version.  So no warning is
-        # needed.
-        set(NEED_TO_WARN 0)
-      endif(NOT ("${MEX_COMPILER_VERSION}" VERSION_LESS
-          "${OTHER_COMPILER_VERSION}"))
-    endif()
-
-    if(NEED_TO_WARN EQUAL 1)
-      message(WARNING "The MATLAB runtime glibc is different than the system "
-          " glibc.  This can (and probably will) cause the MLPACK bindings "
-          "generated by this build script to fail with odd GLIBCXX_a_b_c "
-          "version complaints when they are run.  Assuming that the system "
-          "glibc is newer than the MATLAB-provided version, the MATLAB version "
-          "can probably be deleted (always save a copy in case this is wrong!)."
-          "\nFor more information on this confusing issue, see\n"
-          "http://dovgalecs.com/blog/matlab-glibcxx_3-4-11-not-found/\nand for "
-          "an overly-detailed dissertation/rant on why it is not possible to "
-          "work around this issue in any way, see\n"
-          "http://www.mlpack.org/trac/ticket/253 for more information.")
-    endif()
-  endif()
-endif()
-
-# Ignore the fact that we are setting CMAKE_SHARED_LIBRARY_CXX_FLAGS on CMake
-# 2.8.9 and newer.  Because we are requiring at least CMake 2.8.5, we only have
-# to check the patch version.
-if(${CMAKE_PATCH_VERSION} GREATER 8)
-  cmake_policy(SET CMP0018 OLD)
-endif()
-
-# Use the mex compiler to compile.
-set(CMAKE_CXX_COMPILER "${MATLAB_MEX}")
-
-# Set flags for the mex compiler, because a lot of the default CMake flags
-# aren't accepted by mex.  The user who wants to customize these things should
-# probably modify their mexopts.sh so that mex uses those flags by default.
-# There is no easy way to tell mex to compile with profiling symbols, so that is
-# not done even if PROFILE is set.
-if(DEBUG)
-  set(CMAKE_CXX_FLAGS "-g")
-  set(CMAKE_C_FLAGS "-g")
-else()
-  set(CMAKE_CXX_FLAGS "-O")
-  set(CMAKE_C_FLAGS "-O")
-endif()
-
-# Don't give -fPIC; mex will do that for us.
-set(CMAKE_SHARED_LIBRARY_C_FLAGS "")
-set(CMAKE_SHARED_LIBRARY_CXX_FLAGS "")
-
-# Don't make 'lib<method>.mexglx'.
-set(CMAKE_SHARED_LIBRARY_PREFIX "")
-set(CMAKE_SHARED_MODULE_PREFIX "")
-
-# Set custom commands for mex compilation, because the flags are (in general)
-# odd and different.
-set(CMAKE_CXX_COMPILE_OBJECT "<CMAKE_CXX_COMPILER> -outdir <OBJECT_DIR> <FLAGS> -c <SOURCE>")
-set(CMAKE_CXX_CREATE_SHARED_MODULE "<CMAKE_CXX_COMPILER> -cxx <LINK_FLAGS> -output <TARGET> <OBJECTS> <LINK_LIBRARIES>")
-set(CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_MODULE}")
-
-# mex is weird because it doesn't respect the -o option, but in general it
-# appears to turn <source>.cpp into <source>.o, so CMake needs to know to
-# replace the extension.
-set(CMAKE_CXX_OUTPUT_EXTENSION_REPLACE 1)
-
-if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
-  set(CMAKE_SHARED_LIBRARY_SUFFIX ".mexa64")
-  set(CMAKE_SHARED_MODULE_SUFFIX  ".mexa64")
-elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86" OR ${CMAKE_SYSTEM_PROCESSOR}
-    STREQUAL "i686")
-  set(CMAKE_SHARED_LIBRARY_SUFFIX ".mexglx")
-  set(CMAKE_SHARED_MODULE_SUFFIX  ".mexglx")
-endif()
-
-# Place MATLAB bindings in matlab/.
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/matlab/)
-
-include_directories(${CMAKE_SOURCE_DIR}/src/) # So we can include <mlpack/...>.
-
-# Set MATLAB toolbox install directory.
-set(MATLAB_TOOLBOX_DIR "${MATLAB_ROOT}/toolbox")
-
-# CHANGE HERE FOR NEW BINDINGS!!!!
-add_subdirectory(allkfn)
-add_subdirectory(allknn)
-add_subdirectory(emst)
-add_subdirectory(kmeans)
-add_subdirectory(range_search)
-add_subdirectory(gmm)
-add_subdirectory(pca)
-add_subdirectory(kernel_pca)
-add_subdirectory(lars)
-add_subdirectory(nca)
-add_subdirectory(nmf)
-
-# Create a target whose sole purpose is to modify the pathdef.m MATLAB file so
-# that the MLPACK toolbox is added to the MATLAB default path.
-add_custom_target(matlab ALL
-    # Modify pathdef.m.
-    COMMAND ${CMAKE_COMMAND} -D MATLAB_ROOT="${MATLAB_ROOT}" -D
-        PATHDEF_OUTPUT_FILE="${CMAKE_BINARY_DIR}/matlab/pathdef.m" -P
-        ${CMAKE_SOURCE_DIR}/CMake/ModifyMatlabPathdef.cmake
-    # Due to the dependencies, 'make matlab' makes all the bindings.
-    DEPENDS
-    allknn_mex
-    allkfn_mex
-    emst_mex
-    gmm_mex
-    kmeans_mex
-    range_search_mex
-)
-
-install(FILES "${CMAKE_BINARY_DIR}/matlab/pathdef.m"
-    DESTINATION "${MATLAB_ROOT}/toolbox/local/"
-)
-
diff --git a/src/mlpack/bindings/matlab/allkfn/CMakeLists.txt b/src/mlpack/bindings/matlab/allkfn/CMakeLists.txt
deleted file mode 100644
index 42152b5..0000000
--- a/src/mlpack/bindings/matlab/allkfn/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(allkfn_mex SHARED
-  allkfn.cpp
-)
-target_link_libraries(allkfn_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS allkfn_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  allkfn.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/allkfn/allkfn.cpp b/src/mlpack/bindings/matlab/allkfn/allkfn.cpp
deleted file mode 100644
index 1924d91..0000000
--- a/src/mlpack/bindings/matlab/allkfn/allkfn.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-/**
- * @file allkfn.cpp
- * @author Patrick Mason
- *
- * MEX function for MATLAB All-kFN binding.
- */
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace std;
-using namespace mlpack;
-using namespace mlpack::neighbor;
-using namespace mlpack::tree;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // Check the inputs.
-  if (nrhs != 6)
-  {
-    mexErrMsgTxt("Expecting seven arguments.");
-  }
-
-  if (nlhs != 2)
-  {
-    mexErrMsgTxt("Two outputs required.");
-  }
-
-  size_t numPoints = mxGetN(prhs[0]);
-  size_t numDimensions = mxGetM(prhs[0]);
-
-  // Create the reference matrix.
-  arma::mat referenceData(numDimensions, numPoints);
-  // setting the values.
-  double * mexDataPoints = mxGetPr(prhs[0]);
-  for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-  {
-    referenceData(i) = mexDataPoints[i];
-  }
-
-  // getting the leafsize
-  int lsInt = (int) mxGetScalar(prhs[3]);
-
-  // getting k
-  size_t k = (int) mxGetScalar(prhs[1]);
-
-  // naive algorithm?
-  bool naive = (mxGetScalar(prhs[4]) == 1.0);
-
-  // single mode?
-  bool singleMode = (mxGetScalar(prhs[5]) == 1.0);
-
-  // the query matrix
-  double * mexQueryPoints = mxGetPr(prhs[2]);
-  arma::mat queryData;
-  bool hasQueryData = ((mxGetM(prhs[2]) != 0) && (mxGetN(prhs[2]) != 0));
-
-  // Sanity check on k value: must be greater than 0 and less than or equal
-  // to the number of reference points.
-  if (k > referenceData.n_cols)
-  {
-    stringstream os;
-    os << "Invalid k: " << k << "; must be greater than 0 and less ";
-    os << "than or equal to the number of reference points (";
-    os << referenceData.n_cols << ")." << endl;
-    mexErrMsgTxt(os.str().c_str());
-  }
-
-  // Sanity check on leaf size.
-  if (lsInt < 0)
-  {
-    stringstream os;
-    os << "Invalid leaf size: " << lsInt << ".  Must be greater ";
-    os << "than or equal to 0." << endl;
-    mexErrMsgTxt(os.str().c_str());
-  }
-  size_t leafSize = lsInt;
-
-  // Naive mode overrides single mode.
-  if (singleMode && naive)
-  {
-    mexWarnMsgTxt("single_mode ignored because naive is present.");
-  }
-
-  if (naive)
-    leafSize = referenceData.n_cols;
-
-  arma::Mat<size_t> neighbors;
-  arma::mat distances;
-
-  AllkFN* allkfn = NULL;
-
-  std::vector<size_t> oldFromNewRefs;
-
-  // Build trees by hand, so we can save memory: if we pass a tree to
-  // NeighborSearch, it does not copy the matrix.
-  BinarySpaceTree<bound::HRectBound<2>, QueryStat<FurthestNeighborSort> >
-      refTree(referenceData, oldFromNewRefs, leafSize);
-  BinarySpaceTree<bound::HRectBound<2>, QueryStat<FurthestNeighborSort> >*
-      queryTree = NULL; // Empty for now.
-
-  std::vector<size_t> oldFromNewQueries;
-
-  if (hasQueryData)
-  {
-    // setting the values.
-    mexDataPoints = mxGetPr(prhs[2]);
-    numPoints = mxGetN(prhs[2]);
-    numDimensions = mxGetM(prhs[2]);
-    queryData = arma::mat(numDimensions, numPoints);
-    for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-    {
-      queryData(i) = mexDataPoints[i];
-    }
-
-    if (naive && leafSize < queryData.n_cols)
-      leafSize = queryData.n_cols;
-
-    // Build trees by hand, so we can save memory: if we pass a tree to
-    // NeighborSearch, it does not copy the matrix.
-    queryTree = new BinarySpaceTree<bound::HRectBound<2>,
-        QueryStat<FurthestNeighborSort> >(queryData, oldFromNewQueries,
-        leafSize);
-
-    allkfn = new AllkFN(&refTree, queryTree, referenceData, queryData,
-        singleMode);
-  }
-  else
-  {
-    allkfn = new AllkFN(&refTree, referenceData, singleMode);
-  }
-
-  allkfn->Search(k, neighbors, distances);
-
-  // We have to map back to the original indices from before the tree
-  // construction.
-  arma::mat distancesOut(distances.n_rows, distances.n_cols);
-  arma::Mat<size_t> neighborsOut(neighbors.n_rows, neighbors.n_cols);
-
-  // Do the actual remapping.
-  if (hasQueryData)
-  {
-    for (size_t i = 0; i < distances.n_cols; ++i)
-    {
-      // Map distances (copy a column).
-      distancesOut.col(oldFromNewQueries[i]) = distances.col(i);
-
-      // Map indices of neighbors.
-      for (size_t j = 0; j < distances.n_rows; ++j)
-      {
-        neighborsOut(j, oldFromNewQueries[i]) = oldFromNewRefs[neighbors(j, i)];
-      }
-    }
-  }
-  else
-  {
-    for (size_t i = 0; i < distances.n_cols; ++i)
-    {
-      // Map distances (copy a column).
-      distancesOut.col(oldFromNewRefs[i]) = distances.col(i);
-
-      // Map indices of neighbors.
-      for (size_t j = 0; j < distances.n_rows; ++j)
-      {
-        neighborsOut(j, oldFromNewRefs[i]) = oldFromNewRefs[neighbors(j, i)];
-      }
-    }
-  }
-
-  // Clean up.
-  if (queryTree)
-    delete queryTree;
-
-  // Construct matrices to return to MATLAB, using the remapped results.
-  plhs[0] = mxCreateDoubleMatrix(distancesOut.n_rows, distancesOut.n_cols,
-      mxREAL);
-  plhs[1] = mxCreateDoubleMatrix(neighborsOut.n_rows, neighborsOut.n_cols,
-      mxREAL);
-
-  // Set the values.
-  double * out = mxGetPr(plhs[0]);
-  for (int i = 0, n = distancesOut.n_rows * distancesOut.n_cols; i < n; ++i)
-  {
-    out[i] = distancesOut(i);
-  }
-  out = mxGetPr(plhs[1]);
-  for (int i = 0, n = neighborsOut.n_rows * neighborsOut.n_cols; i < n; ++i)
-  {
-    out[i] = neighborsOut(i);
-  }
-
-  // More clean up.
-  delete allkfn;
-}
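Every removed MATLAB binding in this commit follows the same marshaling pattern as allkfn.cpp above: copy the column-major mxArray data into an arma::mat element by element, run the mlpack computation, then copy the result into a freshly created output mxArray. For reference, here is a minimal self-contained sketch of that gateway pattern; the file name and the stand-in computation are hypothetical, and it assumes only the MEX C API and Armadillo.

// example.cpp -- hypothetical minimal MEX gateway sketch, for illustration.
// Copies the input matrix into an arma::mat, scales it, and copies it back.
#include "mex.h"
#include <armadillo>

void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[])
{
  if (nrhs != 1)
    mexErrMsgTxt("Expecting one argument.");
  if (nlhs != 1)
    mexErrMsgTxt("One output required.");

  // MATLAB stores matrices column-major, just like Armadillo, so a flat
  // element-by-element copy preserves the layout.
  const size_t rows = mxGetM(prhs[0]);
  const size_t cols = mxGetN(prhs[0]);
  arma::mat data(rows, cols);
  const double* in = mxGetPr(prhs[0]);
  for (size_t i = 0; i < rows * cols; ++i)
    data(i) = in[i];

  data *= 2.0; // Stand-in for the real mlpack computation.

  // Copy the result back into a new MATLAB matrix.
  plhs[0] = mxCreateDoubleMatrix(data.n_rows, data.n_cols, mxREAL);
  double* out = mxGetPr(plhs[0]);
  for (size_t i = 0; i < data.n_elem; ++i)
    out[i] = data(i);
}

Compiled with mex, this would produce example.mexa64 (on 64-bit Linux) and be callable from MATLAB as Y = example(X).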
diff --git a/src/mlpack/bindings/matlab/allkfn/allkfn.m b/src/mlpack/bindings/matlab/allkfn/allkfn.m
deleted file mode 100644
index b1cd5ba..0000000
--- a/src/mlpack/bindings/matlab/allkfn/allkfn.m
+++ /dev/null
@@ -1,58 +0,0 @@
-function [distances, neighbors] = allkfn(dataPoints, k, varargin)
-% [distances, neighbors] = allkfn(dataPoints, k, varargin)
-%
-% Calculate the all k-furthest-neighbors of a set of points.  You may specify a
-% separate set of reference points and query points, or just a reference set
-% which will be used as both the reference and query set.
-%
-% The output matrices are organized such that row i and column j in the
-% neighbors matrix corresponds to the index of the point in the reference set
-% which is the i'th furthest neighbor from the point in the query set with index
-% j.  Row i and column j in the distances output matrix corresponds to the
-% distance between those two points.
-%
-% Parameters:
-%
-% dataPoints - The reference set of data points.  Columns are assumed to
-%              represent dimensions, with rows representing separate points.
-% k          - The number of furthest neighbors to find.
-%
-% Optional parameters (i.e. allkfn(..., 'parameter', value, ...)):
-%
-% 'queryPoints' - An optional set of query points, if the reference and query
-%                 sets are different.  Columns are assumed to represent
-%                 dimensions, with rows representing separate points.
-% 'leafSize'    - Leaf size in the kd-tree.  Defaults to 20.
-% 'naive'       - If true, use naive O(n^2) computation instead of a tree.
-%                 Defaults to false.
-% 'singleMode'  - If true, use single-tree traversal instead of the standard
-%                 dual-tree traversal.  Defaults to false.
-%
-% Examples:
-%
-% [distances, neighbors] = allkfn(dataPoints, 5);
-% [distances, neighbors] = allkfn(dataPoints, 5, 'singleMode', true);
-% [distances, neighbors] = allkfn(dataPoints, 5, 'queryPoints', queryPoints);
-
-% A parser for the inputs.
-p = inputParser;
-p.addParamValue('queryPoints', zeros(0), @ismatrix);
-p.addParamValue('leafSize', 20, @isscalar);
-p.addParamValue('naive', false, @(x) (x == true) || (x == false));
-p.addParamValue('singleMode', false, @(x) (x == true) || (x == false));
-
-% Parse the varargin options.
-p.parse(varargin{:});
-parsed = p.Results;
-
-% Interface with mlpack; the binding is named allkfn_mex, since the mex file
-% must have a different name than this .m file.
-[distances, neighbors] = allkfn_mex(dataPoints', k, parsed.queryPoints', ...
-    parsed.leafSize, parsed.naive, parsed.singleMode);
-
-% transposing results
-distances = distances';
-neighbors = neighbors' + 1; % MATLAB indices begin at 1, not 0.
-
-return;
-
diff --git a/src/mlpack/bindings/matlab/allknn/CMakeLists.txt b/src/mlpack/bindings/matlab/allknn/CMakeLists.txt
deleted file mode 100644
index f7df5b8..0000000
--- a/src/mlpack/bindings/matlab/allknn/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(allknn_mex SHARED
-  allknn.cpp
-)
-target_link_libraries(allknn_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS allknn_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  allknn.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/allknn/allknn.cpp b/src/mlpack/bindings/matlab/allknn/allknn.cpp
deleted file mode 100644
index a13b114..0000000
--- a/src/mlpack/bindings/matlab/allknn/allknn.cpp
+++ /dev/null
@@ -1,279 +0,0 @@
-/**
- * @file allknn.cpp
- * @author Patrick Mason
- *
- * MEX function for MATLAB All-kNN binding.
- */
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/core/tree/cover_tree.hpp>
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace std;
-using namespace mlpack;
-using namespace mlpack::neighbor;
-using namespace mlpack::tree;
-
-// the gateway, required by all mex functions
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // checking inputs
-  if (nrhs != 7)
-  {
-    mexErrMsgTxt("Expecting seven arguments.");
-  }
-
-  if (nlhs != 2)
-  {
-    mexErrMsgTxt("Two outputs required.");
-  }
-
-  // getting the dimensions of the reference matrix
-  size_t numPoints = mxGetN(prhs[0]);
-  size_t numDimensions = mxGetM(prhs[0]);
-
-  // feeding the referenceData matrix
-  arma::mat referenceData(numDimensions, numPoints);
-  // setting the values.
-  double * mexDataPoints = mxGetPr(prhs[0]);
-  for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-  {
-    referenceData(i) = mexDataPoints[i];
-  }
-
-  // getting the leafsize
-  int lsInt = (int) mxGetScalar(prhs[3]);
-
-  // getting k
-  size_t k = (int) mxGetScalar(prhs[1]);
-
-  // naive algorithm?
-  bool naive = (mxGetScalar(prhs[4]) == 1.0);
-
-  // single mode?
-  bool singleMode = (mxGetScalar(prhs[5]) == 1.0);
-
-  // the query matrix
-  double * mexQueryPoints = mxGetPr(prhs[2]);
-  arma::mat queryData;
-  bool hasQueryData = ((mxGetM(prhs[2]) != 0) && (mxGetN(prhs[2]) != 0));
-
-  // cover-tree?
-  bool usesCoverTree = (mxGetScalar(prhs[6]) == 1.0);
-
-  // Sanity check on k value: must be greater than 0 and less than or equal
-  // to the number of reference points.
-  if (k > referenceData.n_cols)
-  {
-    stringstream os;
-    os << "Invalid k: " << k << "; must be greater than 0 and less ";
-    os << "than or equal to the number of reference points (";
-    os << referenceData.n_cols << ")." << endl;
-    mexErrMsgTxt(os.str().c_str());
-  }
-
-  // Sanity check on leaf size.
-  if (lsInt < 0)
-  {
-    stringstream os;
-    os << "Invalid leaf size: " << lsInt << ".  Must be greater "
-        "than or equal to 0." << endl;
-    mexErrMsgTxt(os.str().c_str());
-  }
-  size_t leafSize = lsInt;
-
-  // Naive mode overrides single mode.
-  if (singleMode && naive)
-  {
-     mexWarnMsgTxt("single_mode ignored because naive is present.");
-  }
-
-  if (naive)
-    leafSize = referenceData.n_cols;
-
-  arma::Mat<size_t> neighbors;
-  arma::mat distances;
-
-  //if (!CLI::HasParam("cover_tree"))
-  if (usesCoverTree)
-  {
-    // Because we may construct it differently, we need a pointer.
-    AllkNN* allknn = NULL;
-
-    // Mappings for when we build the tree.
-    std::vector<size_t> oldFromNewRefs;
-
-    // Build trees by hand, so we can save memory: if we pass a tree to
-    // NeighborSearch, it does not copy the matrix.
-
-    BinarySpaceTree<bound::HRectBound<2>, QueryStat<NearestNeighborSort> >
-      refTree(referenceData, oldFromNewRefs, leafSize);
-    BinarySpaceTree<bound::HRectBound<2>, QueryStat<NearestNeighborSort> >*
-      queryTree = NULL; // Empty for now.
-
-    std::vector<size_t> oldFromNewQueries;
-
-    if (hasQueryData)
-    {
-      // setting the values.
-      mexDataPoints = mxGetPr(prhs[2]);
-      numPoints = mxGetN(prhs[2]);
-      numDimensions = mxGetM(prhs[2]);
-      queryData = arma::mat(numDimensions, numPoints);
-      for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-      {
-        queryData(i) = mexDataPoints[i];
-      }
-
-      if (naive && leafSize < queryData.n_cols)
-        leafSize = queryData.n_cols;
-
-      // Build trees by hand, so we can save memory: if we pass a tree to
-      // NeighborSearch, it does not copy the matrix.
-      if (!singleMode)
-      {
-        queryTree = new BinarySpaceTree<bound::HRectBound<2>,
-            QueryStat<NearestNeighborSort> >(queryData, oldFromNewQueries,
-            leafSize);
-      }
-
-      allknn = new AllkNN(&refTree, queryTree, referenceData, queryData,
-          singleMode);
-    }
-    else
-    {
-      allknn = new AllkNN(&refTree, referenceData, singleMode);
-    }
-
-    arma::mat distancesOut;
-    arma::Mat<size_t> neighborsOut;
-
-    allknn->Search(k, neighborsOut, distancesOut);
-
-    // We have to map back to the original indices from before the tree
-    // construction.
-    neighbors.set_size(neighborsOut.n_rows, neighborsOut.n_cols);
-    distances.set_size(distancesOut.n_rows, distancesOut.n_cols);
-
-    // Do the actual remapping.
-    if ((hasQueryData) && !singleMode)
-    {
-      for (size_t i = 0; i < distancesOut.n_cols; ++i)
-      {
-        // Map distances (copy a column) and square root.
-        distances.col(oldFromNewQueries[i]) = sqrt(distancesOut.col(i));
-
-        // Map indices of neighbors.
-        for (size_t j = 0; j < distancesOut.n_rows; ++j)
-        {
-          neighbors(j, oldFromNewQueries[i]) =
-              oldFromNewRefs[neighborsOut(j, i)];
-        }
-      }
-    }
-    else if ((hasQueryData) && singleMode)
-    {
-      // No remapping of queries is necessary.  So distances are the same.
-      distances = sqrt(distancesOut);
-
-      // The neighbor indices must be mapped.
-      for (size_t j = 0; j < neighborsOut.n_elem; ++j)
-      {
-        neighbors[j] = oldFromNewRefs[neighborsOut[j]];
-      }
-    }
-    else
-    {
-      for (size_t i = 0; i < distancesOut.n_cols; ++i)
-      {
-        // Map distances (copy a column).
-        distances.col(oldFromNewRefs[i]) = sqrt(distancesOut.col(i));
-
-        // Map indices of neighbors.
-        for (size_t j = 0; j < distancesOut.n_rows; ++j)
-        {
-          neighbors(j, oldFromNewRefs[i]) = oldFromNewRefs[neighborsOut(j, i)];
-        }
-      }
-    }
-
-    // Clean up.
-    if (queryTree)
-      delete queryTree;
-
-    delete allknn;
-  }
-  else // Cover trees.
-  {
-    // Build our reference tree.
-    CoverTree<metric::LMetric<2, true>, tree::FirstPointIsRoot,
-        QueryStat<NearestNeighborSort> > referenceTree(referenceData, 1.3);
-    CoverTree<metric::LMetric<2, true>, tree::FirstPointIsRoot,
-        QueryStat<NearestNeighborSort> >* queryTree = NULL;
-
-    NeighborSearch<NearestNeighborSort, metric::LMetric<2, true>,
-        CoverTree<metric::LMetric<2, true>, tree::FirstPointIsRoot,
-        QueryStat<NearestNeighborSort> > >* allknn = NULL;
-
-    // See if we have query data.
-    if (hasQueryData)
-    {
-      // setting the values.
-      mexDataPoints = mxGetPr(prhs[2]);
-      numPoints = mxGetN(prhs[2]);
-      numDimensions = mxGetM(prhs[2]);
-      queryData = arma::mat(numDimensions, numPoints);
-      for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-      {
-        queryData(i) = mexDataPoints[i];
-      }
-
-      // Build query tree.
-      if (!singleMode)
-      {
-        queryTree = new CoverTree<metric::LMetric<2, true>,
-            tree::FirstPointIsRoot, QueryStat<NearestNeighborSort> >(queryData,
-            1.3);
-      }
-
-      allknn = new NeighborSearch<NearestNeighborSort, metric::LMetric<2, true>,
-          CoverTree<metric::LMetric<2, true>, tree::FirstPointIsRoot,
-          QueryStat<NearestNeighborSort> > >(&referenceTree, queryTree,
-          referenceData, queryData, singleMode);
-    }
-    else
-    {
-      allknn = new NeighborSearch<NearestNeighborSort, metric::LMetric<2, true>,
-          CoverTree<metric::LMetric<2, true>, tree::FirstPointIsRoot,
-          QueryStat<NearestNeighborSort> > >(&referenceTree, referenceData,
-          singleMode);
-    }
-
-    allknn->Search(k, neighbors, distances);
-
-    delete allknn;
-
-    if (queryTree)
-      delete queryTree;
-  }
-
-  // writing back to matlab
-  // constructing matrix to return to matlab
-  plhs[0] = mxCreateDoubleMatrix(distances.n_rows, distances.n_cols, mxREAL);
-  plhs[1] = mxCreateDoubleMatrix(neighbors.n_rows, neighbors.n_cols, mxREAL);
-
-  // setting the values
-  double * out = mxGetPr(plhs[0]);
-  for (int i = 0, n = distances.n_rows * distances.n_cols; i < n; ++i)
-  {
-    out[i] = distances(i);
-  }
-  out = mxGetPr(plhs[1]);
-  for (int i = 0, n = neighbors.n_rows * neighbors.n_cols; i < n; ++i)
-  {
-    out[i] = neighbors(i);
-  }
-
-}
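A subtlety worth noting in allknn.cpp above: building a BinarySpaceTree reorders the points in the dataset, so both the result columns and the neighbor indices must be mapped back through the oldFromNew vectors before being returned. A minimal sketch of that remapping step in isolation (the function name is hypothetical; this mirrors the branch above where the reference set doubles as the query set):

// Hypothetical illustration of the oldFromNew remapping used above.
#include <armadillo>
#include <vector>

// Map tree-ordered results back to the user's original point ordering.
// oldFromNew[i] gives the original index of the point now stored at column i.
void RemapResults(const arma::mat& distances,
                  const arma::Mat<size_t>& neighbors,
                  const std::vector<size_t>& oldFromNew,
                  arma::mat& distancesOut,
                  arma::Mat<size_t>& neighborsOut)
{
  distancesOut.set_size(distances.n_rows, distances.n_cols);
  neighborsOut.set_size(neighbors.n_rows, neighbors.n_cols);
  for (size_t i = 0; i < distances.n_cols; ++i)
  {
    // Column i of the tree-ordered output belongs to original point
    // oldFromNew[i]; the neighbor indices must be translated the same way.
    distancesOut.col(oldFromNew[i]) = distances.col(i);
    for (size_t j = 0; j < neighbors.n_rows; ++j)
      neighborsOut(j, oldFromNew[i]) = oldFromNew[neighbors(j, i)];
  }
}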
diff --git a/src/mlpack/bindings/matlab/allknn/allknn.m b/src/mlpack/bindings/matlab/allknn/allknn.m
deleted file mode 100644
index e796602..0000000
--- a/src/mlpack/bindings/matlab/allknn/allknn.m
+++ /dev/null
@@ -1,60 +0,0 @@
-function [distances, neighbors] = allknn(dataPoints, k, varargin)
-%All K-Nearest-Neighbors
-%
-%  This program will calculate the all k-nearest-neighbors of a set of points
-%  using kd-trees or cover trees (cover tree support is experimental and may not
-%  be optimally fast). You may specify a separate set of reference points and
-%  query points, or just a reference set which will be used as both the reference
-%  and query set.
-%
-%  For example, the following will calculate the 5 nearest neighbors of each
-%  point in dataPoints, returning the distances and the neighbor indices:
-%
-%  [distances, neighbors] = allknn(dataPoints, 5);
-%
-%  The output matrices are organized such that row i and column j in the
-%  neighbors matrix corresponds to the index of the point in the reference set
-%  which is the i'th nearest neighbor from the point in the query set with
-%  index j.  Row i and column j in the distances matrix corresponds to the
-%  distance between those two points.
-%
-% Parameters:
-% dataPoints - The matrix of data points.  Columns are assumed to represent
-%              dimensions, with rows representing separate points.
-% k          - The number of nearest neighbors to find.
-%
-% Optional parameters (i.e. allknn(..., 'parameter', value, ...)):
-%
-% 'queryPoints' - An optional set of query points, if the reference and query
-%                 sets are different.
-% 'leafSize'    - Leaf size in the kd-tree.  Defaults to 20.
-% 'naive'       - If true, use naive O(n^2) computation.  Defaults to false.
-% 'singleMode'  - If true, use single-tree traversal.  Defaults to false.
-% 'coverTree'   - If true, use cover trees instead of kd-trees (experimental).
-%                 Defaults to false.
-%
-% Examples:
-% [distances, neighbors] = allknn(dataPoints, 5);
-% [distances, neighbors] = allknn(dataPoints, 5, 'leafSize', 1);
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('queryPoints', zeros(0), @ismatrix);
-p.addParamValue('leafSize', 20, @isscalar);
-p.addParamValue('naive', false, @(x) (x == true) || (x == false));
-p.addParamValue('singleMode', false, @(x) (x == true) || (x == false));
-p.addParamValue('coverTree', false, @(x) (x == true) || (x == false));
-
-% Parse the varargin options.
-p.parse(varargin{:});
-parsed = p.Results;
-
-% Interface with mlpack; the binding is named allknn_mex, since the mex file
-% must have a different name than this .m file.
-[distances, neighbors] = allknn_mex(dataPoints', k, parsed.queryPoints', ...
-    parsed.leafSize, parsed.naive, parsed.singleMode, parsed.coverTree);
-
-% transposing results
-distances = distances';
-neighbors = neighbors' + 1; % MATLAB indices begin at 1, not 0.
-
-return;
-
diff --git a/src/mlpack/bindings/matlab/emst/CMakeLists.txt b/src/mlpack/bindings/matlab/emst/CMakeLists.txt
deleted file mode 100644
index 3b79cdf..0000000
--- a/src/mlpack/bindings/matlab/emst/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(emst_mex SHARED
-  emst.cpp
-)
-target_link_libraries(emst_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS emst_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  emst.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/emst/emst.cpp b/src/mlpack/bindings/matlab/emst/emst.cpp
deleted file mode 100644
index 24e6c8a..0000000
--- a/src/mlpack/bindings/matlab/emst/emst.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * @file emst.cpp
- * @author Patrick Mason
- *
- * MEX function for MATLAB EMST binding.
- */
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/emst/dtb.hpp>
-
-#include <iostream>
-
-using namespace mlpack;
-using namespace mlpack::emst;
-using namespace mlpack::tree;
-
-// The gateway, required by all mex functions.
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // Argument checks.
-  if (nrhs != 3)
-  {
-    mexErrMsgTxt("Expecting an datapoints matrix, isBoruvka, and leafSize.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  const size_t numPoints = mxGetN(prhs[0]);
-  const size_t numDimensions = mxGetM(prhs[0]);
-
-  // Converting from mxArray to armadillo matrix.
-  arma::mat dataPoints(numDimensions, numPoints);
-
-  // Set the values.
-  double* mexDataPoints = mxGetPr(prhs[0]);
-  for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-  {
-    dataPoints(i) = mexDataPoints[i];
-  }
-
-  const bool isBoruvka = (mxGetScalar(prhs[1]) == 1.0);
-
-  // Run the computation.
-  arma::mat result;
-  if (isBoruvka)
-  {
-    // Get the number of leaves.
-    const size_t leafSize = (size_t) mxGetScalar(prhs[2]);
-
-    DualTreeBoruvka<> dtb(dataPoints, false, leafSize);
-    dtb.ComputeMST(result);
-  }
-  else
-  {
-    DualTreeBoruvka<> naive(dataPoints, true);
-    naive.ComputeMST(result);
-  }
-
-  // Construct matrix to return to MATLAB.
-  plhs[0] = mxCreateDoubleMatrix(3, numPoints - 1, mxREAL);
-
-  double* out = mxGetPr(plhs[0]);
-  for (int i = 0, n = (numPoints - 1) * 3; i < n; ++i)
-  {
-    out[i] = result(i);
-  }
-}
diff --git a/src/mlpack/bindings/matlab/emst/emst.m b/src/mlpack/bindings/matlab/emst/emst.m
deleted file mode 100644
index ce84fa7..0000000
--- a/src/mlpack/bindings/matlab/emst/emst.m
+++ /dev/null
@@ -1,52 +0,0 @@
-function result = emst(dataPoints, varargin)
-% result = emst(dataPoints, varargin)
-%
-% Compute the Euclidean minimum spanning tree of a set of input points using the
-% dual-tree Boruvka algorithm.
-%
-% The output is saved in a three-column matrix, where each row indicates an
-% edge.  The first column corresponds to the lesser index of the edge; the
-% second column corresponds to the greater index of the edge; and the third
-% column corresponds to the distance between the two points.
-%
-% Required parameters:
-%
-% dataPoints - The matrix of data points. Columns are assumed to represent
-%              dimensions, with rows representing separate points.
-%
-% Optional parameters (i.e. emst(..., 'parameter', value, ...)):
-%
-% 'method'   - The algorithm for computing the tree. 'naive' or 'boruvka', with
-%              'boruvka' being the default dual-tree Boruvka algorithm.
-% 'leafSize' - Leaf size in the kd-tree.  One-element leaves give the
-%              empirically best performance, but at the cost of greater memory
-%              requirements.  Defaults to 1.
-%
-% Examples:
-%
-% result = emst(dataPoints);
-% result = emst(dataPoints, 'method', 'naive');
-% result = emst(dataPoints, 'method', 'naive', 'leafSize', 5);
-
-% A parser for the inputs.
-p = inputParser;
-p.addParamValue('method', 'boruvka', ...
-    @(x) strcmpi(x, 'naive') || strcmpi(x, 'boruvka'));
-p.addParamValue('leafSize', 1, @isscalar);
-
-% Parse the varargin options.
-p.parse(varargin{:});
-parsed = p.Results;
-
-% Interface with mlpack. Transpose to machine learning standards.  MLPACK
-% expects column-major matrices; the user has passed in a row-major matrix.
-if strcmpi(parsed.method, 'boruvka')
-  result = emst_mex(dataPoints', 1, parsed.leafSize);
-  result = result';
-  return;
-else
-  result = emst_mex(dataPoints', 0, 1);
-  result = result';
-  return;
-end
-
diff --git a/src/mlpack/bindings/matlab/gmm/CMakeLists.txt b/src/mlpack/bindings/matlab/gmm/CMakeLists.txt
deleted file mode 100644
index dacb527..0000000
--- a/src/mlpack/bindings/matlab/gmm/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(gmm_mex SHARED
-  gmm.cpp
-)
-target_link_libraries(gmm_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS gmm_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  gmm.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/gmm/gmm.cpp b/src/mlpack/bindings/matlab/gmm/gmm.cpp
deleted file mode 100644
index 63a366e..0000000
--- a/src/mlpack/bindings/matlab/gmm/gmm.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * @file gmm.cpp
- * @author Patrick Mason
- *
- * MEX function for MATLAB GMM binding.
- */
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/gmm/gmm.hpp>
-
-using namespace mlpack;
-using namespace mlpack::gmm;
-using namespace mlpack::util;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 3)
-  {
-    mexErrMsgTxt("Expecting three inputs.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  size_t seed = (size_t) mxGetScalar(prhs[2]);
-  // Check parameters and load data.
-  if (seed != 0)
-    math::RandomSeed(seed);
-  else
-    math::RandomSeed((size_t) std::time(NULL));
-
-  // loading the data
-  double * mexDataPoints = mxGetPr(prhs[0]);
-  size_t numPoints = mxGetN(prhs[0]);
-  size_t numDimensions = mxGetM(prhs[0]);
-  arma::mat dataPoints(numDimensions, numPoints);
-  for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-  {
-    dataPoints(i) = mexDataPoints[i];
-  }
-
-  int gaussians = (int) mxGetScalar(prhs[1]);
-  if (gaussians <= 0)
-  {
-    std::stringstream ss;
-    ss << "Invalid number of Gaussians (" << gaussians << "); must "
-        "be greater than or equal to 1." << std::endl;
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  // Calculate mixture of Gaussians.
-  GMM<> gmm(size_t(gaussians), dataPoints.n_rows);
-
-  ////// Computing the parameters of the model using the EM algorithm //////
-  gmm.Estimate(dataPoints);
-
-  // setting up the matlab structure to be returned
-  mwSize ndim = 1;
-  mwSize dims[1] = {
-    1
-  };
-  const char * fieldNames[3] = {
-    "dimensionality"
-    , "weights"
-    , "gaussians"
-  };
-
-  plhs[0] =  mxCreateStructArray(ndim, dims, 3, fieldNames);
-
-  // dimensionality
-  mxArray * field_value;
-  field_value = mxCreateDoubleMatrix(1, 1, mxREAL);
-  *mxGetPr(field_value) = numDimensions;
-  mxSetFieldByNumber(plhs[0], 0, 0, field_value);
-
-  // mixture weights
-  field_value = mxCreateDoubleMatrix(gmm.Weights().size(), 1, mxREAL);
-  double * values = mxGetPr(field_value);
-  for (int i=0; i<gmm.Weights().size(); ++i)
-  {
-    values[i] = gmm.Weights()[i];
-  }
-  mxSetFieldByNumber(plhs[0], 0, 1, field_value);
-
-  // gaussian mean/variances
-  const char * gaussianNames[2] = {
-    "mean"
-    , "covariance"
-  };
-  ndim = 1;
-  dims[0] = gmm.Gaussians();
-
-  field_value = mxCreateStructArray(ndim, dims, 2, gaussianNames);
-  for (int i=0; i<gmm.Gaussians(); ++i)
-  {
-    mxArray * tmp;
-    double * values;
-
-    // setting the mean
-    arma::mat mean = gmm.Means()[i];
-    tmp = mxCreateDoubleMatrix(numDimensions, 1, mxREAL);
-    values = mxGetPr(tmp);
-    for (int j = 0; j < numDimensions; ++j)
-    {
-      values[j] = mean(j);
-    }
-    // note: SetField does not copy the data structure.
-    // mxDuplicateArray does the necessary copying.
-    mxSetFieldByNumber(field_value, i, 0, mxDuplicateArray(tmp));
-    mxDestroyArray(tmp);
-
-    // setting the covariance matrix
-    arma::mat covariance = gmm.Covariances()[i];
-    tmp = mxCreateDoubleMatrix(numDimensions, numDimensions, mxREAL);
-    values = mxGetPr(tmp);
-    for (int j = 0; j < numDimensions * numDimensions; ++j)
-    {
-      values[j] = covariance(j);
-    }
-    mxSetFieldByNumber(field_value, i, 1, mxDuplicateArray(tmp));
-    mxDestroyArray(tmp);
-  }
-  mxSetFieldByNumber(plhs[0], 0, 2, field_value);
-}
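Unlike the command-line version, gmm.cpp above returns the fitted model as a MATLAB struct. The essential recipe is mxCreateStructArray followed by mxSetFieldByNumber; note that mxSetFieldByNumber does not copy its argument, so any temporary that will be destroyed afterwards must first be cloned with mxDuplicateArray. A minimal sketch of just that struct-building step, with hypothetical field names and values:

// Hypothetical sketch: returning a scalar struct with two fields to MATLAB.
#include "mex.h"

void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[])
{
  const mwSize dims[1] = { 1 };
  const char* fieldNames[2] = { "weight", "mean" };
  plhs[0] = mxCreateStructArray(1, dims, 2, fieldNames);

  // Scalar field; once set, the struct owns this array, so it must not be
  // destroyed separately.
  mxArray* weight = mxCreateDoubleMatrix(1, 1, mxREAL);
  *mxGetPr(weight) = 0.5;
  mxSetFieldByNumber(plhs[0], 0, 0, weight);

  // Vector field; mxSetFieldByNumber does not copy, so duplicate any array
  // that will be destroyed afterwards.
  mxArray* tmp = mxCreateDoubleMatrix(3, 1, mxREAL);
  double* v = mxGetPr(tmp);
  v[0] = 1.0; v[1] = 2.0; v[2] = 3.0;
  mxSetFieldByNumber(plhs[0], 0, 1, mxDuplicateArray(tmp));
  mxDestroyArray(tmp);
}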
diff --git a/src/mlpack/bindings/matlab/gmm/gmm.m b/src/mlpack/bindings/matlab/gmm/gmm.m
deleted file mode 100644
index 349ba71..0000000
--- a/src/mlpack/bindings/matlab/gmm/gmm.m
+++ /dev/null
@@ -1,28 +0,0 @@
-function result = gmm(dataPoints, varargin)
-%Gaussian Mixture Model (GMM) Training
-%
-%  This program computes a parametric estimate of a Gaussian mixture model
-%  (GMM), using the EM algorithm to find the maximum likelihood estimate.  The
-%  fitted model is returned as a struct containing information about each
-%  Gaussian.
-%
-%Parameters:
-% dataPoints - (required) Matrix containing the data on which the model will
-%              be fit.
-% seed       - (optional) Random seed.  If 0, 'std::time(NULL)' is used.
-%              Default value is 0.
-% gaussians  - (optional) Number of Gaussians in the GMM.  Default value is 1.
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('gaussians', 1, @isscalar);
-p.addParamValue('seed', 0, @isscalar);
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% Interface with mlpack; the binding is named gmm_mex, since the mex file must
-% have a different name than this .m file.
-result = gmm_mex(dataPoints', parsed.gaussians, parsed.seed);
-
-
-
-
diff --git a/src/mlpack/bindings/matlab/hmm/hmm_generate.cpp b/src/mlpack/bindings/matlab/hmm/hmm_generate.cpp
deleted file mode 100644
index 204107b..0000000
--- a/src/mlpack/bindings/matlab/hmm/hmm_generate.cpp
+++ /dev/null
@@ -1,373 +0,0 @@
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-
-#include "hmm.hpp"
-#include "hmm_util.hpp"
-#include <mlpack/methods/gmm/gmm.hpp>
-
-/*
-PROGRAM_INFO("Hidden Markov Model (HMM) Sequence Generator", "This "
-    "utility takes an already-trained HMM (--model_file) and generates a "
-    "random observation sequence and hidden state sequence based on its "
-    "parameters, saving them to the specified files (--output_file and "
-    "--state_file)");
-
-PARAM_STRING_REQ("model_file", "File containing HMM (XML).", "m");
-PARAM_INT_REQ("length", "Length of sequence to generate.", "l");
-
-PARAM_INT("start_state", "Starting state of sequence.", "t", 0);
-PARAM_STRING("output_file", "File to save observation sequence to.", "o",
-    "output.csv");
-PARAM_STRING("state_file", "File to save hidden state sequence to (may be left "
-    "unspecified.", "S", "");
-PARAM_INT("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
-*/
-
-
-using namespace mlpack;
-using namespace mlpack::hmm;
-using namespace mlpack::distribution;
-using namespace mlpack::utilities;
-using namespace mlpack::gmm;
-using namespace mlpack::math;
-using namespace arma;
-using namespace std;
-
-namespace {
-	// gets the transition matrix from the struct
-	void getTransition(mat & transition, const mxArray * mxarray)
-	{
-		mxArray * mxTransitions = mxGetField(mxarray, 0, "transition");
-		if (NULL == mxTransitions)
-		{
-			mexErrMsgTxt("Model struct did not have transition matrix 'transition'.");
-		}
-		if (mxDOUBLE_CLASS != mxGetClassID(mxTransitions))
-		{
-			mexErrMsgTxt("Transition matrix 'transition' must have type mxDOUBLE_CLASS.");
-		}
-		const size_t m = mxGetM(mxTransitions);
-		const size_t n = mxGetN(mxTransitions);
-		transition.resize(m,n);
-
-		double * values = mxGetPr(mxTransitions);
-		for (int i = 0; i < m*n; ++i)
-			transition(i) = values[i];
-	}
-
-	// writes the matlab transition matrix to the model
-	template <class T>
-	void writeTransition(HMM<T> & hmm, const mxArray * mxarray)
-	{
-		mxArray * mxTransitions = mxGetField(mxarray, 0, "transition");
-		if (NULL == mxTransitions)
-		{
-			mexErrMsgTxt("Model struct did not have transition matrix 'transition'.");
-		}
-		if (mxDOUBLE_CLASS != mxGetClassID(mxTransitions))
-		{
-			mexErrMsgTxt("Transition matrix 'transition' must have type mxDOUBLE_CLASS.");
-		}
-
-		arma::mat transition(mxGetM(mxTransitions), mxGetN(mxTransitions));
-		double * values = mxGetPr(mxTransitions);
-		for (int i = 0; i < mxGetM(mxTransitions) * mxGetN(mxTransitions); ++i)
-			transition(i) = values[i];
-
-		hmm.Transition() = transition;
-	}
-
-	// argument check on the emission field
-	void checkEmission(const mat & transition, const mxArray * mxarray)
-	{
-		if (NULL == mxarray)
-		{
-			mexErrMsgTxt("Model struct did not have 'emission' struct.");
-		}
-		if ((int) mxGetN(mxarray) != (int) transition.n_rows)
-		{
-			stringstream ss;
-			ss << "'emission' struct array must have dimensions 1 x "
-				<<  transition.n_rows << ".";
-			mexErrMsgTxt(ss.str().c_str());
-		}
-	}
-
-} // closing anonymous namespace
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 4)
-  {
-    mexErrMsgTxt("Expecting four arguments.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-	// seed argument
-	size_t seed = (size_t) mxGetScalar(prhs[3]);
-
-  // Set random seed.
-	if (seed != 0)
-    mlpack::math::RandomSeed(seed);
-  else
-    mlpack::math::RandomSeed((size_t) std::time(NULL));
-
-	// length of observations
-	const int length =  (int) mxGetScalar(prhs[1]);
-
-	// start state
-	const int startState = (int) mxGetScalar(prhs[2]);
-
-  if (length <= 0)
-  {
-		stringstream ss;
-    ss << "Invalid sequence length (" << length << "); must be greater "
-        << "than or equal to 0!";
-		mexErrMsgTxt(ss.str().c_str());
-  }
-
-	// getting the model type
-	if (mxIsStruct(prhs[0]) == 0)
-	{
-		mexErrMsgTxt("Model argument is not a struct.");
-	}
-
-	mxArray * mxHmmType = mxGetField(prhs[0], 0, "hmm_type");
-	if (mxHmmType == NULL)
-	{
-		mexErrMsgTxt("Model struct did not have 'hmm_type'.");
-	}
-	if (mxCHAR_CLASS != mxGetClassID(mxHmmType))
-	{
-		mexErrMsgTxt("'hmm_type' must have type mxCHAR_CLASS.");
-	}
-
-	// getting the model type string
-	int bufLength = mxGetNumberOfElements(mxHmmType) + 1;
-	char * buf;
-	buf = (char *) mxCalloc(bufLength, sizeof(char));
-  mxGetString(mxHmmType, buf, bufLength);
-	string type(buf);
-	mxFree(buf);
-
-	cout << type << endl;
-
-	// to be filled by the generator
-	mat observations;
-  Col<size_t> sequence;
-
-	// to be removed!
-	SaveRestoreUtility sr;
-
-  if (type == "discrete")
-  {
-    HMM<DiscreteDistribution> hmm(1, DiscreteDistribution(1));
-
-		// writing transition matrix to the hmm
-		writeTransition(hmm, prhs[0]);
-
-		// writing emission matrix to the hmm
-		mxArray * mxEmission = mxGetField(prhs[0], 0, "emission");
-		//checkEmission(hmm, mxEmission);
-
-		vector<DiscreteDistribution> emission(hmm.Transition().n_rows);
-		for (int i=0; i<hmm.Transition().n_rows; ++i)
-		{
-			mxArray * mxProbabilities = mxGetField(mxEmission, i, "probabilities");
-			if (NULL == mxProbabilities)
-			{
-				mexErrMsgTxt("'probabilities' field could not be found in 'emission' struct.");
-			}
-
-			arma::vec probabilities(mxGetN(mxProbabilities));
-			double * values = mxGetPr(mxProbabilities);
-			for (int j=0; j<mxGetN(mxProbabilities); ++j)
-				probabilities(j) = values[j];
-
-			emission[i] = DiscreteDistribution(probabilities);
-		}
-
-		hmm.Emission() = emission;
-
-		// At this point, the HMM model should be fully formed.
-    if (startState < 0 || startState >= (int) hmm.Transition().n_rows)
-    {
-			stringstream ss;
-      ss << "Invalid start state (" << startState << "); must be "
-          << "between 0 and number of states (" << hmm.Transition().n_rows
-          << ")!";
-			mexErrMsgTxt(ss.str().c_str());
-    }
-
-    hmm.Generate(size_t(length), observations, sequence, size_t(startState));
-  }
-  else if (type == "gaussian")
-  {
-		/*
-    //HMM<GaussianDistribution> hmm(1, GaussianDistribution(1));
-
-		// get transition matrix
-		//mat transition;
-		//getTransition(transition, prhs[0]);
-
-		//hmm.Transition() = transition;
-		//cout << transition << endl;
-		arma::mat transition("0.75 0.25; 0.25 0.75");
-
-		// get emission
-		//vector<GaussianDistribution> emission(transition.n_rows);
-		vector<GaussianDistribution> emission;
-  	GaussianDistribution g1("5.0 5.0", "1.0 0.0; 0.0 1.0");
-  	GaussianDistribution g2("-5.0 -5.0", "1.0 0.0; 0.0 1.0");
-  	emission.push_back(g1);
-  	emission.push_back(g2);
-
-
-		//HMM<GaussianDistribution> hmm(transition, emission);
-		//hmm.Emission() = emission;
-		HMM<GaussianDistribution> hmm(transition, emission);
-		*/
-
-		// Our distribution will have three two-dimensional output Gaussians.
-		cout << "following the test" << endl;
-  	HMM<GaussianDistribution> hmm(3, GaussianDistribution(2));
-  	hmm.Transition() = arma::mat("0.4 0.6 0.8; 0.2 0.2 0.1; 0.4 0.2 0.1");
-  	hmm.Emission()[0] = GaussianDistribution("0.0 0.0", "1.0 0.0; 0.0 1.0");
-  	hmm.Emission()[1] = GaussianDistribution("2.0 2.0", "1.0 0.5; 0.5 1.2");
-  	hmm.Emission()[2] = GaussianDistribution("-2.0 1.0", "2.0 0.1; 0.1 1.0");
-
-  	// Now we will generate a long sequence.
-  	std::vector<arma::mat> observations2(1);
-  	std::vector<arma::Col<size_t> > states2(1);
-
-		// testing
-  	SaveHMM(hmm, sr);
-  	sr.WriteFile("testMexGaussian.xml");
-
-  	// Start in state 1 (no reason).
-  	cout << "test generation" << endl;
-		hmm.Generate(10000, observations2[0], states2[0], 1);
-		cout << "test complete" << endl;
-
-    if (startState < 0 || startState >= (int) hmm.Transition().n_rows)
-    {
-			stringstream ss;
-			ss << "Invalid start state (" << startState << "); must be "
-          << "between 0 and number of states (" << hmm.Transition().n_rows
-          << ")!";
-			mexErrMsgTxt(ss.str().c_str());
-    }
-		cout << "generating!" << endl;
-    hmm.Generate(size_t(length), observations, sequence, size_t(startState));
-		cout << "done!" << endl;
-  }
-  else if (type == "gmm")
-  {
-    HMM<GMM<> > hmm(1, GMM<>(1, 1));
-
-    LoadHMM(hmm, sr);
-
-    if (startState < 0 || startState >= (int) hmm.Transition().n_rows)
-    {
-      Log::Fatal << "Invalid start state (" << startState << "); must be "
-          << "between 0 and number of states (" << hmm.Transition().n_rows
-          << ")!" << endl;
-    }
-
-    hmm.Generate(size_t(length), observations, sequence, size_t(startState));
-  }
-  else
-  {
-    Log::Fatal << "Unknown HMM type '" << type << "'" << "'!" << endl;
-  }
-
-	cout << "returning to matlab" << endl;
-
-	// Setting values to be returned to matlab
-	mwSize ndim = 1;
-  mwSize dims[1] = {1};
-  const char * fieldNames[2] = {
-    "observations"
-    , "states"
-  };
-
-	plhs[0] = mxCreateStructArray(ndim, dims, 2, fieldNames);
-
-	mxArray * tmp;
-	double * values;
-
-	cout << observations.n_rows << "," << observations.n_cols << endl;
-	cout << sequence.n_rows << "," << sequence.n_cols << endl;
-	cout << observations << endl;
-	cout << sequence << endl;
-
-	// setting the observations
-	tmp = mxCreateDoubleMatrix(observations.n_rows, observations.n_cols, mxREAL);
-	values = mxGetPr(tmp);
-	for (int i=0; i<observations.n_rows * observations.n_cols; ++i)
-		values[i] = observations(i);
-
-	// note: SetField does not copy the data structure.
-	// mxDuplicateArray does the necessary copying.
-	mxSetFieldByNumber(plhs[0], 0, 0, mxDuplicateArray(tmp));
-	mxDestroyArray(tmp);
-
-	// setting the states
-	tmp = mxCreateDoubleMatrix(sequence.n_rows, sequence.n_cols, mxREAL);
-	values = mxGetPr(tmp);
-	for (int i=0; i<length; ++i)
-		values[i] = sequence(i);
-
-	// note: SetField does not copy the data structure.
-	// mxDuplicateArray does the necessary copying.
-	mxSetFieldByNumber(plhs[0], 0, 1, mxDuplicateArray(tmp));
-	mxDestroyArray(tmp);
-}
-
-		/*
-		mxArray * mxEmission = mxGetField(prhs[0], 0, "emission");
-		checkEmission(transition, mxEmission);
-
-		vector<GaussianDistribution> emission(transition.n_rows);
-		for (int i=0; i<transition.n_rows; ++i)
-		{
-			// mean
-			mxArray * mxMean = mxGetField(mxEmission, i, "mean");
-			if (NULL == mxMean)
-			{
-				mexErrMsgTxt("'mean' field could not be found in 'emission' struct.");
-			}
-
-			arma::vec mean(mxGetN(mxMean));
-			double * values = mxGetPr(mxMean);
-			for (int j=0; j<mxGetN(mxMean); ++j)
-				mean(j) = values[j];
-
-			cout << mean << endl;
-
-			// covariance
-			mxArray * mxCovariance = mxGetField(mxEmission, i, "covariance");
-			if (NULL == mxCovariance)
-			{
-				mexErrMsgTxt("'covariance' field could not be found in 'emission' struct.");
-			}
-
-			const size_t m = (size_t) mxGetM(mxCovariance);
-			const size_t n = (size_t) mxGetN(mxCovariance);
-			mat covariance(m, n);
-			values = mxGetPr(mxCovariance);
-			for (int j=0; j < m * n; ++j)
-				covariance(j) = values[j];
-
-			cout << covariance << endl;
-
-			emission[i] = GaussianDistribution(mean, covariance);
-		}
-		*/
diff --git a/src/mlpack/bindings/matlab/hmm/hmm_generate.m b/src/mlpack/bindings/matlab/hmm/hmm_generate.m
deleted file mode 100644
index 0b62d3e..0000000
--- a/src/mlpack/bindings/matlab/hmm/hmm_generate.m
+++ /dev/null
@@ -1,28 +0,0 @@
-function sequence = hmm_generate(model, sequence_length, varargin)
-%Hidden Markov Model (HMM) Sequence Generator
-%
-%  This utility takes an already-trained HMM (model) and generates a
-%  random observation sequence and hidden state sequence based on its
-%  parameters, returning the generated sequence to MATLAB.
-%
-%Parameters:
-% model           - (required) HMM model struct.
-% sequence_length - (required) Length of the sequence to produce.
-% start_state     - (optional) Starting state of sequence.  Default value 0.
-% seed            - (optional) Random seed.  If 0, 'std::time(NULL)' is used.
-%                   Default value 0.
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('start_state', 0, @isscalar);
-p.addParamValue('seed', 0, @isscalar);
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% interfacing with mlpack.
-sequence = mex_hmm_generate(model, sequence_length, ...
-	parsed.start_state, parsed.seed);
-
-
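For reference, a minimal usage sketch of the binding removed above (this assumes
an HMM model struct produced by a corresponding training binding; the exact
field layout of 'model' is not pinned down by this code):

    % Generate a 1000-step observation sequence from a trained HMM,
    % starting in state 1 and with a fixed random seed.
    sequence = hmm_generate(model, 1000, 'start_state', 1, 'seed', 42);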
diff --git a/src/mlpack/bindings/matlab/kernel_pca/CMakeLists.txt b/src/mlpack/bindings/matlab/kernel_pca/CMakeLists.txt
deleted file mode 100644
index b2f8933..0000000
--- a/src/mlpack/bindings/matlab/kernel_pca/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(kernel_pca_mex SHARED
-  kernel_pca.cpp
-)
-target_link_libraries(kernel_pca_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS kernel_pca_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  kernel_pca.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/kernel_pca/kernel_pca.cpp b/src/mlpack/bindings/matlab/kernel_pca/kernel_pca.cpp
deleted file mode 100644
index 3257b71..0000000
--- a/src/mlpack/bindings/matlab/kernel_pca/kernel_pca.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/core/kernels/linear_kernel.hpp>
-#include <mlpack/core/kernels/gaussian_kernel.hpp>
-#include <mlpack/core/kernels/hyperbolic_tangent_kernel.hpp>
-#include <mlpack/core/kernels/laplacian_kernel.hpp>
-#include <mlpack/core/kernels/polynomial_kernel.hpp>
-#include <mlpack/core/kernels/cosine_distance.hpp>
-
-#include <mlpack/methods/kernel_pca/kernel_pca.hpp>
-
-using namespace mlpack;
-using namespace mlpack::kpca;
-using namespace mlpack::kernel;
-using namespace std;
-using namespace arma;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 8)
-  {
-    mexErrMsgTxt("Expecting eight arguments.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  // Load input dataset.
-  if (mxDOUBLE_CLASS != mxGetClassID(prhs[0]))
-    mexErrMsgTxt("Input dataset must have type mxDOUBLE_CLASS.");
-
-  mat dataset(mxGetM(prhs[0]), mxGetN(prhs[0]));
-  double * values = mxGetPr(prhs[0]);
-  for (int i=0, num=mxGetNumberOfElements(prhs[0]); i<num; ++i)
-    dataset(i) = values[i];
-
-  // Get the new dimensionality, if it is necessary.
-  size_t newDim = dataset.n_rows;
-  const int argNewDim = (int) mxGetScalar(prhs[2]);
-  if (argNewDim != 0)
-  {
-    newDim = argNewDim;
-
-    if (newDim > dataset.n_rows)
-    {
-      stringstream ss;
-      ss << "New dimensionality (" << newDim
-          << ") cannot be greater than existing dimensionality ("
-          << dataset.n_rows << ")!";
-      mexErrMsgTxt(ss.str().c_str());
-    }
-  }
-
-  // Get the kernel type and make sure it is valid.
-  if (mxCHAR_CLASS != mxGetClassID(prhs[1]))
-  {
-    mexErrMsgTxt("Kernel input must have type mxCHAR_CLASS.");
-  }
-  int bufLength = mxGetNumberOfElements(prhs[1]) + 1;
-  char * buf;
-  buf = (char *) mxCalloc(bufLength, sizeof(char));
-  mxGetString(prhs[1], buf, bufLength);
-  string kernelType(buf);
-  mxFree(buf);
-
-  // scale parameter
-  const bool scaleData = (mxGetScalar(prhs[3]) == 1.0);
-
-  if (kernelType == "linear")
-  {
-    KernelPCA<LinearKernel> kpca(LinearKernel(), scaleData);
-    kpca.Apply(dataset, newDim);
-  }
-  else if (kernelType == "gaussian")
-  {
-    // The bandwidth is the eighth argument (prhs[7]); prhs[3] is the scale flag.
-    const double bandwidth = mxGetScalar(prhs[7]);
-
-    GaussianKernel kernel(bandwidth);
-    KernelPCA<GaussianKernel> kpca(kernel, scaleData);
-    kpca.Apply(dataset, newDim);
-  }
-  else if (kernelType == "polynomial")
-  {
-    const double degree = mxGetScalar(prhs[4]);
-    const double offset = mxGetScalar(prhs[5]);
-
-    PolynomialKernel kernel(offset, degree);
-    KernelPCA<PolynomialKernel> kpca(kernel, scaleData);
-    kpca.Apply(dataset, newDim);
-  }
-  else if (kernelType == "hyptan")
-  {
-    const double scale = mxGetScalar(prhs[6]);
-    const double offset = mxGetScalar(prhs[5]);
-
-    HyperbolicTangentKernel kernel(scale, offset);
-    KernelPCA<HyperbolicTangentKernel> kpca(kernel, scaleData);
-    kpca.Apply(dataset, newDim);
-  }
-  else if (kernelType == "laplacian")
-  {
-    const double bandwidth = mxGetScalar(prhs[7]);
-
-    LaplacianKernel kernel(bandwidth);
-    KernelPCA<LaplacianKernel> kpca(kernel, scaleData);
-    kpca.Apply(dataset, newDim);
-  }
-  else if (kernelType == "cosine")
-  {
-    KernelPCA<CosineDistance> kpca(CosineDistance(), scaleData);
-    kpca.Apply(dataset, newDim);
-  }
-  else
-  {
-    // Invalid kernel type.
-    stringstream ss;
-    ss << "Invalid kernel type ('" << kernelType << "'); valid choices "
-        << "are 'linear', 'gaussian', 'polynomial', 'hyptan', 'laplacian', and "
-        << "'cosine'.";
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  // Now returning results to matlab
-  plhs[0] = mxCreateDoubleMatrix(dataset.n_rows, dataset.n_cols, mxREAL);
-  values = mxGetPr(plhs[0]);
-  for (int i = 0; i < dataset.n_rows * dataset.n_cols; ++i)
-  {
-    values[i] = dataset(i);
-  }
-
-}
diff --git a/src/mlpack/bindings/matlab/kernel_pca/kernel_pca.m b/src/mlpack/bindings/matlab/kernel_pca/kernel_pca.m
deleted file mode 100644
index fd8a1d1..0000000
--- a/src/mlpack/bindings/matlab/kernel_pca/kernel_pca.m
+++ /dev/null
@@ -1,71 +0,0 @@
-function result = kernel_pca(dataPoints, kernel, varargin)
-%Kernel Principal Components Analysis
-%
-%  This program performs Kernel Principal Components Analysis (KPCA) on the
-%  specified dataset with the specified kernel.  This will transform the data
-%  onto the kernel principal components, and optionally reduce the dimensionality
-%  by ignoring the kernel principal components with the smallest eigenvalues.
-%
-%  For the case where a linear kernel is used, this reduces to regular PCA.
-%
-%  The kernels that are supported are listed below:
-%
-%   * 'linear': the standard linear dot product (same as normal PCA):
-%      K(x, y) = x^T y
-%
-%   * 'gaussian': a Gaussian kernel; requires bandwidth:
-%      K(x, y) = exp(-(|| x - y || ^ 2) / (2 * (bandwidth ^ 2)))
-%
-%   * 'polynomial': polynomial kernel; requires offset and degree:
-%      K(x, y) = (x^T y + offset) ^ degree
-%
-%   * 'hyptan': hyperbolic tangent kernel; requires scale and offset:
-%      K(x, y) = tanh(scale * (x^T y) + offset)
-%
-%   * 'laplacian': Laplacian kernel; requires bandwidth:
-%      K(x, y) = exp(-(|| x - y ||) / bandwidth)
-%
-%   * 'cosine': cosine distance:
-%      K(x, y) = 1 - (x^T y) / (|| x || * || y ||)
-%
-%  The parameters for each of the kernels should be specified with the options
-%  bandwidth, kernel_scale, offset, or degree (or a combination of those
-%  options).
-%
-%Parameters:
-% dataPoints         - (required) Input dataset to perform KPCA on.
-% kernel             - (required) The kernel to use.
-% new_dimensionality - (optional) If not 0, reduce the dimensionality of the
-%                      dataset by ignoring the dimensions with the smallest
-%                      eigenvalues.
-% bandwidth          - (optional) Bandwidth, for 'gaussian' or 'laplacian'
-%                      kernels.  Default value 1.
-% degree             - (optional) Degree of polynomial, for 'polynomial'
-%                      kernel.  Default value 1.
-% kernel_scale       - (optional) Scale, for 'hyptan' kernel.  Default value 1.
-% offset             - (optional) Offset, for 'hyptan' and 'polynomial'
-%                      kernels.  Default value 1.
-% scale              - (optional) If true, the data will be scaled before
-%                      performing KPCA such that the variance of each feature
-%                      is 1.  Default value false.
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('new_dimensionality', 0, @isscalar);
-p.addParamValue('offset', 1, @isscalar);
-p.addParamValue('kernel_scale', 1, @isscalar);
-p.addParamValue('bandwidth', 1, @isscalar);
-p.addParamValue('degree', 1, @isscalar);
-p.addParamValue('scale', false, @(x) (x == true) || (x == false));
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% interfacing with mlpack. transposing to machine learning standards.
-result = mex_kernel_pca(dataPoints', kernel, ...
-	parsed.new_dimensionality, parsed.scale, ...
-	parsed.degree, parsed.offset, ...
-	parsed.kernel_scale, parsed.bandwidth);
-
-result = result';
-
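For reference, a minimal usage sketch of the binding removed above (synthetic
data; assumes the mex binaries were built and are on the MATLAB path):

    % Project 3-dimensional points onto their two leading kernel principal
    % components, using a Gaussian kernel with bandwidth 0.5.
    X = randn(500, 3);                      % one point per row
    result = kernel_pca(X, 'gaussian', ...
        'new_dimensionality', 2, 'bandwidth', 0.5);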
diff --git a/src/mlpack/bindings/matlab/kmeans/CMakeLists.txt b/src/mlpack/bindings/matlab/kmeans/CMakeLists.txt
deleted file mode 100644
index 4c0c06b..0000000
--- a/src/mlpack/bindings/matlab/kmeans/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(kmeans_mex SHARED
-  kmeans.cpp
-)
-target_link_libraries(kmeans_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS kmeans_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  kmeans.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/kmeans/kmeans.cpp b/src/mlpack/bindings/matlab/kmeans/kmeans.cpp
deleted file mode 100644
index bccd9cf..0000000
--- a/src/mlpack/bindings/matlab/kmeans/kmeans.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * @file kmeans.cpp
- * @author Patrick Mason
- *
- * MEX function for MATLAB k-means binding.
- */
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/kmeans/kmeans.hpp>
-#include <mlpack/methods/kmeans/allow_empty_clusters.hpp>
-
-using namespace mlpack;
-using namespace mlpack::kmeans;
-using namespace std;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 7)
-  {
-    mexErrMsgTxt("Expecting seven arguments.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  size_t seed = (size_t) mxGetScalar(prhs[6]);
-
-  // Initialize random seed.
-  //if (CLI::GetParam<int>("seed") != 0)
-    //math::RandomSeed((size_t) CLI::GetParam<int>("seed"));
-  if (seed != 0)
-    math::RandomSeed(seed);
-  else
-    math::RandomSeed((size_t) std::time(NULL));
-
-  // Now do validation of options.
-  //string inputFile = CLI::GetParam<string>("inputFile");
-  //int clusters = CLI::GetParam<int>("clusters");
-  int clusters = (int) mxGetScalar(prhs[1]);
-  if (clusters < 1)
-  {
-    stringstream ss;
-    ss << "Invalid number of clusters requested (" << clusters << ")! "
-        << "Must be greater than or equal to 1.";
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  //int maxIterations = CLI::GetParam<int>("max_iterations");
-  int maxIterations = (int) mxGetScalar(prhs[2]);
-  if (maxIterations < 0)
-  {
-    stringstream ss;
-    ss << "Invalid value for maximum iterations (" << maxIterations <<
-        ")! Must be greater than or equal to 0.";
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  //double overclustering = CLI::GetParam<double>("overclustering");
-  double overclustering = mxGetScalar(prhs[3]);
-  if (overclustering < 1)
-  {
-    stringstream ss;
-    ss << "Invalid value for overclustering (" << overclustering <<
-        ")! Must be greater than or equal to 1.";
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  const bool allow_empty_clusters = (mxGetScalar(prhs[4]) == 1.0);
-  const bool fast_kmeans = (mxGetScalar(prhs[5]) == 1.0);
-
-  /*
-  // Make sure we have an output file if we're not doing the work in-place.
-  if (!CLI::HasParam("in_place") && !CLI::HasParam("outputFile"))
-  {
-    Log::Fatal << "--outputFile not specified (and --in_place not set)."
-        << std::endl;
-  }
-  */
-
-  // Load our dataset.
-  const size_t numPoints = mxGetN(prhs[0]);
-  const size_t numDimensions = mxGetM(prhs[0]);
-  arma::mat dataset(numDimensions, numPoints);
-
-  // setting the values.
-  double * mexDataPoints = mxGetPr(prhs[0]);
-  for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-  {
-    dataset(i) = mexDataPoints[i];
-  }
-
-  // Now create the KMeans object.  Because we could be using different types,
-  // it gets a little weird...
-  arma::Col<size_t> assignments;
-
-  //if (CLI::HasParam("allow_empty_clusters"))
-  if (allow_empty_clusters)
-  {
-    KMeans<metric::SquaredEuclideanDistance, RandomPartition,
-        AllowEmptyClusters> k(maxIterations, overclustering);
-
-    //if (CLI::HasParam("fast_kmeans"))
-    if (fast_kmeans)
-      k.FastCluster(dataset, clusters, assignments);
-    else
-      k.Cluster(dataset, clusters, assignments);
-  }
-  else
-  {
-    KMeans<> k(maxIterations, overclustering);
-
-    //if (CLI::HasParam("fast_kmeans"))
-    if (fast_kmeans)
-      k.FastCluster(dataset, clusters, assignments);
-    else
-      k.Cluster(dataset, clusters, assignments);
-  }
-
-  /*
-  // Now figure out what to do with our results.
-  if (CLI::HasParam("in_place"))
-  {
-    // Add the column of assignments to the dataset; but we have to convert them
-    // to type double first.
-    arma::vec converted(assignments.n_elem);
-    for (size_t i = 0; i < assignments.n_elem; i++)
-      converted(i) = (double) assignments(i);
-
-    dataset.insert_rows(dataset.n_rows, trans(converted));
-
-    // Save the dataset.
-    data::Save(inputFile.c_str(), dataset);
-  }
-  else
-  {
-    if (CLI::HasParam("labels_only"))
-    {
-      // Save only the labels.
-      string outputFile = CLI::GetParam<string>("outputFile");
-      arma::Mat<size_t> output = trans(assignments);
-      data::Save(outputFile.c_str(), output);
-    }
-    else
-    {
-      // Convert the assignments to doubles.
-      arma::vec converted(assignments.n_elem);
-      for (size_t i = 0; i < assignments.n_elem; i++)
-        converted(i) = (double) assignments(i);
-
-      dataset.insert_rows(dataset.n_rows, trans(converted));
-
-      // Now save, in the different file.
-      string outputFile = CLI::GetParam<string>("outputFile");
-      data::Save(outputFile.c_str(), dataset);
-    }
-  }
-  */
-
-  // constructing matrix to return to matlab
-  plhs[0] = mxCreateDoubleMatrix(assignments.n_elem, 1, mxREAL);
-
-  // setting the values
-  double * out = mxGetPr(plhs[0]);
-  for (int i = 0, n = assignments.n_elem; i < n; ++i)
-  {
-    out[i] = assignments(i);
-  }
-
-}
-
diff --git a/src/mlpack/bindings/matlab/kmeans/kmeans.m b/src/mlpack/bindings/matlab/kmeans/kmeans.m
deleted file mode 100644
index 031702c..0000000
--- a/src/mlpack/bindings/matlab/kmeans/kmeans.m
+++ /dev/null
@@ -1,28 +0,0 @@
-function assignments = kmeans(dataPoints, clusters, varargin)
-%K-Means Clustering
-%
-%  This program performs K-Means clustering on the given dataset, returning
-%  the learned cluster assignment for each point.  Empty clusters are not
-%  allowed by default; when a cluster becomes empty, the point furthest from
-%  the centroid of the cluster with maximum variance is taken to fill that
-%  cluster.
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('allow_empty_clusters', false, @(x) (x == true) || (x == false));
-p.addParamValue('fast_kmeans', false, @(x) (x == true) || (x == false));
-p.addParamValue('max_iterations', 1000, @isscalar);
-p.addParamValue('overclustering', 1, @isscalar);
-p.addParamValue('seed', 0, @isscalar);
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% interfacing with mlpack. transposing to machine learning standards.
-assignments = mex_kmeans(dataPoints', clusters, parsed.max_iterations, ...
-	parsed.overclustering, parsed.allow_empty_clusters, ...
-	parsed.fast_kmeans, parsed.seed);
-
-assignments = assignments + 1; % changing to matlab indexing
-
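For reference, a minimal usage sketch of the binding removed above (synthetic
data; as noted in the wrapper, assignments come back 1-based):

    % Cluster two well-separated 2-D blobs into 2 clusters.
    X = [randn(100, 2); randn(100, 2) + 5];  % one point per row
    assignments = kmeans(X, 2, 'max_iterations', 500, 'seed', 1);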
diff --git a/src/mlpack/bindings/matlab/lars/CMakeLists.txt b/src/mlpack/bindings/matlab/lars/CMakeLists.txt
deleted file mode 100644
index ad7ad3a..0000000
--- a/src/mlpack/bindings/matlab/lars/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(lars_mex SHARED
-  lars.cpp
-)
-target_link_libraries(lars_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS lars_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  lars.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/lars/lars.cpp b/src/mlpack/bindings/matlab/lars/lars.cpp
deleted file mode 100644
index 4908a16..0000000
--- a/src/mlpack/bindings/matlab/lars/lars.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/lars/lars.hpp>
-
-using namespace arma;
-using namespace std;
-using namespace mlpack;
-using namespace mlpack::regression;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 5)
-  {
-    mexErrMsgTxt("Expecting five inputs.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  double lambda1 = mxGetScalar(prhs[2]);
-  double lambda2 = mxGetScalar(prhs[3]);
-  bool useCholesky = (mxGetScalar(prhs[4]) == 1.0); // fifth argument; prhs[3] is lambda2
-
-  // loading covariates
-  mat matX(mxGetM(prhs[0]), mxGetN(prhs[0]));
-  double * values = mxGetPr(prhs[0]);
-  for (int i=0, num=mxGetNumberOfElements(prhs[0]); i<num; ++i)
-    matX(i) = values[i];
-
-  // loading responses
-  mat matY(mxGetM(prhs[1]), mxGetN(prhs[1]));
-  values = mxGetPr(prhs[1]);
-  for (int i=0, num=mxGetNumberOfElements(prhs[1]); i<num; ++i)
-    matY(i) = values[i];
-
-  if (matY.n_cols > 1)
-    mexErrMsgTxt("Only one column or row allowed in responses file!");
-
-  if (matY.n_elem != matX.n_rows)
-    mexErrMsgTxt("Number of responses must be equal to number of rows of X!");
-
-  // Do LARS.
-  LARS lars(useCholesky, lambda1, lambda2);
-  vec beta;
-  lars.Regress(matX, matY.unsafe_col(0), beta, false /* do not transpose */);
-
-  // return to matlab
-  plhs[0] = mxCreateDoubleMatrix(beta.n_elem, 1, mxREAL);
-  values = mxGetPr(plhs[0]);
-  for (int i = 0; i < beta.n_elem; ++i)
-    values[i] = beta(i);
-}
diff --git a/src/mlpack/bindings/matlab/lars/lars.m b/src/mlpack/bindings/matlab/lars/lars.m
deleted file mode 100644
index 13b4812..0000000
--- a/src/mlpack/bindings/matlab/lars/lars.m
+++ /dev/null
@@ -1,48 +0,0 @@
-function beta = lars(X, Y, varargin)
-%LARS
-%
-%  An implementation of LARS: Least Angle Regression (Stagewise/laSso).  This is
-%  a stage-wise homotopy-based algorithm for L1-regularized linear regression
-%  (LASSO) and L1+L2-regularized linear regression (Elastic Net).
-%
-%  Let X be a matrix where each row is a point and each column is a dimension,
-%  and let y be a vector of targets.
-%
-%  The Elastic Net problem is to solve
-%
-%    min_beta 0.5 || X * beta - y ||_2^2 + lambda_1 ||beta||_1 +
-%        0.5 lambda_2 ||beta||_2^2
-%
-%  If lambda_1 > 0 and lambda_2 = 0, the problem is the LASSO.
-%  If lambda_1 > 0 and lambda_2 > 0, the problem is the Elastic Net.
-%  If lambda_1 = 0 and lambda_2 > 0, the problem is Ridge Regression.
-%  If lambda_1 = 0 and lambda_2 = 0, the problem is unregularized linear
-%  regression.
-%
-%  For efficiency reasons, it is not recommended to use this algorithm with
-%  lambda_1 = 0.
-%
-%Parameters:
-% X           - (required) Matrix containing covariates.
-% Y           - (required) Matrix containing y.
-% lambda1     - (optional) l1-penalty regularization.  Default value 0.
-% lambda2     - (optional) l2-penalty regularization.  Default value 0.
-% useCholesky - (optional) Use Cholesky decomposition during computation
-%               rather than explicitly computing the full Gram matrix.
-%               Default value false.
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('lambda1', 0, @isscalar);
-p.addParamValue('lambda2', 0, @isscalar);
-p.addParamValue('useCholesky', false, @(x) (x == true) || (x == false));
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% interfacing with mlpack. Does not require transposing.
-beta = mex_lars(X, Y, ...
-	parsed.lambda1, parsed.lambda2, parsed.useCholesky);
-
-
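For reference, a minimal usage sketch of the binding removed above (synthetic
data; lambda2 = 0 gives the LASSO case described in the help text):

    % Sparse regression: only the first two true coefficients are nonzero.
    X = randn(100, 10);                     % one point per row
    y = X * [2; -3; zeros(8, 1)] + 0.1 * randn(100, 1);
    beta = lars(X, y, 'lambda1', 0.5, 'useCholesky', true);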
diff --git a/src/mlpack/bindings/matlab/nca/CMakeLists.txt b/src/mlpack/bindings/matlab/nca/CMakeLists.txt
deleted file mode 100644
index da5a327..0000000
--- a/src/mlpack/bindings/matlab/nca/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(nca_mex SHARED
-  nca.cpp
-)
-target_link_libraries(nca_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS nca_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  nca.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/nca/nca.cpp b/src/mlpack/bindings/matlab/nca/nca.cpp
deleted file mode 100644
index 3edd26b..0000000
--- a/src/mlpack/bindings/matlab/nca/nca.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/core/metrics/lmetric.hpp>
-
-#include <mlpack/methods/nca/nca.hpp>
-
-using namespace mlpack;
-using namespace mlpack::nca;
-using namespace mlpack::metric;
-using namespace std;
-using namespace arma;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 2)
-  {
-    mexErrMsgTxt("Expecting two inputs.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  // Load data.
-  mat data(mxGetM(prhs[0]), mxGetN(prhs[0]));
-  double * values = mxGetPr(prhs[0]);
-  for (int i=0, num=mxGetNumberOfElements(prhs[0]); i<num; ++i)
-    data(i) = values[i];
-
-  // load labels
-  umat labels(mxGetNumberOfElements(prhs[1]), 1);
-  values = mxGetPr(prhs[1]);
-  for (int i=0, num=mxGetNumberOfElements(prhs[1]); i<num; ++i)
-    labels(i) = (int) values[i];
-
-  // dimension checks
-  if (labels.n_elem != data.n_cols)
-    mexErrMsgTxt("Labels vector and data have unmatching dimensions.");
-
-  // Now create the NCA object and run the optimization.
-  NCA<LMetric<2> > nca(data, labels.unsafe_col(0));
-
-  mat distance;
-  nca.LearnDistance(distance);
-
-  // return to matlab
-  plhs[0] = mxCreateDoubleMatrix(distance.n_rows, distance.n_cols, mxREAL);
-  values = mxGetPr(plhs[0]);
-  for (int i = 0; i < distance.n_elem; ++i)
-    values[i] = distance(i);
-}
diff --git a/src/mlpack/bindings/matlab/nca/nca.m b/src/mlpack/bindings/matlab/nca/nca.m
deleted file mode 100644
index 54b9a8b..0000000
--- a/src/mlpack/bindings/matlab/nca/nca.m
+++ /dev/null
@@ -1,24 +0,0 @@
-function result = nca(dataPoints, labels)
-%Neighborhood Components Analysis (NCA)
-%
-%  This program implements Neighborhood Components Analysis, both a linear
-%  dimensionality reduction technique and a distance learning technique.  The
-%  method seeks to improve k-nearest-neighbor classification on a dataset by
-%  scaling the dimensions.  The method is nonparametric, and does not require a
-%  value of k.  It works by using stochastic ("soft") neighbor assignments and
-%  using optimization techniques over the gradient of the accuracy of the
-%  neighbor assignments.
-%
-%  To work, this algorithm needs labeled data, supplied here as a separate
-%  labels vector with one label per data point.
-%
-%Parameters:
-% dataPoints - Input dataset to run NCA on.
-% labels     - Labels for input dataset.
-
-% interfacing with mlpack. transposing to machine learning standards.
-result = mex_nca(dataPoints', labels);
-result = result';
-
-
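For reference, a minimal usage sketch of the binding removed above (synthetic
two-class data):

    % Learn a linear transformation that improves k-NN classification.
    X = [randn(50, 2); randn(50, 2) + 3];   % one point per row
    labels = [zeros(50, 1); ones(50, 1)];   % one label per point
    distance = nca(X, labels);              % learned transformation matrix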
diff --git a/src/mlpack/bindings/matlab/nmf/CMakeLists.txt b/src/mlpack/bindings/matlab/nmf/CMakeLists.txt
deleted file mode 100644
index 255de6f..0000000
--- a/src/mlpack/bindings/matlab/nmf/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(nmf_mex SHARED
-  nmf.cpp
-)
-target_link_libraries(nmf_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS nmf_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  nmf.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/nmf/nmf.cpp b/src/mlpack/bindings/matlab/nmf/nmf.cpp
deleted file mode 100644
index 373abab..0000000
--- a/src/mlpack/bindings/matlab/nmf/nmf.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/nmf/nmf.hpp>
-
-#include <mlpack/methods/nmf/random_init.hpp>
-#include <mlpack/methods/nmf/mult_dist_update_rules.hpp>
-#include <mlpack/methods/nmf/mult_div_update_rules.hpp>
-#include <mlpack/methods/nmf/als_update_rules.hpp>
-
-using namespace mlpack;
-using namespace mlpack::nmf;
-using namespace std;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 6)
-  {
-    mexErrMsgTxt("Expecting six inputs.");
-  }
-
-  if (nlhs != 2)
-  {
-    mexErrMsgTxt("Two outputs required.");
-  }
-
-  const size_t seed = (size_t) mxGetScalar(prhs[5]);
-
-  // Initialize random seed.
-  if (seed != 0)
-    math::RandomSeed(seed);
-  else
-    math::RandomSeed((size_t) std::time(NULL));
-
-  // Gather parameters.
-  const size_t r = (size_t) mxGetScalar(prhs[1]);
-  const size_t maxIterations = (size_t) mxGetScalar(prhs[2]);
-  const double minResidue = mxGetScalar(prhs[3]);
-
-  // update rule
-  int bufLength = mxGetNumberOfElements(prhs[4]) + 1;
-  char * buf = (char *) mxCalloc(bufLength, sizeof(char));
-  mxGetString(prhs[4], buf, bufLength);
-  string updateRules(buf);
-  mxFree(buf);
-
-  // Validate rank.
-  if (r < 1)
-  {
-    mexErrMsgTxt("The rank of the factorization cannot be less than 1.");
-  }
-
-  if ((updateRules != "multdist") &&
-      (updateRules != "multdiv") &&
-      (updateRules != "als"))
-  {
-    stringstream ss;
-    ss << "Invalid update rules ('" << updateRules << "'); must be '"
-        << "multdist', 'multdiv', or 'als'.";
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  // Load input dataset.
-  arma::mat V(mxGetM(prhs[0]), mxGetN(prhs[0]));
-  double * values = mxGetPr(prhs[0]);
-  for (int i=0, num=mxGetNumberOfElements(prhs[0]); i<num; ++i)
-    V(i) = values[i];
-
-  arma::mat W;
-  arma::mat H;
-
-  // Perform NMF with the specified update rules.
-  if (updateRules == "multdist")
-  {
-    NMF<> nmf(maxIterations, minResidue);
-    nmf.Apply(V, r, W, H);
-  }
-  else if (updateRules == "multdiv")
-  {
-    NMF<RandomInitialization,
-        WMultiplicativeDivergenceRule,
-        HMultiplicativeDivergenceRule> nmf(maxIterations, minResidue);
-    nmf.Apply(V, r, W, H);
-  }
-  else if (updateRules == "als")
-  {
-    NMF<RandomInitialization,
-        WAlternatingLeastSquaresRule,
-        HAlternatingLeastSquaresRule> nmf(maxIterations, minResidue);
-    nmf.Apply(V, r, W, H);
-  }
-
-  // return to matlab
-  plhs[0] = mxCreateDoubleMatrix(W.n_rows, W.n_cols, mxREAL);
-  values = mxGetPr(plhs[0]);
-  for (int i = 0; i < W.n_elem; ++i)
-    values[i] = W(i);
-
-  plhs[1] = mxCreateDoubleMatrix(H.n_rows, H.n_cols, mxREAL);
-  values = mxGetPr(plhs[1]);
-  for (int i = 0; i < H.n_elem; ++i)
-    values[i] = H(i);
-}
diff --git a/src/mlpack/bindings/matlab/nmf/nmf.m b/src/mlpack/bindings/matlab/nmf/nmf.m
deleted file mode 100644
index 0766c81..0000000
--- a/src/mlpack/bindings/matlab/nmf/nmf.m
+++ /dev/null
@@ -1,58 +0,0 @@
-function [W H] = nmf(dataPoints, rank, varargin)
-%Non-negative Matrix Factorization
-%
-%  This program performs non-negative matrix factorization on the given dataset,
-%  storing the resulting decomposed matrices in the specified files.  For an
-%  input dataset V, NMF decomposes V into two matrices W and H such that
-%
-%  V = W * H
-%
-%  where all elements in W and H are non-negative.  If V is of size (n x m), then
-%  W will be of size (n x r) and H will be of size (r x m), where r is the rank
-%  of the factorization (specified by --rank).
-%
-%  Optionally, the desired update rules for each NMF iteration can be chosen from
-%  the following list:
-%
-%   - multdist: multiplicative distance-based update rules (Lee and Seung 1999)
-%   - multdiv: multiplicative divergence-based update rules (Lee and Seung 1999)
-%   - als: alternating least squares update rules (Paatero and Tapper 1994)
-%
-%  The maximum number of iterations is specified with 'max_iterations', and the
-%  minimum residue required for algorithm termination is specified with
-%  'min_residue'.
-%
-%Parameters:
-% dataPoints     - (required) Input dataset to perform NMF on.
-% rank           - (required) Rank of the factorization.
-% max_iterations - (optional) Number of iterations before NMF terminates.
-%                  Default value 10000.
-% min_residue    - (optional) The minimum root mean square residue allowed
-%                  for each iteration, below which the program terminates.
-%                  Default value 1e-05.
-% seed           - (optional) Random seed.  If 0, 'std::time(NULL)' is used.
-%                  Default value 0.
-% update_rules   - (optional) Update rules for each iteration; ( multdist |
-%                  multdiv | als ).  Default value 'multdist'.
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('max_iterations', 10000, @isscalar);
-p.addParamValue('min_residue', 1e-05, @isscalar);
-p.addParamValue('update_rules', 'multdist', @ischar);
-p.addParamValue('seed', 0, @isscalar);
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% interfacing with mlpack. transposing for machine learning standards.
-[W H] = mex_nmf(dataPoints', rank, ...
-	parsed.max_iterations, parsed.min_residue, ...
-	parsed.update_rules, parsed.seed);
-W = W';
-H = H';
-
-
-
-
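For reference, a minimal usage sketch of the binding removed above (synthetic
non-negative data):

    % Rank-5 non-negative factorization; per the documentation above,
    % V is factored as V = W * H with all elements non-negative.
    V = abs(randn(100, 50));
    [W, H] = nmf(V, 5, 'update_rules', 'multdist', 'seed', 1);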
diff --git a/src/mlpack/bindings/matlab/pca/CMakeLists.txt b/src/mlpack/bindings/matlab/pca/CMakeLists.txt
deleted file mode 100644
index fd03c8d..0000000
--- a/src/mlpack/bindings/matlab/pca/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(pca_mex SHARED
-  pca.cpp
-)
-target_link_libraries(pca_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS pca_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  pca.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/pca/pca.cpp b/src/mlpack/bindings/matlab/pca/pca.cpp
deleted file mode 100644
index ba9fe31..0000000
--- a/src/mlpack/bindings/matlab/pca/pca.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/pca/pca.hpp>
-
-using namespace mlpack;
-using namespace mlpack::pca;
-using namespace std;
-
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // argument checks
-  if (nrhs != 3)
-  {
-    mexErrMsgTxt("Expecting three inputs.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  // loading the data
-  double * mexDataPoints = mxGetPr(prhs[0]);
-  size_t numPoints = mxGetN(prhs[0]);
-  size_t numDimensions = mxGetM(prhs[0]);
-  arma::mat dataset(numDimensions, numPoints);
-  for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-    dataset(i) = mexDataPoints[i];
-
-  // Find out what dimension we want.
-  size_t newDimension = dataset.n_rows; // No reduction, by default.
-
-  if (mxGetScalar(prhs[1]) != 0.0)
-  {
-    // Validate the parameter.
-    newDimension = (size_t) mxGetScalar(prhs[1]);
-    if (newDimension > dataset.n_rows)
-    {
-      std::stringstream ss;
-      ss << "New dimensionality (" << newDimension
-          << ") cannot be greater than existing dimensionality ("
-          << dataset.n_rows << ")!";
-      mexErrMsgTxt(ss.str().c_str());
-    }
-  }
-
-  // Get the options for running PCA.
-  const bool scale = (mxGetScalar(prhs[2]) == 1.0);
-
-  // Perform PCA.
-  PCA p(scale);
-  p.Apply(dataset, newDimension);
-
-  // Now returning results to matlab
-  plhs[0] = mxCreateDoubleMatrix(dataset.n_rows, dataset.n_cols, mxREAL);
-  double * values = mxGetPr(plhs[0]);
-  for (int i = 0; i < dataset.n_rows * dataset.n_cols; ++i)
-    values[i] = dataset(i);
-}
diff --git a/src/mlpack/bindings/matlab/pca/pca.m b/src/mlpack/bindings/matlab/pca/pca.m
deleted file mode 100644
index 1b0a34c..0000000
--- a/src/mlpack/bindings/matlab/pca/pca.m
+++ /dev/null
@@ -1,33 +0,0 @@
-function result = pca(dataPoints, varargin)
-%Principal Components Analysis
-%
-%  This program performs principal components analysis on the given dataset.  It
-%  will transform the data onto its principal components, optionally performing
-%  dimensionality reduction by ignoring the principal components with the
-%  smallest eigenvalues.
-%
-%Parameters:
-% dataPoints        - (required) Matrix to perform PCA on.
-% newDimensionality - (optional) Desired dimensionality of output dataset.  If 0,
-%                                no dimensionality reduction is performed.
-%                                Default value 0.
-% scale             - (optional) If set, the data will be scaled before running
-%                                PCA, such that the variance of each feature is
-%                                1. Default value is false.
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('newDimensionality', 0, @isscalar);
-p.addParamValue('scale', false, @(x) (x == true) || (x == false));
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% interfacing with mlpack
-result = mex_pca(dataPoints', parsed.newDimensionality, parsed.scale);
-result = result';
-
-
-
-
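For reference, a minimal usage sketch of the binding removed above (synthetic
data):

    % Reduce 5-dimensional points to 2 dimensions, scaling each feature
    % to unit variance first.
    X = randn(200, 5);                      % one point per row
    Y = pca(X, 'newDimensionality', 2, 'scale', true);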
diff --git a/src/mlpack/bindings/matlab/range_search/CMakeLists.txt b/src/mlpack/bindings/matlab/range_search/CMakeLists.txt
deleted file mode 100644
index e12ea30..0000000
--- a/src/mlpack/bindings/matlab/range_search/CMakeLists.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Simple rules for building mex file.  The _mex suffix is necessary to avoid
-# target name conflicts, and the mex file must have a different name than the .m
-# file.
-add_library(range_search_mex SHARED
-  range_search.cpp
-)
-target_link_libraries(range_search_mex
-  mlpack
-  ${LIBXML2_LIBRARIES}
-)
-
-# Installation rule.  Install both the mex and the MATLAB file.
-install(TARGETS range_search_mex
-  LIBRARY DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
-install(FILES
-  range_search.m
-  DESTINATION "${MATLAB_TOOLBOX_DIR}/mlpack/"
-)
diff --git a/src/mlpack/bindings/matlab/range_search/range_search.cpp b/src/mlpack/bindings/matlab/range_search/range_search.cpp
deleted file mode 100644
index e66fdd6..0000000
--- a/src/mlpack/bindings/matlab/range_search/range_search.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-/**
- * @file range_search.cpp
- * @author Patrick Mason
- *
- * MEX function for MATLAB range search binding.
- */
-#include "mex.h"
-
-#include <mlpack/core.hpp>
-#include <mlpack/core/metrics/lmetric.hpp>
-#include <mlpack/methods/range_search/range_search.hpp>
-
-using namespace std;
-using namespace mlpack;
-using namespace mlpack::range;
-using namespace mlpack::tree;
-
-typedef RangeSearch<metric::SquaredEuclideanDistance,
-    BinarySpaceTree<bound::HRectBound<2>, EmptyStatistic> > RSType;
-
-// the gateway, required by all mex functions
-void mexFunction(int nlhs, mxArray *plhs[],
-                 int nrhs, const mxArray *prhs[])
-{
-  // Give CLI the command line parameters the user passed in.
-  //CLI::ParseCommandLine(argc, argv);
-
-  // Get all the parameters.
-  //string referenceFile = CLI::GetParam<string>("reference_file");
-  //string distancesFile = CLI::GetParam<string>("distances_file");
-  //string neighborsFile = CLI::GetParam<string>("neighbors_file");
-
-  //int lsInt = CLI::GetParam<int>("leaf_size");
-  //double max = CLI::GetParam<double>("max");
-  //double min = CLI::GetParam<double>("min");
-  //bool naive = CLI::HasParam("naive");
-  //bool singleMode = CLI::HasParam("single_mode");
-
-  // argument checks
-  if (nrhs != 7)
-  {
-    mexErrMsgTxt("Expecting an datapoints matrix, isBoruvka, and leafSize.");
-  }
-
-  if (nlhs != 1)
-  {
-    mexErrMsgTxt("Output required.");
-  }
-
-  double max = mxGetScalar(prhs[1]);
-  double min = mxGetScalar(prhs[2]);
-  int lsInt = (int) mxGetScalar(prhs[4]);
-  bool naive = (mxGetScalar(prhs[5]) == 1.0);
-  bool singleMode = (mxGetScalar(prhs[6]) == 1.0);
-
-  // checking for query data
-  bool hasQueryData = ((mxGetM(prhs[3]) != 0) && (mxGetN(prhs[3]) != 0));
-  arma::mat queryData;
-
-  // setting the dataset values.
-  double * mexDataPoints = mxGetPr(prhs[0]);
-  size_t numPoints = mxGetN(prhs[0]);
-  size_t numDimensions = mxGetM(prhs[0]);
-  arma::mat referenceData(numDimensions, numPoints);
-  for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-  {
-    referenceData(i) = mexDataPoints[i];
-  }
-
-  //if (!data::Load(referenceFile.c_str(), referenceData))
-  //  Log::Fatal << "Reference file " << referenceFile << "not found." << endl;
-
-  //Log::Info << "Loaded reference data from '" << referenceFile << "'." << endl;
-
-  // Sanity check on range value: max must be greater than min.
-  if (max <= min)
-  {
-    stringstream ss;
-    ss << "Invalid range: maximum (" << max << ") must be greater than "
-        << "minimum (" << min << ").";
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  // Sanity check on leaf size.
-  if (lsInt < 0)
-  {
-    stringstream ss;
-    ss << "Invalid leaf size: " << lsInt << ".  Must be greater "
-        "than or equal to 0.";
-    mexErrMsgTxt(ss.str().c_str());
-  }
-
-  size_t leafSize = lsInt;
-
-  // Naive mode overrides single mode.
-  if (singleMode && naive)
-  {
-    mexWarnMsgTxt("single_mode ignored because naive is present.");
-  }
-
-  if (naive)
-    leafSize = referenceData.n_cols;
-
-  vector<vector<size_t> > neighbors;
-  vector<vector<double> > distances;
-
-  // Because we may construct it differently, we need a pointer.
-  RSType* rangeSearch = NULL;
-
-  // Mappings for when we build the tree.
-  vector<size_t> oldFromNewRefs;
-
-  // Build trees by hand, so we can save memory: if we pass a tree to
-  // NeighborSearch, it does not copy the matrix.
-  //Log::Info << "Building reference tree..." << endl;
-  //Timer::Start("tree_building");
-
-  BinarySpaceTree<bound::HRectBound<2>, tree::EmptyStatistic>
-      refTree(referenceData, oldFromNewRefs, leafSize);
-  BinarySpaceTree<bound::HRectBound<2>, tree::EmptyStatistic>*
-      queryTree = NULL; // Empty for now.
-
-  //Timer::Stop("tree_building");
-
-  std::vector<size_t> oldFromNewQueries;
-
-  //if (CLI::GetParam<string>("query_file") != "")
-  if (hasQueryData)
-  {
-    //string queryFile = CLI::GetParam<string>("query_file");
-    //if (!data::Load(queryFile.c_str(), queryData))
-    //  Log::Fatal << "Query file " << queryFile << " not found" << endl;
-
-    // setting the values.
-    mexDataPoints = mxGetPr(prhs[3]);
-    numPoints = mxGetN(prhs[3]);
-    numDimensions = mxGetM(prhs[3]);
-    queryData = arma::mat(numDimensions, numPoints);
-    for (int i = 0, n = numPoints * numDimensions; i < n; ++i)
-    {
-      queryData(i) = mexDataPoints[i];
-    }
-
-    if (naive && leafSize < queryData.n_cols)
-      leafSize = queryData.n_cols;
-
-    //Log::Info << "Loaded query data from '" << queryFile << "'." << endl;
-
-    //Log::Info << "Building query tree..." << endl;
-
-    // Build trees by hand, so we can save memory: if we pass a tree to
-    // NeighborSearch, it does not copy the matrix.
-    //Timer::Start("tree_building");
-
-    queryTree = new BinarySpaceTree<bound::HRectBound<2>,
-        tree::EmptyStatistic >(queryData, oldFromNewQueries,
-        leafSize);
-
-    //Timer::Stop("tree_building");
-
-    rangeSearch = new RSType(&refTree, queryTree, referenceData, queryData,
-        singleMode);
-
-    //Log::Info << "Tree built." << endl;
-  }
-  else
-  {
-    rangeSearch = new RSType(&refTree, referenceData, singleMode);
-
-    //Log::Info << "Trees built." << endl;
-  }
-
-  //Log::Info << "Computing neighbors within range [" << min << ", " << max
-  //    << "]." << endl;
-
-  math::Range r = math::Range(min, max);
-  rangeSearch->Search(r, neighbors, distances);
-
-  //Log::Info << "Neighbors computed." << endl;
-
-  // We have to map back to the original indices from before the tree
-  // construction.
-  //Log::Info << "Re-mapping indices..." << endl;
-
-  vector<vector<double> > distancesOut;
-  distancesOut.resize(distances.size());
-  vector<vector<size_t> > neighborsOut;
-  neighborsOut.resize(neighbors.size());
-
-  // Do the actual remapping.
-  //if (CLI::GetParam<string>("query_file") != "")
-  if (hasQueryData)
-  {
-    for (size_t i = 0; i < distances.size(); ++i)
-    {
-      // Map distances (copy a column).
-      distancesOut[oldFromNewQueries[i]] = distances[i];
-
-      // Map indices of neighbors.
-      neighborsOut[oldFromNewQueries[i]].resize(neighbors[i].size());
-      for (size_t j = 0; j < distances[i].size(); ++j)
-      {
-        neighborsOut[oldFromNewQueries[i]][j] = oldFromNewRefs[neighbors[i][j]];
-      }
-    }
-  }
-  else
-  {
-    for (size_t i = 0; i < distances.size(); ++i)
-    {
-      // Map distances (copy a column).
-      distancesOut[oldFromNewRefs[i]] = distances[i];
-
-      // Map indices of neighbors.
-      neighborsOut[oldFromNewRefs[i]].resize(neighbors[i].size());
-      for (size_t j = 0; j < distances[i].size(); ++j)
-      {
-        neighborsOut[oldFromNewRefs[i]][j] = oldFromNewRefs[neighbors[i][j]];
-      }
-    }
-  }
-
-  // Setting values to be returned to matlab
-  mwSize ndim = 1;
-  mwSize dims[1] = {distancesOut.size()};
-  const char * fieldNames[2] = {
-    "neighbors"
-    , "distances"
-  };
-
-  plhs[0] = mxCreateStructArray(ndim, dims, 2, fieldNames);
-
-  // setting the structure elements
-  for (int i=0; i<distancesOut.size(); ++i)
-  {
-    mxArray * tmp;
-    double * values;
-
-    // settings the neighbors
-    const size_t numElements = distancesOut[i].size();
-    tmp = mxCreateDoubleMatrix(1, numElements, mxREAL);
-    values = mxGetPr(tmp);
-    for (int j=0; j<numElements; ++j)
-    {
-      // converting to matlab's index offset
-      values[j] = neighborsOut[i][j] + 1;
-    }
-    // note: SetField does not copy the data structure.
-    // mxDuplicateArray does the necessary copying.
-    mxSetFieldByNumber(plhs[0], i, 0, mxDuplicateArray(tmp));
-    mxDestroyArray(tmp);
-
-    // setting the distances
-    tmp = mxCreateDoubleMatrix(1, numElements, mxREAL);
-    values = mxGetPr(tmp);
-    for (int j=0; j<numElements; ++j)
-    {
-      values[j] = distancesOut[i][j];
-    }
-    mxSetFieldByNumber(plhs[0], i, 1, mxDuplicateArray(tmp));
-    mxDestroyArray(tmp);
-  }
-
-  // Clean up.
-  if (queryTree)
-    delete queryTree;
-  delete rangeSearch;
-
-  /*
-  // Save output.  We have to do this by hand.
-  fstream distancesStr(distancesFile.c_str(), fstream::out);
-  if (!distancesStr.is_open())
-  {
-    Log::Warn << "Cannot open file '" << distancesFile << "' to save output "
-        << "distances to!" << endl;
-  }
-  else
-  {
-    // Loop over each point.
-    for (size_t i = 0; i < distancesOut.size(); ++i)
-    {
-      // Store the distances of each point.  We may have 0 points to store, so
-      // we must account for that possibility.
-      for (size_t j = 0; j + 1 < distancesOut[i].size(); ++j)
-      {
-        distancesStr << distancesOut[i][j] << ", ";
-      }
-
-      if (distancesOut[i].size() > 0)
-        distancesStr << distancesOut[i][distancesOut[i].size() - 1];
-
-      distancesStr << endl;
-    }
-
-    distancesStr.close();
-  }
-
-  fstream neighborsStr(neighborsFile.c_str(), fstream::out);
-  if (!neighborsStr.is_open())
-  {
-    Log::Warn << "Cannot open file '" << neighborsFile << "' to save output "
-        << "neighbor indices to!" << endl;
-  }
-  else
-  {
-    // Loop over each point.
-    for (size_t i = 0; i < neighborsOut.size(); ++i)
-    {
-      // Store the neighbors of each point.  We may have 0 points to store, so
-      // we must account for that possibility.
-      for (size_t j = 0; j + 1 < neighborsOut[i].size(); ++j)
-      {
-        neighborsStr << neighborsOut[i][j] << ", ";
-      }
-
-      if (neighborsOut[i].size() > 0)
-        neighborsStr << neighborsOut[i][neighborsOut[i].size() - 1];
-
-      neighborsStr << endl;
-    }
-
-    neighborsStr.close();
-  }
-  */
-}
diff --git a/src/mlpack/bindings/matlab/range_search/range_search.m b/src/mlpack/bindings/matlab/range_search/range_search.m
deleted file mode 100644
index 1665e20..0000000
--- a/src/mlpack/bindings/matlab/range_search/range_search.m
+++ /dev/null
@@ -1,47 +0,0 @@
-function result = range_search(dataPoints, maxDistance, varargin)
-%Range Search
-%
-%  This function implements range search with a Euclidean distance metric. For a
-%  given query point, a given range, and a given set of reference points, the
-%  program will return all of the reference points with distance to the query
-%  point in the given range.  This is performed for an entire set of query
-%  points. You may specify a separate set of reference and query points, or only
-%  a reference set -- which is then used as both the reference and query set.
-%  The given range is taken to be inclusive (that is, points with a distance
-%  exactly equal to the minimum and maximum of the range are included in the
-%  results).
-%
-%  The results are returned as a struct array with one element per query
-%  point; each element holds 'neighbors' and 'distances' fields (see the
-%  usage sketch after this file).
-%
-%Parameters:
-% dataPoints  - (required) Matrix containing the reference dataset.
-% maxDistance - (required) The upper bound of the range.
-% minDistance - (optional) The lower bound.  The default value is zero.
-% queryPoints - (optional) Range search query points.
-% leafSize    - (optional) Leaf size for tree building.  Default value 20.
-% naive       - (optional) If true, O(n^2) naive mode is used for computation.
-% singleMode  - (optional) If true, single-tree search is used (as opposed to
-%               dual-tree search).
-
-% a parser for the inputs
-p = inputParser;
-p.addParamValue('minDistance', 0, @isscalar);
-p.addParamValue('queryPoints', zeros(0), @ismatrix);
-p.addParamValue('leafSize', 20, @isscalar);
-p.addParamValue('naive', false, @(x) (x == true) || (x == false));
-p.addParamValue('singleMode', false, @(x) (x == true) || (x == false));
-
-% parsing the varargin options
-p.parse(varargin{:});
-parsed = p.Results;
-
-% interfacing with mlpack
-result = mex_range_search(dataPoints', maxDistance, ...
-	parsed.minDistance, parsed.queryPoints', parsed.leafSize, ...
-	parsed.naive, parsed.singleMode);
-
-
-
-
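For reference, a minimal usage sketch of the binding removed above (synthetic
data; the result layout matches the struct array documented in the help text):

    % Find all reference points within distance [0.5, 2.0] of each query.
    refs = randn(300, 3);                   % one point per row
    queries = randn(10, 3);
    result = range_search(refs, 2.0, 'minDistance', 0.5, ...
        'queryPoints', queries, 'leafSize', 20);
    % result(i).neighbors and result(i).distances hold the matches for
    % the i-th query point (1-based indices).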
diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index f292e97..0f187b5 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -18,9 +18,9 @@ endmacro ()
 set(DIRS
   preprocess
   adaboost
+  ann
   approx_kfn
   amf
-  ann
   cf
   decision_stump
   det
@@ -51,7 +51,6 @@ set(DIRS
   randomized_svd
   range_search
   rann
-  rmva
   regularized_svd
   softmax_regression
   sparse_autoencoder
diff --git a/src/mlpack/methods/ann/CMakeLists.txt b/src/mlpack/methods/ann/CMakeLists.txt
index 6ff7011..44572c4 100644
--- a/src/mlpack/methods/ann/CMakeLists.txt
+++ b/src/mlpack/methods/ann/CMakeLists.txt
@@ -1,14 +1,7 @@
 # Define the files we need to compile
 # Anything not in this list will not be compiled into mlpack.
 set(SOURCES
-  cnn.hpp
-  cnn_impl.hpp
-  ffn.hpp
-  ffn_impl.hpp
-  network_util.hpp
-  network_util_impl.hpp
-  rnn.hpp
-  rnn_impl.hpp
+  init_rules/random_init.hpp
 )
 
 # Add directory name to sources.
@@ -19,10 +12,3 @@ endforeach()
 # Append sources (with directory name) to list of all mlpack sources (used at
 # the parent scope).
 set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-
-add_subdirectory(activation_functions)
-add_subdirectory(init_rules)
-add_subdirectory(layer)
-add_subdirectory(performance_functions)
-add_subdirectory(pooling_rules)
-add_subdirectory(convolution_rules)
diff --git a/src/mlpack/methods/ann/activation_functions/CMakeLists.txt b/src/mlpack/methods/ann/activation_functions/CMakeLists.txt
deleted file mode 100644
index d0b6404..0000000
--- a/src/mlpack/methods/ann/activation_functions/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  identity_function.hpp
-  logistic_function.hpp
-  softsign_function.hpp
-  tanh_function.hpp
-  rectifier_function.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/activation_functions/identity_function.hpp b/src/mlpack/methods/ann/activation_functions/identity_function.hpp
deleted file mode 100644
index 7a75b1e..0000000
--- a/src/mlpack/methods/ann/activation_functions/identity_function.hpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * @file identity_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the identity function.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_IDENTITY_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_IDENTITY_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The identity function, defined by
- *
- * @f{eqnarray*}{
- * f(x) &=& x \\
- * f'(x) &=& 1
- * @f}
- */
-class IdentityFunction
-{
- public:
-  /**
-   * Computes the identity function.
-   *
-   * @param x Input data.
-   * @return f(x).
-   */
-  static double fn(const double x)
-  {
-    return x;
-  }
-
-  /**
-   * Computes the identity function.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void fn(const InputVecType& x, OutputVecType& y)
-  {
-    y = x;
-  }
-
-  /**
-   * Computes the first derivative of the identity function.
-   *
-   * @param x Input data.
-   * @return f'(x)
-   */
-  static double deriv(const double /* unused */)
-  {
-    return 1.0;
-  }
-
-  /**
-   * Computes the first derivatives of the identity function.
-   *
-   * @param y Input activations.
-   * @param x The resulting derivatives.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void deriv(const InputVecType& y, OutputVecType& x)
-  {
-    x.ones(y.n_elem);
-  }
-
-  /**
-   * Computes the first derivatives of the identity function using a 3rd order
-   * tensor as input.
-   *
-   * @param y Input activations.
-   * @param x The resulting derivatives.
-   */
-  template<typename eT>
-  static void deriv(const arma::Cube<eT>& y, arma::Cube<eT>& x)
-  {
-    x.ones(y.n_rows, y.n_cols, y.n_slices);
-  }
-
-
-}; // class IdentityFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/activation_functions/logistic_function.hpp b/src/mlpack/methods/ann/activation_functions/logistic_function.hpp
deleted file mode 100644
index 922b14c..0000000
--- a/src/mlpack/methods/ann/activation_functions/logistic_function.hpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * @file logistic_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the logistic function.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_LOGISTIC_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_LOGISTIC_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The logistic function, defined by
- *
- * @f{eqnarray*}{
- * f(x) &=& \frac{1}{1 + e^{-x}} \\
- * f'(x) &=& f(x) * (1 - f(x)) \\
- * f^{-1}(y) &=& ln(\frac{y}{1-y})
- * @f}
- */
-class LogisticFunction
-{
-  public:
-  /**
-   * Computes the logistic function.
-   *
-   * @param x Input data.
-   * @return f(x).
-   */
-  template<typename eT>
-  static double fn(const eT x)
-  {
-    if (x < arma::Datum<eT>::log_max)
-    {
-      if (x > -arma::Datum<eT>::log_max)
-        return 1.0 /  (1.0 + std::exp(-x));
-
-      return 0.0;
-    }
-
-    return 1.0;
-  }
-
-  /**
-   * Computes the logistic function.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void fn(const InputVecType& x, OutputVecType& y)
-  {
-    y = (1.0 / (1 + arma::exp(-x)));
-  }
-
-  /**
-   * Computes the first derivative of the logistic function.
-   *
-   * @param y Output activation, i.e. f(x).
-   * @return f'(x), computed as f(x) * (1 - f(x)).
-   */
-  static double deriv(const double y)
-  {
-    return y * (1.0 - y);
-  }
-
-  /**
-   * Computes the first derivatives of the logistic function.
-   *
-   * @param y Input activations.
-   * @param x The resulting derivatives.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void deriv(const InputVecType& y, OutputVecType& x)
-  {
-    x = y % (1.0 - y);
-  }
-
-  /**
-   * Computes the inverse of the logistic function.
-   *
-   * @param y Input data.
-   * @return f^{-1}(y)
-   */
-  static double inv(const double y)
-  {
-    return arma::trunc_log(y / (1 - y));
-  }
-
-  /**
-   * Computes the inverse of the logistic function.
-   *
-   * @param y Input data.
-   * @return  x The resulting inverse of the input data.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void inv(const InputVecType& y, OutputVecType& x)
-  {
-    x = arma::trunc_log(y / (1 - y));
-  }
-}; // class LogisticFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
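
Note that deriv() above takes the activation f(x), not x itself. A minimal
sketch of the removed LogisticFunction (values are illustrative; the vector
overload relies on IEEE saturation for extreme inputs):

#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>

using namespace mlpack::ann;

int main()
{
  arma::vec x = {-800.0, 0.0, 800.0};
  arma::vec y, d;
  LogisticFunction::fn(x, y);     // y -> {0, 0.5, 1}.
  LogisticFunction::deriv(y, d);  // d = y % (1 - y) -> {0, 0.25, 0}.
  return 0;
}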
diff --git a/src/mlpack/methods/ann/activation_functions/rectifier_function.hpp b/src/mlpack/methods/ann/activation_functions/rectifier_function.hpp
deleted file mode 100644
index 7d97d2c..0000000
--- a/src/mlpack/methods/ann/activation_functions/rectifier_function.hpp
+++ /dev/null
@@ -1,115 +0,0 @@
-/**
- * @file rectifier_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the rectifier function as described by
- * V. Nair and G. E. Hinton.
- *
- * For more information, see the following paper.
- *
- * @code
- * @misc{NairHinton2010,
- *   author = {Vinod Nair and Geoffrey E. Hinton},
- *   title = {Rectified Linear Units Improve Restricted Boltzmann Machines},
- *   year = {2010}
- * }
- * @endcode
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_RECTIFIER_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_RECTIFIER_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-#include <algorithm>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The rectifier function, defined by
- *
- * @f{eqnarray*}{
- * f(x) &=& \max(0, x) \\
- * f'(x) &=& \left\{
- *   \begin{array}{lr}
- *     1 & : x > 0 \\
- *     0 & : x \le 0
- *   \end{array}
- * \right.
- * @f}
- */
-class RectifierFunction
-{
- public:
-  /**
-   * Computes the rectifier function.
-   *
-   * @param x Input data.
-   * @return f(x).
-   */
-  static double fn(const double x)
-  {
-    return std::max(0.0, x);
-  }
-
-  /**
-   * Computes the rectifier function using a dense matrix as input.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename eT>
-  static void fn(const arma::Mat<eT>& x, arma::Mat<eT>& y)
-  {
-    y = arma::max(arma::zeros<arma::Mat<eT> >(x.n_rows, x.n_cols), x);
-  }
-
-  /**
-   * Computes the rectifier function using a 3rd-order tensor as input.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename eT>
-  static void fn(const arma::Cube<eT>& x, arma::Cube<eT>& y)
-  {
-    y = x;
-    for (size_t s = 0; s < x.n_slices; s++)
-      fn(x.slice(s), y.slice(s));
-  }
-
-  /**
-   * Computes the first derivative of the rectifier function.
-   *
-   * @param y Result of the rectifier function, i.e. f(x).
-   * @return f'(x)
-   */
-  static double deriv(const double y)
-  {
-    return y > 0;
-  }
-
-  /**
-   * Computes the first derivatives of the rectifier function.
-   *
-   * @param y Input activations.
-   * @param x The resulting derivatives.
-   */
-  template<typename InputType, typename OutputType>
-  static void deriv(const InputType& y, OutputType& x)
-  {
-    x = y;
-
-    for (size_t i = 0; i < y.n_elem; i++)
-      x(i) = deriv(y(i));
-  }
-}; // class RectifierFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
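
A minimal sketch of the removed RectifierFunction on a dense matrix; as above,
deriv() takes the activation f(x) (values are illustrative):

#include <mlpack/methods/ann/activation_functions/rectifier_function.hpp>

using namespace mlpack::ann;

int main()
{
  arma::mat x = {{-2.0, 0.5}, {3.0, -0.1}};
  arma::mat y, d;
  RectifierFunction::fn(x, y);     // y = {{0, 0.5}, {3, 0}}.
  RectifierFunction::deriv(y, d);  // d = {{0, 1}, {1, 0}}.
  return 0;
}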
diff --git a/src/mlpack/methods/ann/activation_functions/softsign_function.hpp b/src/mlpack/methods/ann/activation_functions/softsign_function.hpp
deleted file mode 100644
index 2038bf0..0000000
--- a/src/mlpack/methods/ann/activation_functions/softsign_function.hpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- * @file softsign_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the softsign function as described by
- * X. Glorot and Y. Bengio.
- *
- * For more information, see the following paper.
- *
- * @code
- * @inproceedings{GlorotAISTATS2010,
- *   title={Understanding the difficulty of training deep feedforward
- *          neural networks},
- *   author={Glorot, Xavier and Bengio, Yoshua},
- *   booktitle={Proceedings of AISTATS 2010},
- *   year={2010}
- * }
- * @endcode
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_SOFTSIGN_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_SOFTSIGN_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The softsign function, defined by
- *
- * @f{eqnarray*}{
- * f(x) &=& \frac{x}{1 + |x|} \\
- * f'(x) &=& \frac{1}{(1 + |x|)^2} \\
- * f^{-1}(y) &=& \left\{
- *   \begin{array}{lr}
- *     \frac{y}{1 - y} & : y > 0 \\
- *     \frac{y}{1 + y} & : y \le 0
- *   \end{array}
- * \right.
- * @f}
- */
-class SoftsignFunction
-{
- public:
-  /**
-   * Computes the softsign function.
-   *
-   * @param x Input data.
-   * @return f(x).
-   */
-  static double fn(const double x)
-  {
-    if (x < DBL_MAX)
-      return x > -DBL_MAX ? x / (1.0 + std::abs(x)) : -1.0;
-    return 1.0;
-  }
-
-  /**
-   * Computes the softsign function.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void fn(const InputVecType& x, OutputVecType& y)
-  {
-    y = x;
-
-    for (size_t i = 0; i < x.n_elem; i++)
-      y(i) = fn(x(i));
-  }
-
-  /**
-   * Computes the first derivative of the softsign function.
-   *
-   * @param y Result of the softsign function, i.e. f(x).
-   * @return f'(x)
-   */
-  static double deriv(const double y)
-  {
-    return std::pow(1.0 - std::abs(y), 2);
-  }
-
-  /**
-   * Computes the first derivatives of the softsign function.
-   *
-   * @param y Input activations.
-   * @param x The resulting derivatives.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void deriv(const InputVecType& y, OutputVecType& x)
-  {
-    x = arma::pow(1.0 - arma::abs(y), 2);
-  }
-
-  /**
-   * Computes the inverse of the softsign function.
-   *
-   * @param y Input data.
-   * @return f^{-1}(y)
-   */
-  static double inv(const double y)
-  {
-    if (y > 0)
-      return y < 1 ? -y / (y - 1) : DBL_MAX;
-    else
-      return y > -1 ? y / (1 + y) : -DBL_MAX;
-  }
-
-  /**
-   * Computes the inverse of the softsign function.
-   *
-   * @param y Input data.
-   * @param x The resulting inverse of the input data.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void inv(const InputVecType& y, OutputVecType& x)
-  {
-    x = y;
-
-    for (size_t i = 0; i < y.n_elem; i++)
-      x(i) = inv(y(i));
-  }
-}; // class SoftsignFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
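
A minimal sketch of the removed SoftsignFunction, round-tripping through the
inverse defined above (values are illustrative):

#include <mlpack/methods/ann/activation_functions/softsign_function.hpp>

using namespace mlpack::ann;

int main()
{
  arma::vec x = {-3.0, 0.0, 3.0};
  arma::vec y, xRecovered;
  SoftsignFunction::fn(x, y);            // y = {-0.75, 0, 0.75}.
  SoftsignFunction::inv(y, xRecovered);  // Recovers {-3, 0, 3}.
  return 0;
}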
diff --git a/src/mlpack/methods/ann/activation_functions/tanh_function.hpp b/src/mlpack/methods/ann/activation_functions/tanh_function.hpp
deleted file mode 100644
index 64b1634..0000000
--- a/src/mlpack/methods/ann/activation_functions/tanh_function.hpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * @file tanh_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the hyperbolic tangent (tanh) function.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_TANH_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_TANH_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The tanh function, defined by
- *
- * @f{eqnarray*}{
- * f(x) &=& \frac{e^x - e^{-x}}{e^x + e^{-x}} \\
- * f'(x) &=& 1 - \tanh^2(x) \\
- * f^{-1}(y) &=& \tanh^{-1}(y)
- * @f}
- */
-class TanhFunction
-{
- public:
-  /**
-   * Computes the tanh function.
-   *
-   * @param x Input data.
-   * @return f(x).
-   */
-  static double fn(const double x)
-  {
-    return std::tanh(x);
-  }
-
-  /**
-   * Computes the tanh function.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void fn(const InputVecType& x, OutputVecType& y)
-  {
-    y = arma::tanh(x);
-  }
-
-  /**
-   * Computes the first derivative of the tanh function.
-   *
-   * @param y Result of the tanh function, i.e. f(x).
-   * @return f'(x)
-   */
-  static double deriv(const double y)
-  {
-    return 1 - std::pow(y, 2);
-  }
-
-  /**
-   * Computes the first derivatives of the tanh function.
-   *
-   * @param y Input data.
-   * @param x The resulting derivatives.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void deriv(const InputVecType& y, OutputVecType& x)
-  {
-    x = 1 - arma::pow(y, 2);
-  }
-
-  /**
-   * Computes the inverse of the tanh function.
-   *
-   * @param y Input data.
-   * @return f^{-1}(y)
-   */
-  static double inv(const double y)
-  {
-    return std::atanh(y);
-  }
-
-  /**
-   * Computes the inverse of the tanh function.
-   *
-   * @param y Input data.
-   * @param x The resulting inverse of the input data.
-   */
-  template<typename InputVecType, typename OutputVecType>
-  static void inv(const InputVecType& y, OutputVecType& x)
-  {
-    x = arma::atanh(y);
-  }
-}; // class TanhFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
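
A minimal sketch of the removed TanhFunction; note that deriv() computes
1 - y^2 from the activation y = tanh(x) (values are illustrative):

#include <mlpack/methods/ann/activation_functions/tanh_function.hpp>

using namespace mlpack::ann;

int main()
{
  arma::vec x = {-1.0, 0.0, 1.0};
  arma::vec y, d;
  TanhFunction::fn(x, y);     // y = tanh(x).
  TanhFunction::deriv(y, d);  // d = 1 - y^2.
  return 0;
}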
diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp
deleted file mode 100644
index 72e0803..0000000
--- a/src/mlpack/methods/ann/cnn.hpp
+++ /dev/null
@@ -1,448 +0,0 @@
-/**
- * @file cnn.hpp
- * @author Shangtong Zhang
- * @author Marcus Edel
- *
- * Definition of the CNN class, which implements convolutional neural networks.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_CNN_HPP
-#define MLPACK_METHODS_ANN_CNN_HPP
-
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/network_util.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
-#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
-#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * An implementation of a standard convolutional network.
- *
- * @tparam LayerTypes Contains all layer modules used to construct the network.
- * @tparam OutputLayerType The output layer type used to evaluate the network.
- * @tparam PerformanceFunction Performance strategy used to calculate the error.
- */
-template <
-  typename LayerTypes,
-  typename OutputLayerType,
-  typename InitializationRuleType = NguyenWidrowInitialization,
-  class PerformanceFunction = CrossEntropyErrorFunction<>
->
-class CNN
-{
- public:
-  //! Convenience typedef for the internal model construction.
-  using NetworkType = CNN<LayerTypes,
-                          OutputLayerType,
-                          InitializationRuleType,
-                          PerformanceFunction>;
-
-  /**
-   * Create the CNN object with the given predictors and responses set (this is
-   * the set that is used to train the network) and the given optimizer.
-   * Optionally, specify which initialization rule and performance function
-   * should be used.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param optimizer Instantiated optimizer used to train the model.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType,
-           typename OutputType,
-           template<typename> class OptimizerType>
-  CNN(LayerType &&network,
-      OutputType &&outputLayer,
-      const arma::cube& predictors,
-      const arma::mat& responses,
-      OptimizerType<NetworkType>& optimizer,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Create the CNN object with the given predictors and responses set (this is
-   * the set that is used to train the network). Optionally, specify which
-   * initialization rule and performance function should be used.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType, typename OutputType>
-  CNN(LayerType &&network,
-      OutputType &&outputLayer,
-      const arma::cube& predictors,
-      const arma::mat& responses,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Create the CNN object with an empty predictors and responses set and
-   * default optimizer. Make sure to call Train(predictors, responses) when
-   * training.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType, typename OutputType>
-  CNN(LayerType &&network,
-      OutputType &&outputLayer,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Train the convolutional neural network on the given input data. By
-   * default, the RMSprop optimization algorithm is used, but others can be
-   * specified (such as mlpack::optimization::SGD).
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @tparam OptimizerType Type of optimizer to use to train the model.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::RMSprop
-  >
-  void Train(const arma::cube& predictors, const arma::mat& responses);
-
-  /**
-   * Train the convolutional neural network with the given instantiated
-   * optimizer. Using this overload allows configuring the instantiated
-   * optimizer before training is performed.
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @param optimizer Instantiated optimizer used to train the model.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::RMSprop
-  >
-  void Train(OptimizerType<NetworkType>& optimizer);
-
-  /**
-   * Train the convolutional neural network on the given input data using the
-   * given optimizer.
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @tparam OptimizerType Type of optimizer to use to train the model.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param optimizer Instantiated optimizer used to train the model.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::RMSprop
-  >
-  void Train(const arma::cube& predictors,
-             const arma::mat& responses,
-             OptimizerType<NetworkType>& optimizer);
-
-  /**
-   * Predict the responses to a given set of predictors. The responses will
-   * reflect the output of the given output layer as returned by the
-   * OutputClass() function.
-   *
-   * @param predictors Input predictors.
-   * @param responses Matrix to put output predictions of responses into.
-   */
-  void Predict(arma::cube& predictors, arma::mat& responses);
-
-  /**
-   * Evaluate the convolutional neural network with the given parameters. This
-   * function is usually called by the optimizer to train the model.
-   *
-   * @param parameters Matrix model parameters.
-   * @param i Index of point to use for objective function evaluation.
-   * @param deterministic Whether or not to train or test the model. Note that
-   *        some layers act differently in training or testing mode.
-   */
-  double Evaluate(const arma::mat& parameters,
-                  const size_t i,
-                  const bool deterministic = true);
-
-  /**
-   * Evaluate the gradient of the convolutional neural network with the given
-   * parameters, and with respect to only one point in the dataset. This is
-   * useful for optimizers such as SGD, which require a separable objective
-   * function.
-   *
-   * @param parameters Matrix of the model parameters to be optimized.
-   * @param i Index of point to use for objective function gradient
-   *        evaluation.
-   * @param gradient Matrix to output gradient into.
-   */
-  void Gradient(const arma::mat& parameters,
-                const size_t i,
-                arma::mat& gradient);
-
-  //! Return the number of separable functions (the number of predictor
-  //! points).
-  size_t NumFunctions() const { return numFunctions; }
-
-  //! Return the initial point for the optimization.
-  const arma::mat& Parameters() const { return parameter; }
-  //! Modify the initial point for the optimization.
-  arma::mat& Parameters() { return parameter; }
-
-  /**
-   * Serialize the convolutional neural network.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */);
-
- private:
-  /**
-   * Reset the network by setting the layer status.
-   */
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& network)
-  {
-    ResetDeterministic(std::get<I>(network));
-    ResetParameter<I + 1, Tp...>(network);
-  }
-
-  /**
-   * Reset the layer status by setting the current deterministic parameter
-   * through all layers that implement the Deterministic function.
-   */
-  template<typename T>
-  typename std::enable_if<
-      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& layer)
-  {
-    layer.Deterministic() = deterministic;
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& /* unused */) { /* Nothing to do here */
-  }
-
-  /**
-   * Run a single iteration of the feed forward algorithm, using the given
-   * input and target vector, store the calculated error into the error
-   * vector.
-   */
-  template<size_t I = 0, typename DataType, typename... Tp>
-  void Forward(const DataType& input, std::tuple<Tp...>& network)
-  {
-    std::get<I>(network).InputParameter() = input;
-
-    std::get<I>(network).Forward(std::get<I>(network).InputParameter(),
-                           std::get<I>(network).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(network);
-  }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& network)
-  {
-    LinkParameter(network);
-  }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& network)
-  {
-    std::get<I>(network).Forward(std::get<I - 1>(network).OutputParameter(),
-        std::get<I>(network).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(network);
-  }
-
-  /**
-   * Link the calculated activation with the connection layer.
-   */
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  LinkParameter(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  LinkParameter(std::tuple<Tp...>& network)
-  {
-    if (!LayerTraits<typename std::remove_reference<
-        decltype(std::get<I>(network))>::type>::IsBiasLayer)
-    {
-      std::get<I>(network).InputParameter() = std::get<I - 1>(
-          network).OutputParameter();
-    }
-
-    LinkParameter<I + 1, Tp...>(network);
-  }
-
-  /*
-   * Calculate the output error and update the overall error.
-   */
-  template<typename DataType, typename ErrorType, typename... Tp>
-  double OutputError(const DataType& target,
-                     ErrorType& error,
-                     const std::tuple<Tp...>& network)
-  {
-    // Calculate and store the output error.
-    outputLayer.CalculateError(
-        std::get<sizeof...(Tp) - 1>(network).OutputParameter(), target, error);
-
-    // Measures the network's performance with the specified performance
-    // function.
-    return performanceFunc.Error(network, target, error);
-  }
-
-  /**
-   * Run a single iteration of the feed backward algorithm, using the given
-   * error of the output layer. Note that we iterate backward through the
-   * layer modules.
-   */
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I < (sizeof...(Tp) - 1), void>::type
-  Backward(const DataType& error, std::tuple<Tp...>& network)
-  {
-    std::get<sizeof...(Tp) - I>(network).Backward(
-        std::get<sizeof...(Tp) - I>(network).OutputParameter(), error,
-        std::get<sizeof...(Tp) - I>(network).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, network);
-  }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I == (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& /* unused */,
-               std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I < (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& error, std::tuple<Tp...>& network)
-  {
-    std::get<sizeof...(Tp) - I>(network).Backward(
-        std::get<sizeof...(Tp) - I>(network).OutputParameter(),
-        std::get<sizeof...(Tp) - I + 1>(network).Delta(),
-        std::get<sizeof...(Tp) - I>(network).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, network);
-  }
-
-  /**
-   * Iterate through all layer modules and update the gradient using the
-   * layer defined optimizer.
-   */
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I == Max, void>::type
-  UpdateGradients(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I < Max, void>::type
-  UpdateGradients(std::tuple<Tp...>& network)
-  {
-    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
-           std::get<I + 1>(network).Delta());
-
-    UpdateGradients<I + 1, Max, Tp...>(network);
-  }
-
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      HasGradientCheck<T, P&(T::*)()>::value, void>::type
-  Update(T& layer, P& /* unused */, D& delta)
-  {
-    layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
-  }
-
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
-  Update(T& /* unused */, P& /* unused */, D& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  /*
-   * Calculate and store the output activation.
-   */
-  template<typename DataType, typename... Tp>
-  void OutputPrediction(DataType& output, std::tuple<Tp...>& network)
-  {
-    // Calculate and store the output prediction.
-    outputLayer.OutputClass(std::get<sizeof...(Tp) - 1>(
-        network).OutputParameter(), output);
-  }
-
-  //! Instantiated convolutional neural network.
-  LayerTypes network;
-
-  //! The output layer used to evaluate the network.
-  OutputLayerType& outputLayer;
-
-  //! Performance strategy used to calculate the error.
-  PerformanceFunction performanceFunc;
-
-  //! The current evaluation mode (training or testing).
-  bool deterministic;
-
-  //! Matrix of (trained) parameters.
-  arma::mat parameter;
-
-  //! The matrix of data points (predictors).
-  arma::cube predictors;
-
-  //! The matrix of responses to the input data points.
-  arma::mat responses;
-
-  //! The number of separable functions (the number of predictor points).
-  size_t numFunctions;
-
-  //! Locally stored backward error.
-  arma::mat error;
-
-  //! Locally stored sample size.
-  size_t sampleSize;
-}; // class CNN
-
-} // namespace ann
-} // namespace mlpack
-
-// Include implementation.
-#include "cnn_impl.hpp"
-
-#endif
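
The removed CNN class exposed the decomposable-objective API (NumFunctions(),
Evaluate(), Gradient()) that mlpack optimizers expect. A hand-rolled SGD step
over that API would look roughly like this; `net` is a hypothetical, fully
constructed CNN instance and the step size is illustrative:

for (size_t i = 0; i < net.NumFunctions(); ++i)
{
  arma::mat gradient;
  net.Gradient(net.Parameters(), i, gradient);  // Forward + backward pass.
  net.Parameters() -= 0.01 * gradient;          // Plain SGD update.
}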
diff --git a/src/mlpack/methods/ann/cnn_impl.hpp b/src/mlpack/methods/ann/cnn_impl.hpp
deleted file mode 100644
index ba774ba..0000000
--- a/src/mlpack/methods/ann/cnn_impl.hpp
+++ /dev/null
@@ -1,289 +0,0 @@
-/**
- * @file cnn_impl.hpp
- * @author Marcus Edel
- *
- * Definition of the CNN class, which implements convolutional neural networks.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_CNN_IMPL_HPP
-#define MLPACK_METHODS_ANN_CNN_IMPL_HPP
-
-// In case it hasn't been included yet.
-#include "cnn.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType,
-         typename OutputType,
-         template<typename> class OptimizerType
->
-CNN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::CNN(LayerType &&network,
-       OutputType &&outputLayer,
-       const arma::cube& predictors,
-       const arma::mat& responses,
-       OptimizerType<NetworkType>& optimizer,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction)),
-    predictors(predictors),
-    responses(responses),
-    numFunctions(responses.n_cols)
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-
-  // Train the model.
-  Timer::Start("cnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("cnn_optimization");
-
-  Log::Info << "CNN::CNN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType, typename OutputType>
-CNN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::CNN(LayerType &&network,
-       OutputType &&outputLayer,
-       const arma::cube& predictors,
-       const arma::mat& responses,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction))
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-
-  Train(predictors, responses);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType, typename OutputType>
-CNN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::CNN(LayerType &&network,
-       OutputType &&outputLayer,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction))
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<template<typename> class OptimizerType>
-void CNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(const arma::cube& predictors, const arma::mat& responses)
-{
-  numFunctions = responses.n_cols;
-  sampleSize = predictors.n_slices / responses.n_cols;
-  this->predictors = predictors;
-  this->responses = responses;
-
-  OptimizerType<decltype(*this)> optimizer(*this);
-
-  // Train the model.
-  Timer::Start("cnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("cnn_optimization");
-
-  Log::Info << "CNN::Train(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<template<typename> class OptimizerType>
-void CNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(const arma::cube& predictors,
-         const arma::mat& responses,
-         OptimizerType<NetworkType>& optimizer)
-{
-  numFunctions = responses.n_cols;
-  sampleSize = predictors.n_slices / responses.n_cols;
-  this->predictors = predictors;
-  this->responses = responses;
-
-  // Train the model.
-  Timer::Start("cnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("cnn_optimization");
-
-  Log::Info << "CNN::Train(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<
-    template<typename> class OptimizerType
->
-void CNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(OptimizerType<NetworkType>& optimizer)
-{
-  // Train the model.
-  Timer::Start("cnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("cnn_optimization");
-
-  Log::Info << "CNN::Train(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-void CNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Predict(arma::cube& predictors, arma::mat& responses)
-{
-  deterministic = true;
-
-  arma::mat responsesTemp;
-  ResetParameter(network);
-  Forward(predictors.slices(0, sampleSize - 1), network);
-  OutputPrediction(responsesTemp, network);
-
-  responses = arma::mat(responsesTemp.n_elem,
-      predictors.n_slices / sampleSize);
-  responses.col(0) = responsesTemp.col(0);
-
-  for (size_t i = 1; i < (predictors.n_slices / sampleSize); i++)
-  {
-    Forward(predictors.slices(i * sampleSize, (i + 1) * sampleSize - 1),
-        network);
-
-    responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false,
-        true);
-    OutputPrediction(responsesTemp, network);
-    responses.col(i) = responsesTemp.col(0);
-  }
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-double CNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Evaluate(const arma::mat& /* unused */,
-            const size_t i,
-            const bool deterministic)
-{
-  this->deterministic = deterministic;
-
-  ResetParameter(network);
-  Forward(predictors.slices(i * sampleSize, (i + 1) * sampleSize - 1),
-      network);
-
-  return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false,
-      true), error, network);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-void CNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Gradient(const arma::mat& /* unused */,
-            const size_t i,
-            arma::mat& gradient)
-{
-  Evaluate(parameter, i, false);
-
-  NetworkGradients(gradient, network);
-
-  Backward<>(error, network);
-  UpdateGradients<>(network);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename Archive>
-void CNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Serialize(Archive& ar, const unsigned int /* version */)
-{
-  ar & data::CreateNVP(parameter, "parameter");
-  ar & data::CreateNVP(sampleSize, "sampleSize");
-
-  // If we are loading, we need to initialize the weights.
-  if (Archive::is_loading::value)
-  {
-    NetworkWeights(parameter, network);
-  }
-}
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
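
For reference, the three Train() overloads implemented above were invoked
roughly as follows; `net`, `X` (an arma::cube of inputs), and `Y` (an
arma::mat of targets) are hypothetical, and the RMSprop constructor is assumed
to take the objective function by reference:

mlpack::optimization::RMSprop<decltype(net)> opt(net);

net.Train(X, Y);       // Builds a default RMSprop optimizer internally.
net.Train(opt);        // Reuses the stored predictors and responses.
net.Train(X, Y, opt);  // Explicit data and a pre-configured optimizer.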
diff --git a/src/mlpack/methods/ann/convolution_rules/CMakeLists.txt b/src/mlpack/methods/ann/convolution_rules/CMakeLists.txt
deleted file mode 100644
index 3e69071..0000000
--- a/src/mlpack/methods/ann/convolution_rules/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  border_modes.hpp
-  naive_convolution.hpp
-  fft_convolution.hpp
-  svd_convolution.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/convolution_rules/border_modes.hpp b/src/mlpack/methods/ann/convolution_rules/border_modes.hpp
deleted file mode 100644
index b9e6b1e..0000000
--- a/src/mlpack/methods/ann/convolution_rules/border_modes.hpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/**
- * @file border_modes.hpp
- * @author Marcus Edel
- *
- * This file provides the border modes that can be used to compute different
- * convolutions.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_CONVOLUTION_RULES_BORDER_MODES_HPP
-#define MLPACK_METHODS_ANN_CONVOLUTION_RULES_BORDER_MODES_HPP
-
-namespace mlpack {
-namespace ann {
-
-/*
- * The FullConvolution class represents the full two-dimensional convolution.
- */
-class FullConvolution { /* Nothing to do here */ };
-
-/*
- * The ValidConvolution represents only those parts of the convolution that are
- * computed without the zero-padded edges.
- */
-class ValidConvolution { /* Nothing to do here */ };
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
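
These tags carry no state; the convolution rules select an implementation from
them at compile time. A minimal sketch of the same dispatch pattern (ModeName
is illustrative, not part of mlpack):

#include <type_traits>
#include <mlpack/methods/ann/convolution_rules/border_modes.hpp>

template<typename Border>
typename std::enable_if<
    std::is_same<Border, mlpack::ann::FullConvolution>::value,
    const char*>::type
ModeName() { return "full"; }

template<typename Border>
typename std::enable_if<
    std::is_same<Border, mlpack::ann::ValidConvolution>::value,
    const char*>::type
ModeName() { return "valid"; }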
diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp
deleted file mode 100644
index bbcfecd..0000000
--- a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp
+++ /dev/null
@@ -1,221 +0,0 @@
-/**
- * @file fft_convolution.hpp
- * @author Shangtong Zhang
- * @author Marcus Edel
- *
- * Implementation of the convolution through the fast Fourier transform (FFT).
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_CONVOLUTION_RULES_FFT_CONVOLUTION_HPP
-#define MLPACK_METHODS_ANN_CONVOLUTION_RULES_FFT_CONVOLUTION_HPP
-
-#include <mlpack/core.hpp>
-#include "border_modes.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Computes the two-dimensional convolution through the FFT. This class allows
- * specification of the border type. The convolution can be computed with the
- * valid border type or the full border type (default).
- *
- * FullConvolution: returns the full two-dimensional convolution.
- * ValidConvolution: returns only those parts of the convolution that are
- * computed without the zero-padded edges.
- *
- * @tparam BorderMode Type of the border mode (FullConvolution or
- * ValidConvolution).
- * @tparam padLastDim Pad the last dimension of the input to turn it from
- * odd to even.
- */
-template<typename BorderMode = FullConvolution, const bool padLastDim = false>
-class FFTConvolution
-{
- public:
-  /*
-   * Perform a convolution through the FFT (valid mode). This method only
-   * supports input which is even on the last dimension. In case of an odd
-   * input width, a user can manually pad the input or specify the padLastDim
-   * parameter, which takes care of the padding. The filter, however, can have
-   * any size. When using the valid mode, the filter has to be smaller than
-   * the input.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT, typename Border = BorderMode>
-  static typename std::enable_if<
-      std::is_same<Border, ValidConvolution>::value, void>::type
-  Convolution(const arma::Mat<eT>& input,
-              const arma::Mat<eT>& filter,
-              arma::Mat<eT>& output)
-  {
-    arma::Mat<eT> inputPadded = input;
-    arma::Mat<eT> filterPadded = filter;
-
-    if (padLastDim)
-      inputPadded.resize(inputPadded.n_rows, inputPadded.n_cols + 1);
-
-    // Pad filter and input to the output shape.
-    filterPadded.resize(inputPadded.n_rows, inputPadded.n_cols);
-
-    output = arma::real(ifft2(arma::fft2(inputPadded) % arma::fft2(
-        filterPadded)));
-
-    // Extract the region of interest. We don't need to handle the padLastDim
-    // parameter in a special way; we just cut it out of the output matrix.
-    output = output.submat(filter.n_rows - 1, filter.n_cols - 1,
-        input.n_rows - 1, input.n_cols - 1);
-  }
-
-  /*
-   * Perform a convolution through the FFT (full mode). This method only
-   * supports input which is even on the last dimension. In case of an odd
-   * input width, a user can manually pad the input or specify the padLastDim
-   * parameter, which takes care of the padding. The filter, however, can have
-   * any size.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT, typename Border = BorderMode>
-  static typename std::enable_if<
-      std::is_same<Border, FullConvolution>::value, void>::type
-  Convolution(const arma::Mat<eT>& input,
-              const arma::Mat<eT>& filter,
-              arma::Mat<eT>& output)
-  {
-    // In case of the full convolution, outputRows and outputCols don't
-    // represent the true output size when the padLastDim parameter is set;
-    // instead, they represent the working size.
-    const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1);
-    size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1);
-
-    if (padLastDim)
-      outputCols++;
-
-    // Pad filter and input to the working output shape.
-    arma::Mat<eT> inputPadded = arma::zeros<arma::Mat<eT> >(outputRows,
-        outputCols);
-    inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1,
-          filter.n_rows - 1 + input.n_rows - 1,
-          filter.n_cols - 1 + input.n_cols - 1) = input;
-
-    arma::Mat<eT> filterPadded = filter;
-    filterPadded.resize(outputRows, outputCols);
-
-    // Perform the FFT and the inverse FFT.
-    output = arma::real(ifft2(arma::fft2(inputPadded) % arma::fft2(
-        filterPadded)));
-
-    // Extract the region of interest. We don't need to handle the padLastDim
-    // parameter in a special way; we just cut it out of the output matrix.
-    output = output.submat(filter.n_rows - 1, filter.n_cols - 1,
-        2 * (filter.n_rows - 1) + input.n_rows - 1,
-        2 * (filter.n_cols - 1) + input.n_cols - 1);
-  }
-
-  /*
-   * Perform a convolution through the FFT using 3rd-order tensors. This
-   * method only supports input which is even on the last dimension. In case
-   * of an odd input width, a user can manually pad the input or specify the
-   * padLastDim parameter, which takes care of the padding. The filter,
-   * however, can have any size.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Cube<eT>& input,
-                          const arma::Cube<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    FFTConvolution<BorderMode>::Convolution(input.slice(0), filter.slice(0),
-        convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        input.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < input.n_slices; i++)
-    {
-      FFTConvolution<BorderMode>::Convolution(input.slice(i), filter.slice(i),
-          convOutput);
-      output.slice(i) = convOutput;
-    }
-  }
-
-  /*
-   * Perform a convolution through the FFT using a dense matrix as input and
-   * 3rd-order tensors as filter and output. This method only supports input
-   * which is even on the last dimension. In case of an odd input width, a
-   * user can manually pad the input or specify the padLastDim parameter,
-   * which takes care of the padding. The filter, however, can have any size.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Mat<eT>& input,
-                          const arma::Cube<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    FFTConvolution<BorderMode>::Convolution(input, filter.slice(0),
-        convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        filter.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < filter.n_slices; i++)
-    {
-      FFTConvolution<BorderMode>::Convolution(input, filter.slice(i),
-          convOutput);
-      output.slice(i) = convOutput;
-    }
-  }
-
-  /*
-   * Perform a convolution using 3rd-order tensors as input and output and a
-   * dense matrix as filter.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Cube<eT>& input,
-                          const arma::Mat<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    FFTConvolution<BorderMode>::Convolution(input.slice(0), filter,
-        convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        input.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < input.n_slices; i++)
-    {
-      FFTConvolution<BorderMode>::Convolution(input.slice(i), filter,
-          convOutput);
-      output.slice(i) = convOutput;
-    }
-  }
-
-};  // class FFTConvolution
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
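
A minimal sketch of the removed FFTConvolution; the output sizes follow from
the submat extractions above (input sizes are illustrative, and the last
dimension is kept even so padLastDim is not needed):

#include <mlpack/methods/ann/convolution_rules/fft_convolution.hpp>

using namespace mlpack::ann;

int main()
{
  arma::mat input = arma::randu<arma::mat>(8, 8);
  arma::mat filter = arma::randu<arma::mat>(3, 3);
  arma::mat valid, full;
  FFTConvolution<ValidConvolution>::Convolution(input, filter, valid);  // 6x6.
  FFTConvolution<FullConvolution>::Convolution(input, filter, full);    // 10x10.
  return 0;
}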
diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp
deleted file mode 100644
index fc7fc69..0000000
--- a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp
+++ /dev/null
@@ -1,190 +0,0 @@
-/**
- * @file naive_convolution.hpp
- * @author Shangtong Zhang
- * @author Marcus Edel
- *
- * Implementation of the convolution.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_CONVOLUTION_RULES_NAIVE_CONVOLUTION_HPP
-#define MLPACK_METHODS_ANN_CONVOLUTION_RULES_NAIVE_CONVOLUTION_HPP
-
-#include <mlpack/core.hpp>
-#include "border_modes.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Computes the two-dimensional convolution. This class allows specification
- * of the border type. The convolution can be computed with the valid border
- * type or the full border type (default).
- *
- * FullConvolution: returns the full two-dimensional convolution.
- * ValidConvolution: returns only those parts of the convolution that are
- * computed without the zero-padded edges.
- *
- * @tparam BorderMode Type of the border mode (FullConvolution or
- * ValidConvolution).
- */
-template<typename BorderMode = FullConvolution>
-class NaiveConvolution
-{
- public:
-  /*
-   * Perform a convolution (valid mode).
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT, typename Border = BorderMode>
-  static typename std::enable_if<
-      std::is_same<Border, ValidConvolution>::value, void>::type
-  Convolution(const arma::Mat<eT>& input,
-              const arma::Mat<eT>& filter,
-              arma::Mat<eT>& output)
-  {
-    output = arma::zeros<arma::Mat<eT> >(input.n_rows - filter.n_rows + 1,
-        input.n_cols - filter.n_cols + 1);
-
-    // It seems to be about 3.5 times faster to use pointers instead of
-    // filter(ki, kj) * input(i + ki, j + kj) and output(i, j).
-    eT* outputPtr = output.memptr();
-
-    for (size_t j = 0; j < output.n_cols; ++j)
-    {
-      for (size_t i = 0; i < output.n_rows; ++i, outputPtr++)
-      {
-        const eT* kernelPtr = filter.memptr();
-        for (size_t kj = 0; kj < filter.n_cols; ++kj)
-        {
-          const eT* inputPtr = input.colptr(kj + j) + i;
-          for (size_t ki = 0; ki < filter.n_rows; ++ki, ++kernelPtr, ++inputPtr)
-            *outputPtr += *kernelPtr * (*inputPtr);
-        }
-      }
-    }
-  }
-
-  /*
-   * Perform a convolution (full mode).
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT, typename Border = BorderMode>
-  static typename std::enable_if<
-      std::is_same<Border, FullConvolution>::value, void>::type
-  Convolution(const arma::Mat<eT>& input,
-              const arma::Mat<eT>& filter,
-              arma::Mat<eT>& output)
-  {
-    const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1);
-    const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1);
-
-    // Pad filter and input to the working output shape.
-    arma::Mat<eT> inputPadded = arma::zeros<arma::Mat<eT> >(outputRows,
-        outputCols);
-    inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1,
-          filter.n_rows - 1 + input.n_rows - 1,
-          filter.n_cols - 1 + input.n_cols - 1) = input;
-
-    NaiveConvolution<ValidConvolution>::Convolution(inputPadded, filter,
-        output);
-  }
-
-  /*
-   * Perform a convolution using 3rd-order tensors.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Cube<eT>& input,
-                          const arma::Cube<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    NaiveConvolution<BorderMode>::Convolution(input.slice(0), filter.slice(0),
-        convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        input.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < input.n_slices; i++)
-    {
-      NaiveConvolution<BorderMode>::Convolution(input.slice(i), filter.slice(i),
-          output.slice(i));
-    }
-  }
-
-  /*
-   * Perform a convolution using a dense matrix as input and 3rd-order tensors
-   * as filter and output.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Mat<eT>& input,
-                          const arma::Cube<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    NaiveConvolution<BorderMode>::Convolution(input, filter.slice(0),
-        convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        filter.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < filter.n_slices; i++)
-    {
-      NaiveConvolution<BorderMode>::Convolution(input, filter.slice(i),
-          output.slice(i));
-    }
-  }
-
-  /*
-   * Perform a convolution using 3rd-order tensors as input and output and a
-   * dense matrix as filter.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Cube<eT>& input,
-                          const arma::Mat<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    NaiveConvolution<BorderMode>::Convolution(input.slice(0), filter,
-        convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        input.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < input.n_slices; i++)
-    {
-      NaiveConvolution<BorderMode>::Convolution(input.slice(i), filter,
-          output.slice(i));
-    }
-  }
-
-};  // class NaiveConvolution
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
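
The naive rule makes the size relationship explicit: valid mode yields
(n - k + 1) outputs per dimension, while full mode pads the input by (k - 1)
on every side and reuses the valid path. A minimal sketch (sizes are
illustrative):

#include <mlpack/methods/ann/convolution_rules/naive_convolution.hpp>

using namespace mlpack::ann;

int main()
{
  arma::mat input = arma::randu<arma::mat>(5, 5);
  arma::mat filter = arma::randu<arma::mat>(3, 3);
  arma::mat valid, full;
  NaiveConvolution<ValidConvolution>::Convolution(input, filter, valid); // 3x3.
  NaiveConvolution<FullConvolution>::Convolution(input, filter, full);   // 7x7.
  return 0;
}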
diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp
deleted file mode 100644
index a0b317e..0000000
--- a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp
+++ /dev/null
@@ -1,199 +0,0 @@
-/**
- * @file svd_convolution.hpp
- * @author Marcus Edel
- *
- * Implementation of the convolution using the singular value decomposition to
- * speed up the computation.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_CONVOLUTION_RULES_SVD_CONVOLUTION_HPP
-#define MLPACK_METHODS_ANN_CONVOLUTION_RULES_SVD_CONVOLUTION_HPP
-
-#include <mlpack/core.hpp>
-#include "border_modes.hpp"
-#include "fft_convolution.hpp"
-#include "naive_convolution.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Computes the two-dimensional convolution using singular value decomposition.
- * This class allows specification of the border type. The convolution can be
- * computed with the valid border type or the full border type (default).
- *
- * FullConvolution: returns the full two-dimensional convolution.
- * ValidConvolution: returns only those parts of the convolution that are
- * computed without the zero-padded edges.
- *
- * @tparam BorderMode Type of the border mode (FullConvolution or
- * ValidConvolution).
- */
-template<typename BorderMode = FullConvolution>
-class SVDConvolution
-{
- public:
-  /*
-   * Perform a convolution (valid or full mode) using singular value
-   * decomposition. By using singular value decomposition of the filter matrix
-   * the convolution can be expressed as a sum of outer products. Each product
-   * can be computed efficiently as convolution with a row and a column vector.
-   * The individual convolutions are computed with the naive implementation
-   * which is fast if the filter is low-dimensional.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Mat<eT>& input,
-                          const arma::Mat<eT>& filter,
-                          arma::Mat<eT>& output)
-  {
-    // Fall back to the naive convolution if the filter is one-dimensional (a
-    // single row or column) or bigger than the input.
-    if (filter.n_rows > input.n_rows || filter.n_cols > input.n_cols ||
-        filter.n_rows == 1 || filter.n_cols == 1)
-    {
-      NaiveConvolution<BorderMode>::Convolution(input, filter, output);
-    }
-    else
-    {
-      arma::Mat<eT> U, V, subOutput;
-      arma::Col<eT> s;
-
-      arma::svd_econ(U, s, V, filter);
-
-      // Rank approximation using the singular values calculated with singular
-      // value decomposition of the dense filter matrix.
-      const size_t rank = arma::sum(s > (s.n_elem * arma::max(s) *
-          arma::datum::eps));
-
-      // Test for separability based on the rank of the kernel and take
-      // advantage of the low rank.
-      if (rank * (filter.n_rows + filter.n_cols) < filter.n_elem)
-      {
-        arma::Mat<eT> subFilter = V.unsafe_col(0) * s(0);
-        NaiveConvolution<BorderMode>::Convolution(input, subFilter, subOutput);
-
-        subOutput = subOutput.t();
-        NaiveConvolution<BorderMode>::Convolution(subOutput, U.unsafe_col(0),
-            output);
-
-        for (size_t r = 1; r < rank; r++)
-        {
-          subFilter = V.unsafe_col(r) * s(r);
-          NaiveConvolution<BorderMode>::Convolution(input, subFilter,
-              subOutput);
-
-          arma::Mat<eT> temp;
-          subOutput = subOutput.t();
-          NaiveConvolution<BorderMode>::Convolution(subOutput, U.unsafe_col(r),
-              temp);
-          output += temp;
-        }
-
-        output = output.t();
-      }
-      else
-      {
-        FFTConvolution<BorderMode>::Convolution(input, filter, output);
-      }
-    }
-  }
-
-  /*
-   * Perform a convolution using 3rd order tensors.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Cube<eT>& input,
-                          const arma::Cube<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    SVDConvolution<BorderMode>::Convolution(input.slice(0), filter.slice(0),
-        convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        input.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < input.n_slices; i++)
-    {
-      SVDConvolution<BorderMode>::Convolution(input.slice(i), filter.slice(i),
-          convOutput);
-      output.slice(i) = convOutput;
-    }
-  }
-
-  /*
-   * Perform a convolution using a dense matrix as input and 3rd order tensors
-   * as filter and output.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Mat<eT>& input,
-                          const arma::Cube<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    SVDConvolution<BorderMode>::Convolution(input, filter.slice(0), convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        filter.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < filter.n_slices; i++)
-    {
-      SVDConvolution<BorderMode>::Convolution(input, filter.slice(i),
-          convOutput);
-      output.slice(i) = convOutput;
-    }
-  }
-
-  /*
-   * Perform a convolution using 3rd order tensors as input and output and a
-   * dense matrix as filter.
-   *
-   * @param input Input used to perform the convolution.
-   * @param filter Filter used to perform the convolution.
-   * @param output Output data that contains the results of the convolution.
-   */
-  template<typename eT>
-  static void Convolution(const arma::Cube<eT>& input,
-                          const arma::Mat<eT>& filter,
-                          arma::Cube<eT>& output)
-  {
-    arma::Mat<eT> convOutput;
-    SVDConvolution<BorderMode>::Convolution(input.slice(0), filter, convOutput);
-
-    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
-        input.n_slices);
-    output.slice(0) = convOutput;
-
-    for (size_t i = 1; i < input.n_slices; i++)
-    {
-      SVDConvolution<BorderMode>::Convolution(input.slice(i), filter,
-          convOutput);
-      output.slice(i) = convOutput;
-    }
-  }
-
-};  // class SVDConvolution
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
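(The separability identity that SVDConvolution exploits is easy to check
independently of the removed code; a minimal sketch, again using arma::conv2
as a stand-in.)

#include <armadillo>
#include <iostream>

int main()
{
  arma::mat X = arma::randu<arma::mat>(16, 16);
  arma::vec u = arma::randu<arma::vec>(5);
  arma::vec v = arma::randu<arma::vec>(5);

  // A rank-1 filter is an outer product, so its 2-D convolution factors
  // into a convolution with a row vector followed by one with a column
  // vector -- exactly the sum-of-outer-products trick used above.
  arma::mat K = u * v.t();
  arma::mat full = arma::conv2(X, K, "full");
  arma::mat sep = arma::conv2(arma::conv2(X, v.t(), "full"), u, "full");

  // The two results agree up to floating-point error.
  std::cout << arma::abs(full - sep).max() << std::endl;
  return 0;
}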
diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp
deleted file mode 100644
index f9bc4d5..0000000
--- a/src/mlpack/methods/ann/ffn.hpp
+++ /dev/null
@@ -1,447 +0,0 @@
-/**
- * @file ffn.hpp
- * @author Marcus Edel
- *
- * Definition of the FFN class, which implements feed forward neural networks.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_FFN_HPP
-#define MLPACK_METHODS_ANN_FFN_HPP
-
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/network_util.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
-#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
-#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of a standard feed forward network.
- *
- * @tparam LayerTypes Contains all layer modules used to construct the network.
- * @tparam OutputLayerType The output layer type used to evaluate the network.
- * @tparam InitializationRuleType Rule used to initialize the weight matrix.
- * @tparam PerformanceFunction Performance strategy used to calculate the error.
- */
-template <
-  typename LayerTypes,
-  typename OutputLayerType,
-  typename InitializationRuleType = NguyenWidrowInitialization,
-  class PerformanceFunction = CrossEntropyErrorFunction<>
->
-class FFN
-{
- public:
-  //! Convenience typedef for the internal model construction.
-  using NetworkType = FFN<LayerTypes,
-                          OutputLayerType,
-                          InitializationRuleType,
-                          PerformanceFunction>;
-
-  /**
-   * Create the FFN object with the given predictors and responses set (this is
-   * the set that is used to train the network) and the given optimizer.
-   * Optionally, specify which initialization rule and performance function
-   * should be used.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param optimizer Instantiated optimizer used to train the model.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType,
-           typename OutputType,
-           template<typename> class OptimizerType>
-  FFN(LayerType &&network,
-      OutputType &&outputLayer,
-      const arma::mat& predictors,
-      const arma::mat& responses,
-      OptimizerType<NetworkType>& optimizer,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Create the FFN object with the given predictors and responses set (this is
-   * the set that is used to train the network). Optionally, specify which
-   * initialization rule and performance function should be used.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType, typename OutputType>
-  FFN(LayerType &&network,
-      OutputType &&outputLayer,
-      const arma::mat& predictors,
-      const arma::mat& responses,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Create the FFN object with empty predictors and responses sets and the
-   * default optimizer. Make sure to call Train(predictors, responses) when
-   * training.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType, typename OutputType>
-  FFN(LayerType &&network,
-      OutputType &&outputLayer,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Train the feedforward network on the given input data. By default, the
-   * RMSprop optimization algorithm is used, but others can be specified
-   * (such as mlpack::optimization::SGD).
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @tparam OptimizerType Type of optimizer to use to train the model.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::RMSprop
-  >
-  void Train(const arma::mat& predictors, const arma::mat& responses);
-
-  /**
-   * Train the feedforward network with the given instantiated optimizer.
-   * Using this overload allows configuring the instantiated optimizer before
-   * training is performed.
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @param optimizer Instantiated optimizer used to train the model.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::RMSprop
-  >
-  void Train(OptimizerType<NetworkType>& optimizer);
-
-  /**
-   * Train the feedforward network on the given input data using the given
-   * optimizer.
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @tparam OptimizerType Type of optimizer to use to train the model.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param optimizer Instantiated optimizer used to train the model.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::RMSprop
-  >
-  void Train(const arma::mat& predictors,
-             const arma::mat& responses,
-             OptimizerType<NetworkType>& optimizer);
-
-  /**
-   * Predict the responses to a given set of predictors. The responses will
-   * reflect the output of the given output layer as returned by the
-   * OutputClass() function.
-   *
-   * @param predictors Input predictors.
-   * @param responses Matrix to put output predictions of responses into.
-   */
-  void Predict(arma::mat& predictors, arma::mat& responses);
-
-  /**
-   * Evaluate the feedforward network with the given parameters. This function
-   * is usually called by the optimizer to train the model.
-   *
-   * @param parameters Matrix model parameters.
-   * @param i Index of point to use for objective function evaluation.
-   * @param deterministic Whether or not to train or test the model. Note that
-   * some layers act differently in training or testing mode.
-   */
-  double Evaluate(const arma::mat& parameters,
-                  const size_t i,
-                  const bool deterministic = true);
-
-  /**
-   * Evaluate the gradient of the feedforward network with the given parameters,
-   * and with respect to only one point in the dataset. This is useful for
-   * optimizers such as SGD, which require a separable objective function.
-   *
-   * @param parameters Matrix of the model parameters to be optimized.
-   * @param i Index of point to use for objective function gradient evaluation.
-   * @param gradient Matrix to output gradient into.
-   */
-  void Gradient(const arma::mat& parameters,
-                const size_t i,
-                arma::mat& gradient);
-
-  //! Return the number of separable functions (the number of predictor points).
-  size_t NumFunctions() const { return numFunctions; }
-
-  //! Return the initial point for the optimization.
-  const arma::mat& Parameters() const { return parameter; }
-  //! Modify the initial point for the optimization.
-  arma::mat& Parameters() { return parameter; }
-
-  //! Serialize the model.
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */);
-
- private:
-  /**
-   * Reset the network by zeroing the layer activations and by setting the
-   * layer status.
-   *
-   * enable_if (SFINAE) is used to iterate through the network. The general
-   * case peels off the first type and recurses, as usual with
-   * variadic function templates.
-   */
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& network)
-  {
-    ResetDeterministic(std::get<I>(network));
-    ResetParameter<I + 1, Tp...>(network);
-  }
-
-  /**
-   * Reset the layer status by setting the current deterministic parameter
-   * through all layers that implement the Deterministic function.
-   */
-  template<typename T>
-  typename std::enable_if<
-      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& layer)
-  {
-    layer.Deterministic() = deterministic;
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& /* unused */) { /* Nothing to do here */ }
-
-  /**
-   * Run a single iteration of the feed forward algorithm, using the given
-   * input and target vector, and store the calculated error in the error
-   * vector.
-   */
-  template<size_t I = 0, typename DataType, typename... Tp>
-  void Forward(const DataType& input, std::tuple<Tp...>& network)
-  {
-    std::get<I>(network).InputParameter() = input;
-
-    std::get<I>(network).Forward(std::get<I>(network).InputParameter(),
-        std::get<I>(network).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(network);
-  }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& network)
-  {
-    LinkParameter(network);
-  }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& network)
-  {
-    std::get<I>(network).Forward(std::get<I - 1>(network).OutputParameter(),
-        std::get<I>(network).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(network);
-  }
-
-  /**
-   * Link the calculated activation with the connection layer.
-   */
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  LinkParameter(std::tuple<Tp ...>& /* unused */) { /* Nothing to do here */ }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  LinkParameter(std::tuple<Tp...>& network)
-  {
-    if (!LayerTraits<typename std::remove_reference<
-        decltype(std::get<I>(network))>::type>::IsBiasLayer)
-    {
-      std::get<I>(network).InputParameter() = std::get<I - 1>(
-          network).OutputParameter();
-    }
-
-    LinkParameter<I + 1, Tp...>(network);
-  }
-
-  /*
-   * Calculate the output error and update the overall error.
-   */
-  template<typename DataType, typename ErrorType, typename... Tp>
-  double OutputError(const DataType& target,
-                     ErrorType& error,
-                     const std::tuple<Tp...>& network)
-  {
-    // Calculate and store the output error.
-    outputLayer.CalculateError(
-        std::get<sizeof...(Tp) - 1>(network).OutputParameter(), target, error);
-
-    // Measures the network's performance with the specified performance
-    // function.
-    return performanceFunc.Error(network, target, error);
-  }
-
-  /**
-   * Run a single iteration of the feed backward algorithm, using the given
-   * error of the output layer. Note that we iterate backward through the
-   * layer modules.
-   */
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I < (sizeof...(Tp) - 1), void>::type
-  Backward(const DataType& error, std::tuple<Tp ...>& network)
-  {
-    std::get<sizeof...(Tp) - I>(network).Backward(
-        std::get<sizeof...(Tp) - I>(network).OutputParameter(), error,
-        std::get<sizeof...(Tp) - I>(network).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, network);
-  }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I == (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& /* unused */,
-               std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I < (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& error, std::tuple<Tp...>& network)
-  {
-    std::get<sizeof...(Tp) - I>(network).Backward(
-        std::get<sizeof...(Tp) - I>(network).OutputParameter(),
-        std::get<sizeof...(Tp) - I + 1>(network).Delta(),
-        std::get<sizeof...(Tp) - I>(network).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, network);
-  }
-
-  /**
-   * Iterate through all layer modules and update the gradient using the
-   * layer defined optimizer.
-   */
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I == Max, void>::type
-  UpdateGradients(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I < Max, void>::type
-  UpdateGradients(std::tuple<Tp...>& network)
-  {
-    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
-           std::get<I + 1>(network).Delta());
-
-    UpdateGradients<I + 1, Max, Tp...>(network);
-  }
-
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      HasGradientCheck<T, P&(T::*)()>::value, void>::type
-  Update(T& layer, P& /* unused */, D& delta)
-  {
-    layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
-  }
-
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
-  Update(T& /* unused */, P& /* unused */, D& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  /*
-   * Calculate and store the output activation.
-   */
-  template<typename DataType, typename... Tp>
-  void OutputPrediction(DataType& output, std::tuple<Tp...>& network)
-  {
-    // Calculate and store the output prediction.
-    outputLayer.OutputClass(std::get<sizeof...(Tp) - 1>(
-        network).OutputParameter(), output);
-  }
-
-  //! Instantiated feedforward network.
-  LayerTypes network;
-
-  //! The output layer used to evaluate the network.
-  OutputLayerType outputLayer;
-
-  //! Performance strategy used to calculate the error.
-  PerformanceFunction performanceFunc;
-
-  //! The current evaluation mode (training or testing).
-  bool deterministic;
-
-  //! Matrix of (trained) parameters.
-  arma::mat parameter;
-
-  //! The matrix of data points (predictors).
-  arma::mat predictors;
-
-  //! The matrix of responses to the input data points.
-  arma::mat responses;
-
-  //! The number of separable functions (the number of predictor points).
-  size_t numFunctions;
-
-  //! Locally stored backward error.
-  arma::mat error;
-}; // class FFN
-
-} // namespace ann
-} // namespace mlpack
-
-// Include implementation.
-#include "ffn_impl.hpp"
-
-#endif
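(For context, a network built on this API was a tuple of layer modules plus an
output layer, roughly as below. The module lineup and sizes are illustrative,
and the sketch naturally depends on the headers deleted in this commit.)

using namespace mlpack::ann;

// Sketch only: all of these types are removed by this commit.
LinearLayer<> inputLayer(10, 8);
BiasLayer<> hiddenBias(8);
SigmoidLayer<> hiddenActivation;
LinearLayer<> hiddenLayer(8, 2);
SigmoidLayer<> outputActivation;
BinaryClassificationLayer classOutputLayer;

auto modules = std::tie(inputLayer, hiddenBias, hiddenActivation,
    hiddenLayer, outputActivation);

FFN<decltype(modules), decltype(classOutputLayer)> net(
    std::move(modules), std::move(classOutputLayer));

net.Train(trainData, trainLabels);   // arma::mat, one point per column.
net.Predict(testData, predictions);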
diff --git a/src/mlpack/methods/ann/ffn_impl.hpp b/src/mlpack/methods/ann/ffn_impl.hpp
deleted file mode 100644
index 5b1cc61..0000000
--- a/src/mlpack/methods/ann/ffn_impl.hpp
+++ /dev/null
@@ -1,296 +0,0 @@
-/**
- * @file ffn_impl.hpp
- * @author Marcus Edel
- *
- * Implementation of the FFN class, which implements feed forward neural networks.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_FFN_IMPL_HPP
-#define MLPACK_METHODS_ANN_FFN_IMPL_HPP
-
-// In case it hasn't been included yet.
-#include "ffn.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType,
-         typename OutputType,
-         template<typename> class OptimizerType
->
-FFN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::FFN(LayerType &&network,
-       OutputType &&outputLayer,
-       const arma::mat& predictors,
-       const arma::mat& responses,
-       OptimizerType<NetworkType>& optimizer,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction)),
-    predictors(predictors),
-    responses(responses),
-    numFunctions(predictors.n_cols)
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-
-  // Train the model.
-  Timer::Start("ffn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("ffn_optimization");
-
-  Log::Info << "FFN::FFN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType, typename OutputType>
-FFN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::FFN(LayerType &&network,
-       OutputType &&outputLayer,
-       const arma::mat& predictors,
-       const arma::mat& responses,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction))
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-
-  Train(predictors, responses);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType, typename OutputType>
-FFN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::FFN(LayerType &&network,
-       OutputType &&outputLayer,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction))
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<template<typename> class OptimizerType>
-void FFN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(const arma::mat& predictors, const arma::mat& responses)
-{
-  numFunctions = predictors.n_cols;
-  this->predictors = predictors;
-  this->responses = responses;
-
-  OptimizerType<decltype(*this)> optimizer(*this);
-
-  // Train the model.
-  Timer::Start("ffn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("ffn_optimization");
-
-  Log::Info << "FFN::FFN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<template<typename> class OptimizerType>
-void FFN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(const arma::mat& predictors,
-         const arma::mat& responses,
-         OptimizerType<NetworkType>& optimizer)
-{
-  numFunctions = predictors.n_cols;
-  this->predictors = predictors;
-  this->responses = responses;
-
-  // Train the model.
-  Timer::Start("ffn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("ffn_optimization");
-
-  Log::Info << "FFN::FFN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<
-    template<typename> class OptimizerType
->
-void FFN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(OptimizerType<NetworkType>& optimizer)
-{
-  // Train the model.
-  Timer::Start("ffn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("ffn_optimization");
-
-  Log::Info << "FFN::FFN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-void FFN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Predict(arma::mat& predictors, arma::mat& responses)
-{
-  deterministic = true;
-
-  arma::mat responsesTemp;
-  ResetParameter(network);
-  Forward(arma::mat(predictors.colptr(0), predictors.n_rows, 1, false, true),
-      network);
-  OutputPrediction(responsesTemp, network);
-
-  responses = arma::mat(responsesTemp.n_elem, predictors.n_cols);
-  responses.col(0) = responsesTemp.col(0);
-
-  for (size_t i = 1; i < predictors.n_cols; i++)
-  {
-    Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true),
-        network);
-
-    responsesTemp = arma::mat(responses.colptr(i), responses.n_rows, 1, false,
-        true);
-    OutputPrediction(responsesTemp, network);
-    responses.col(i) = responsesTemp.col(0);
-  }
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-double FFN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Evaluate(const arma::mat& /* unused */,
-            const size_t i,
-            const bool deterministic)
-{
-  this->deterministic = deterministic;
-
-  ResetParameter(network);
-
-  Forward(arma::mat(predictors.colptr(i), predictors.n_rows, 1, false, true),
-      network);
-
-  return OutputError(arma::mat(responses.colptr(i), responses.n_rows, 1, false,
-      true), error, network);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-void FFN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Gradient(const arma::mat& /* unused */,
-            const size_t i,
-            arma::mat& gradient)
-{
-  if (gradient.is_empty())
-  {
-    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
-  }
-
-
-  Evaluate(parameter, i, false);
-
-  NetworkGradients(gradient, network);
-
-  Backward<>(error, network);
-  UpdateGradients<>(network);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename Archive>
-void FFN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Serialize(Archive& ar, const unsigned int /* version */)
-{
-  ar & data::CreateNVP(parameter, "parameter");
-
-  // If we are loading, we need to initialize the weights.
-  if (Archive::is_loading::value)
-  {
-    NetworkWeights(parameter, network);
-  }
-}
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
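(The Train() overloads taking an instantiated optimizer exist so that the
optimizer can be configured before training; continuing the sketch above.
Accessor names follow the pre-removal optimizer API and are illustrative.)

// Sketch only: mlpack::optimization::RMSprop as it existed before removal.
mlpack::optimization::RMSprop<decltype(net)> opt(net);
opt.StepSize() = 0.01;
opt.MaxIterations() = 400 * net.NumFunctions();  // Roughly 400 epochs.
net.Train(trainData, trainLabels, opt);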
diff --git a/src/mlpack/methods/ann/init_rules/CMakeLists.txt b/src/mlpack/methods/ann/init_rules/CMakeLists.txt
deleted file mode 100644
index 981ceaa..0000000
--- a/src/mlpack/methods/ann/init_rules/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  random_init.hpp
-  oivs_init.hpp
-  kathirvalavakumar_subavathi_init.hpp
-  nguyen_widrow_init.hpp
-  zero_init.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp b/src/mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp
deleted file mode 100644
index 491ab5a..0000000
--- a/src/mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * @file kathirvalavakumar_subavathi_init.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the initialization method by T.
- * Kathirvalavakumar and S. Subavathi. This initialization rule is based on
- * sensitivity analysis using Cauchy’s inequality.
- *
- * For more information, see the following paper.
- *
- * @code
- * @inproceedings{KathirvalavakumarJILSA2011,
- *   title={A New Weight Initialization Method Using Cauchy’s Inequality Based
- *   on Sensitivity Analysis},
- *   author={T. Kathirvalavakumar and S. Subavathi},
- *   booktitle={Journal of Intelligent Learning Systems and Applications,
- *   Vol. 3 No. 4},
- *   year={2011}
- * }
- * @endcode
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_INIT_RULES_KATHIRVALAVAKUMAR_SUBAVATHI_INIT_HPP
-#define MLPACK_METHODS_ANN_INIT_RULES_KATHIRVALAVAKUMAR_SUBAVATHI_INIT_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <iostream>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class is used to initialize the weight matrix with the method proposed
- * by T. Kathirvalavakumar and S. Subavathi. The method is based on sensitivity
- * analysis using Cauchy’s inequality. The method is defined by
- *
- * @f{eqnarray*}{
- * \overline{s} &=& f^{-1}(\overline{t}) \\
- * \Theta^{1}_{p} &\le& \overline{s}
- *     \sqrt{\frac{3}{I \sum_{i = 1}^{I} (x_{ip}^2)}} \\
- * \Theta^1 &=& \min(\Theta_{p}^{1}); p=1,2,\ldots,P \\
- * -\Theta^{1} \le w_{i}^{1} &\le& \Theta^{1}
- * @f}
- *
- * where I is the number of inputs including the bias, p refers to the pattern
- * considered in training, f is the transfer function, and \overline{s} is the
- * active region in which the derivative of the activation function is greater
- * than 4% of the maximum derivative.
- */
-class KathirvalavakumarSubavathiInitialization
-{
- public:
-  /**
-   * Initialize the Kathirvalavakumar-Subavathi rule with the given values.
-   *
-   * @param data The input patterns.
-   * @param s Parameter that defines the active region.
-   */
-  template<typename eT>
-  KathirvalavakumarSubavathiInitialization(const arma::Mat<eT>& data,
-                                           const double s) : s(s)
-  {
-    dataSum = arma::sum(data % data);
-  }
-
-  /**
-   * Initialize the elements of the specified weight matrix with the
-   * Kathirvalavakumar-Subavathi method.
-   *
-   * @param W Weight matrix to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   */
-  template<typename eT>
-  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
-  {
-    arma::Row<eT> b = s * arma::sqrt(3 / (rows * dataSum));
-    const double theta = b.min();
-    RandomInitialization randomInit(-theta, theta);
-    randomInit.Initialize(W, rows, cols);
-  }
-
-  /**
-   * Initialize the elements of the specified weight 3rd order tensor with the
-   * Kathirvalavakumar-Subavathi method.
-   *
-   * @param W Weight 3rd order tensor to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   * @param slices Number of slices.
-   */
-  template<typename eT>
-  void Initialize(arma::Cube<eT>& W,
-                  const size_t rows,
-                  const size_t cols,
-                  const size_t slices)
-  {
-    W = arma::Cube<eT>(rows, cols, slices);
-
-    for (size_t i = 0; i < slices; i++)
-      Initialize(W.slice(i), rows, cols);
-  }
-
- private:
-  //! Parameter that defines the sum of elements in each column.
-  arma::rowvec dataSum;
-
-  //! Parameter that defines the active region.
-  const double s;
-}; // class KathirvalavakumarSubavathiInitialization
-
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
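(The \Theta^1 bound above can be computed directly; a self-contained sketch in
Armadillo, where the value of s and the data are illustrative rather than
prescribed by the class.)

#include <armadillo>
#include <iostream>

int main()
{
  // I x P matrix of input patterns (sizes illustrative).
  arma::mat data = arma::randu<arma::mat>(10, 100);
  const double s = 4.59;  // Active-region parameter (illustrative).

  // Column-wise sum of squared inputs, as in the constructor above.
  arma::rowvec dataSum = arma::sum(data % data);

  // Theta^1_p for every pattern p, then the minimum over all patterns.
  arma::rowvec b = s * arma::sqrt(3.0 / (data.n_rows * dataSum));
  const double theta = b.min();

  // Weights are then drawn uniformly from [-theta, theta].
  std::cout << "theta = " << theta << std::endl;
  return 0;
}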
diff --git a/src/mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp b/src/mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp
deleted file mode 100644
index c6082b2..0000000
--- a/src/mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/**
- * @file nguyen_widrow_init.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the Nguyen-Widrow method. This
- * initialization rule initializes the weights so that the active regions of
- * the neurons are approximately evenly distributed over the input space.
- *
- * For more information, see the following paper.
- *
- * @code
- * @inproceedings{NguyenIJCNN1990,
- *   title={Improving the learning speed of 2-layer neural networks by choosing
- *   initial values of the adaptive weights},
- *   booktitle={Neural Networks, 1990., 1990 IJCNN International Joint
- *   Conference on},
- *   year={1990}
- * }
- * @endcode
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_INIT_RULES_NGUYEN_WIDROW_INIT_HPP
-#define MLPACK_METHODS_ANN_INIT_RULES_NGUYEN_WIDROW_INIT_HPP
-
-#include <mlpack/core.hpp>
-
-#include "random_init.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class is used to initialize the weight matrix with the Nguyen-Widrow
- * method. The method is defined by
- *
- * @f{eqnarray*}{
- * -\gamma &\le& w_i \le \gamma \\
- * \beta &=& 0.7H^{\frac{1}{I}} \\
- * n &=& \sqrt{\sum_{i=0}^{I} w_{i}^{2}} \\
- * w_i &=& \frac{\beta w_i}{n}
- * @f}
- *
- * Where H is the number of neurons in the outgoing layer, I represents the
- * number of neurons in the ingoing layer and gamma defines the random interval
- * that is used to initialize the weights with a random value in a specific
- * range.
- */
-class NguyenWidrowInitialization
-{
- public:
-  /**
-   * Initialize the Nguyen-Widrow initialization rule with the given lower
-   * bound and upper bound.
-   *
-   * @param lowerBound The number used as lower bound.
-   * @param upperBound The number used as upper bound.
-   */
-  NguyenWidrowInitialization(const double lowerBound = -0.5,
-                             const double upperBound = 0.5) :
-      lowerBound(lowerBound), upperBound(upperBound) { }
-
-  /**
-   * Initialize the elements of the specified weight matrix with the
-   * Nguyen-Widrow method.
-   *
-   * @param W Weight matrix to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   */
-  template<typename eT>
-  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
-  {
-    RandomInitialization randomInit(lowerBound, upperBound);
-    randomInit.Initialize(W, rows, cols);
-
-    double beta = 0.7 * std::pow(cols, 1.0 / rows);
-    W *= (beta / arma::norm(W));
-  }
-
-  /**
-   * Initialize the elements of the specified weight 3rd order tensor with the
-   * Nguyen-Widrow method.
-   *
-   * @param W Weight matrix to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   * @param slices Number of slices.
-   */
-  template<typename eT>
-  void Initialize(arma::Cube<eT>& W,
-                  const size_t rows,
-                  const size_t cols,
-                  const size_t slices)
-  {
-    W = arma::Cube<eT>(rows, cols, slices);
-
-    for (size_t i = 0; i < slices; i++)
-      Initialize(W.slice(i), rows, cols);
-  }
-
- private:
-  //! The number used as lower bound.
-  const double lowerBound;
-
-  //! The number used as upper bound.
-  const double upperBound;
-}; // class NguyenWidrowInitialization
-
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
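(The Nguyen-Widrow rule is small enough to check by hand; a minimal sketch,
independent of mlpack, that also shows why the exponent needs floating-point
division.)

#include <armadillo>
#include <iostream>

int main()
{
  const size_t rows = 8, cols = 4;  // Incoming / outgoing units, illustrative.

  // Draw uniformly from [-0.5, 0.5], then rescale so that norm(W) == beta.
  arma::mat W = arma::randu<arma::mat>(rows, cols) - 0.5;

  // Note 1.0 / rows: the integer expression 1 / rows is 0 for rows > 1.
  const double beta = 0.7 * std::pow((double) cols, 1.0 / rows);
  W *= beta / arma::norm(W);

  std::cout << arma::norm(W) << " ~= " << beta << std::endl;
  return 0;
}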
diff --git a/src/mlpack/methods/ann/init_rules/oivs_init.hpp b/src/mlpack/methods/ann/init_rules/oivs_init.hpp
deleted file mode 100644
index 75c8335..0000000
--- a/src/mlpack/methods/ann/init_rules/oivs_init.hpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
- * @file oivs_init.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the Optimal Initial Value Setting method
- * (OIVS). This initialization rule is based on geometrical considerations as
- * described by H. Shimodaira.
- *
- * For more information, see the following paper.
- *
- * @code
- * @inproceedings{ShimodairaICTAI1994,
- *   title={A weight value initialization method for improving learning
- *   performance of the backpropagation algorithm in neural networks},
- *   author={Shimodaira, H.},
- *   booktitle={Tools with Artificial Intelligence, 1994. Proceedings.,
- *   Sixth International Conference on},
- *   year={1994}
- * }
- * @endcode
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_INIT_RULES_OIVS_INIT_HPP
-#define MLPACK_METHODS_ANN_INIT_RULES_OIVS_INIT_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
-
-#include "random_init.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class is used to initialize the weight matrix with the OIVS method. The
- * method is based on the equations representing the characteristics of the
- * information transformation mechanism of a node. The method is defined by
- *
- * @f{eqnarray*}{
- * b &=& |f^{-1}(1 - \epsilon) - f^{-1}(\epsilon)| \\
- * \hat{w} &=& \frac{b}{k \cdot n} \\
- * -\gamma &\le& a_i \le \gamma \\
- * w_i &=& \hat{w} \cdot \sqrt{a_i + 1}
- * @f}
- *
- * Where f is the transfer function, epsilon and k are custom parameters, n is
- * the number of neurons in the outgoing layer, and gamma is a parameter that
- * defines the random interval.
- *
- * @tparam ActivationFunction The activation function used for the oivs method.
- */
-template<
-    class ActivationFunction = LogisticFunction
->
-class OivsInitialization
-{
- public:
-  /**
-   * Initialize the OIVS initialization rule with the given values.
-   *
-   * @param epsilon Parameter to control the activation region.
-   * @param k Parameter to control the activation region width.
-   * @param gamma Parameter to define the uniform random range.
-   */
-  OivsInitialization(const double epsilon = 0.1,
-                     const int k = 5,
-                     const double gamma = 0.9) :
-      k(k), gamma(gamma),
-      b(std::abs(ActivationFunction::inv(1 - epsilon) -
-                 ActivationFunction::inv(epsilon)))
-  {
-  }
-
-  /**
-   * Initialize the elements of the specified weight matrix with the OIVS method.
-   *
-   * @param W Weight matrix to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   */
-  template<typename eT>
-  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
-  {
-    RandomInitialization randomInit(-gamma, gamma);
-    randomInit.Initialize(W, rows, cols);
-
-    W = (b / (k * rows)) * arma::sqrt(W + 1);
-  }
-
-  /**
-   * Initialize the elements of the specified weight 3rd order tensor with the
-   * OIVS method.
-   *
-   * @param W 3rd order tensor to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   * @param slices Number of slices.
-   */
-  template<typename eT>
-  void Initialize(arma::Cube<eT>& W,
-                  const size_t rows,
-                  const size_t cols,
-                  const size_t slices)
-  {
-    W = arma::Cube<eT>(rows, cols, slices);
-
-    for (size_t i = 0; i < slices; i++)
-      Initialize(W.slice(i), rows, cols);
-  }
-
- private:
-  //! Parameter to control the activation region width.
-  const int k;
-
-  //! Parameter to define the uniform random range.
-  const double gamma;
-
-  //! Parameter to control the activation region.
-  const double b;
-}; // class OivsInitialization
-
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
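(The scaling constant b depends only on the activation function; for the
logistic function the inverse is the logit, so b and \hat{w} reduce to a few
lines. All values here are illustrative.)

#include <cmath>
#include <iostream>

int main()
{
  const double epsilon = 0.1;
  const double k = 5.0;
  const double n = 8.0;  // Neurons in the outgoing layer (illustrative).

  // Logistic inverse: f^{-1}(y) = log(y / (1 - y)).
  auto logit = [](double y) { return std::log(y / (1.0 - y)); };

  const double b = std::abs(logit(1.0 - epsilon) - logit(epsilon));
  const double wHat = b / (k * n);  // \hat{w} = b / (k * n).

  std::cout << "b = " << b << ", w_hat = " << wHat << std::endl;
  return 0;
}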
diff --git a/src/mlpack/methods/ann/init_rules/orthogonal_init.hpp b/src/mlpack/methods/ann/init_rules/orthogonal_init.hpp
deleted file mode 100644
index ca16c3d..0000000
--- a/src/mlpack/methods/ann/init_rules/orthogonal_init.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/**
- * @file orthogonal_init.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the orthogonal matrix initialization method.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_INIT_RULES_ORTHOGONAL_INIT_HPP
-#define MLPACK_METHODS_ANN_INIT_RULES_ORTHOGONAL_INIT_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class is used to initialize the weight matrix with the orthogonal
- * matrix initialization method.
- */
-class OrthogonalInitialization
-{
- public:
-  /**
-   * Initialize the orthogonal matrix initialization rule with the given gain.
-   *
-   * @param gain The gain value.
-   */
-  OrthogonalInitialization(const double gain = 1.0) : gain(gain) { }
-
-  /**
-   * Initialize the elements of the specified weight matrix with the orthogonal
-   * matrix initialization method.
-   *
-   * @param W Weight matrix to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   */
-  template<typename eT>
-  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
-  {
-    arma::Mat<eT> V;
-    arma::Col<eT> s;
-
-    arma::svd_econ(W, s, V, arma::randu<arma::Mat<eT> >(rows, cols));
-    W *= gain;
-  }
-
-  /**
-   * Initialize the elements of the specified weight 3rd order tensor with the
-   * orthogonal matrix initialization method.
-   *
-   * @param W Weight matrix to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   * @param slices Number of slices.
-   */
-  template<typename eT>
-  void Initialize(arma::Cube<eT>& W,
-                  const size_t rows,
-                  const size_t cols,
-                  const size_t slices)
-  {
-    W = arma::Cube<eT>(rows, cols, slices);
-
-    for (size_t i = 0; i < slices; i++)
-      Initialize(W.slice(i), rows, cols);
-  }
-
- private:
-  //! The number used as gain.
-  const double gain;
-}; // class OrthogonalInitialization
-
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
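(A quick way to see what this rule produces: the left singular vectors of a
random matrix form an orthonormal set, so W * W.t() is numerically the
identity whenever rows <= cols. A sketch in plain Armadillo.)

#include <armadillo>
#include <iostream>

int main()
{
  const size_t rows = 4, cols = 6;

  arma::mat W, V;
  arma::vec s;
  arma::svd_econ(W, s, V, arma::randu<arma::mat>(rows, cols));

  // The deviation from the identity is at floating-point noise level.
  arma::mat shouldBeEye = W * W.t();
  std::cout << arma::abs(shouldBeEye - arma::eye(rows, rows)).max()
            << std::endl;
  return 0;
}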
diff --git a/src/mlpack/methods/ann/init_rules/zero_init.hpp b/src/mlpack/methods/ann/init_rules/zero_init.hpp
deleted file mode 100644
index f7c9b44..0000000
--- a/src/mlpack/methods/ann/init_rules/zero_init.hpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * @file zero_init.hpp
- * @author Marcus Edel
- *
- * Initialization rule for neural networks. This simple initialization is
- * performed by assigning a zero matrix to the weight matrix.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_INIT_RULES_ZERO_INIT_HPP
-#define MLPACK_METHODS_ANN_INIT_RULES_ZERO_INIT_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class is used to initialize the weight matrix with zeros.
- */
-class ZeroInitialization
-{
- public:
-  /**
-   * Create the ZeroInitialization object.
-   */
-  ZeroInitialization() { /* Nothing to do here */ }
-
-  /**
-   * Initialize the elements of the specified weight matrix.
-   *
-   * @param W Weight matrix to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   */
-  template<typename eT>
-  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
-  {
-    W = arma::zeros<arma::Mat<eT> >(rows, cols);
-  }
-
-  /**
-   * Initialize the elements of the specified weight (3rd order tensor).
-   *
-   * @param W Weight 3rd order tensor to initialize.
-   * @param rows Number of rows.
-   * @param cols Number of columns.
-   * @param slices Number of slices.
-   */
-  template<typename eT>
-  void Initialize(arma::Cube<eT>& W,
-                  const size_t rows,
-                  const size_t cols,
-                  const size_t slices)
-  {
-    W = arma::zeros<arma::Cube<eT> >(rows, cols, slices);
-  }
-}; // class ZeroInitialization
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/layer/CMakeLists.txt b/src/mlpack/methods/ann/layer/CMakeLists.txt
deleted file mode 100644
index b639cda..0000000
--- a/src/mlpack/methods/ann/layer/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  layer_traits.hpp
-  binary_classification_layer.hpp
-  base_layer.hpp
-  empty_layer.hpp
-  bias_layer.hpp
-  dropout_layer.hpp
-  dropconnect_layer.hpp
-  hard_tanh_layer.hpp
-  leaky_relu_layer.hpp
-  linear_layer.hpp
-  conv_layer.hpp
-  pooling_layer.hpp
-  recurrent_layer.hpp
-  lstm_layer.hpp
-  sparse_bias_layer.hpp
-  sparse_input_layer.hpp
-  sparse_output_layer.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/layer/base_layer.hpp b/src/mlpack/methods/ann/layer/base_layer.hpp
deleted file mode 100644
index 2b915a1..0000000
--- a/src/mlpack/methods/ann/layer/base_layer.hpp
+++ /dev/null
@@ -1,223 +0,0 @@
-/**
- * @file base_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the BaseLayer class, which attaches various functions to the
- * embedding layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_BASE_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_BASE_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
-#include <mlpack/methods/ann/activation_functions/identity_function.hpp>
-#include <mlpack/methods/ann/activation_functions/rectifier_function.hpp>
-#include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the base layer. The base layer works as a metaclass which
- * attaches various functions to the embedding layer.
- *
- * A few convenience typedefs are given:
- *
- *  - SigmoidLayer
- *  - IdentityLayer
- *  - ReLULayer
- *  - TanHLayer
- *  - BaseLayer2D
- *
- * @tparam ActivationFunction Activation function used for the embedding layer.
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    class ActivationFunction = LogisticFunction,
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class BaseLayer
-{
- public:
-  /**
-   * Create the BaseLayer object.
-   */
-  BaseLayer()
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename InputType, typename OutputType>
-  void Forward(const InputType& input, OutputType& output)
-  {
-    ActivationFunction::fn(input, output);
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the feed
-   * forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType>
-  void Backward(const DataType& input,
-                const DataType& gy,
-                DataType& g)
-  {
-    DataType derivative;
-    ActivationFunction::deriv(input, derivative);
-    g = gy % derivative;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the feed
-   * forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Cube<eT>& input,
-                const arma::Mat<eT>& gy,
-                arma::Cube<eT>& g)
-  {
-    // Generate a cube using the backpropagated error matrix.
-    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
-        input.n_cols, input.n_slices);
-
-    for (size_t s = 0, j = 0; s < mappedError.n_slices; s += gy.n_cols, j++)
-    {
-      for (size_t i = 0; i < gy.n_cols; i++)
-      {
-        arma::Col<eT> temp = gy.col(i).subvec(
-            j * input.n_rows * input.n_cols,
-            (j + 1) * input.n_rows * input.n_cols - 1);
-
-        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
-            input.n_rows, input.n_cols);
-      }
-    }
-
-    arma::Cube<eT> derivative;
-    ActivationFunction::deriv(input, derivative);
-    g = mappedError % derivative;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& /* ar */, const unsigned int /* version */)
-  {
-    /* Nothing to do here */
-  }
-
- private:
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class BaseLayer
-
-// Convenience typedefs.
-
-/**
- * Standard Sigmoid-Layer using the logistic activation function.
- */
-template <
-    class ActivationFunction = LogisticFunction,
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-using SigmoidLayer = BaseLayer<
-    ActivationFunction, InputDataType, OutputDataType>;
-
-/**
- * Standard Identity-Layer using the identity activation function.
- */
-template <
-    class ActivationFunction = IdentityFunction,
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-using IdentityLayer = BaseLayer<
-    ActivationFunction, InputDataType, OutputDataType>;
-
-/**
- * Standard rectified linear unit non-linearity layer.
- */
-template <
-    class ActivationFunction = RectifierFunction,
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-using ReLULayer = BaseLayer<
-    ActivationFunction, InputDataType, OutputDataType>;
-
-/**
- * Standard hyperbolic tangent layer.
- */
-template <
-    class ActivationFunction = TanhFunction,
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-using TanHLayer = BaseLayer<
-    ActivationFunction, InputDataType, OutputDataType>;
-
-/**
- * Standard Base-Layer2D using the logistic activation function.
- */
-template <
-    class ActivationFunction = LogisticFunction,
-    typename InputDataType = arma::cube,
-    typename OutputDataType = arma::cube
->
-using BaseLayer2D = BaseLayer<
-    ActivationFunction, InputDataType, OutputDataType>;
-
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
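
For context on the removed BaseLayer: Forward() applies the activation
function elementwise, and Backward() multiplies the incoming error
elementwise with the activation derivative (g = gy % derivative). A minimal
self-contained sketch of that pattern, assuming only Armadillo;
LogisticSketch is a hypothetical stand-in for the fn()/deriv() interface
BaseLayer expects, not mlpack's API:

    #include <armadillo>

    // Hypothetical activation matching the fn()/deriv() interface that
    // BaseLayer expects from its ActivationFunction template parameter.
    struct LogisticSketch
    {
      static void fn(const arma::mat& x, arma::mat& y)
      { y = 1.0 / (1.0 + arma::exp(-x)); }

      static void deriv(const arma::mat& x, arma::mat& d)
      { arma::mat y; fn(x, y); d = y % (1.0 - y); }
    };

    int main()
    {
      arma::mat input = arma::randn<arma::mat>(3, 2);

      // Forward pass: f(x).
      arma::mat output;
      LogisticSketch::fn(input, output);

      // Backward pass: elementwise chain rule, i.e. the g = gy % derivative
      // line from BaseLayer::Backward().
      arma::mat gy = arma::ones<arma::mat>(3, 2);
      arma::mat derivative, g;
      LogisticSketch::deriv(input, derivative);
      g = gy % derivative;

      g.print("gradient");
      return 0;
    }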
diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp
deleted file mode 100644
index 0be535d..0000000
--- a/src/mlpack/methods/ann/layer/bias_layer.hpp
+++ /dev/null
@@ -1,208 +0,0 @@
-/**
- * @file bias_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the BiasLayer class.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * An implementation of a standard bias layer. The BiasLayer class represents a
- * single layer of a neural network.
- *
- * A convenient typedef is given:
- *
- *  - BiasLayer2D
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class BiasLayer
-{
- public:
-  /**
-   * Create the BiasLayer object using the specified number of units and bias
-   * parameter.
-   *
-   * @param outSize The number of output units.
-   * @param bias The bias value.
-   */
-  BiasLayer(const size_t outSize, const double bias = 1) :
-      outSize(outSize),
-      bias(bias)
-  {
-    weights.set_size(outSize, 1);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    output = input + (weights * bias);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
-  {
-    output = input;
-    for (size_t s = 0; s < input.n_slices; s++)
-    {
-      output.slice(s) += weights(s) * bias;
-    }
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType, typename ErrorType>
-  void Backward(const DataType& /* unused */,
-                const ErrorType& gy,
-                ErrorType& g)
-  {
-    g = gy;
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the bias.
-   *
-   * @param input The propagated input.
-   * @param error The calculated error.
-   * @param gradient The calculated gradient.
-   */
-  template<typename eT, typename ErrorType, typename GradientType>
-  void Gradient(const arma::Mat<eT>& /* input */,
-                const ErrorType& error,
-                GradientType& gradient)
-  {
-    gradient = error * bias;
-  }
-
-  //! Get the weights.
-  InputDataType const& Weights() const { return weights; }
-  //! Modify the weights.
-  InputDataType& Weights() { return weights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  InputDataType const& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  InputDataType& Gradient() { return gradient; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(weights, "weights");
-    ar & data::CreateNVP(bias, "bias");
-  }
-
- private:
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! Locally-stored bias value.
-  double bias;
-
-  //! Locally-stored weight object.
-  InputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  InputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class BiasLayer
-
-//! Layer traits for the bias layer.
-template<typename InputDataType, typename OutputDataType>
-class LayerTraits<BiasLayer<InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = true;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-/**
- * Standard 2D-Bias-Layer.
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::cube
->
-using BiasLayer2D = BiasLayer<InputDataType, OutputDataType>;
-
-/**
- * Standard Addition-Layer.
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-using AdditionLayer = BiasLayer<InputDataType, OutputDataType>;
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
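
The removed BiasLayer adds a learned per-unit bias column to its input
(output = input + weights * bias); Backward() passes the error through
unchanged, and Gradient() is simply error * bias. A short sketch of the
forward rule, assuming Armadillo, with the broadcast over a batch written
out explicitly via repmat:

    #include <armadillo>

    int main()
    {
      // One bias weight per output unit; the bias multiplier defaults to 1,
      // as in the removed BiasLayer constructor.
      const double bias = 1.0;
      arma::mat weights(4, 1, arma::fill::randn);

      // A batch of 5 column-vector inputs, 4 units each.
      arma::mat input(4, 5, arma::fill::zeros);

      // Forward: add the (scaled) bias column to every input column.
      arma::mat output = input + arma::repmat(weights * bias, 1, input.n_cols);

      output.print("biased output");
      return 0;
    }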
diff --git a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp b/src/mlpack/methods/ann/layer/binary_classification_layer.hpp
deleted file mode 100644
index 1b3d617..0000000
--- a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * @file binary_classification_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the BinaryClassificationLayer class, which implements a
- * binary classification layer that can be used as an output layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * An implementation of a binary classification layer that can be used as
- * an output layer.
- */
-class BinaryClassificationLayer
-{
- public:
-  /**
-   * Create the BinaryClassificationLayer object.
-   *
-   * @param confidence The confidence used for the output class transformation.
-   */
-  BinaryClassificationLayer(const double confidence = 0.5) :
-      confidence(confidence)
-  {
-    // Nothing to do here.
-  }
-
-  /*
-   * Calculate the error using the specified input activation and the target.
-   * The error is stored into the given error parameter.
-   *
-   * @param inputActivations Input data used for evaluating the network.
-   * @param target Target data used for evaluating the network.
-   * @param error The calculated error with respect to the input activation and
-   * the given target.
-   */
-  template<typename DataType>
-  void CalculateError(const DataType& inputActivations,
-                      const DataType& target,
-                      DataType& error)
-  {
-    error = inputActivations - target;
-  }
-
-  /*
-   * Calculate the output class using the specified input activation.
-   *
-   * @param inputActivations Input data used to calculate the output class.
-   * @param output Output class of the input activation.
-   */
-  template<typename DataType>
-  void OutputClass(const DataType& inputActivations, DataType& output)
-  {
-    output = inputActivations;
-
-    for (size_t i = 0; i < output.n_elem; i++)
-      output(i) = output(i) > confidence ? 1 : 0;
-  }
-
-  //! Get the confidence parameter.
-  double const& Confidence() const { return confidence; }
-  //! Modify the confidence parameter.
-  double& Confidence() { return confidence; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(confidence, "confidence");
-  }
-
- private:
-  //! The confidence threshold used for the output class transformation.
-  double confidence;
-
-}; // class BinaryClassificationLayer
-
-//! Layer traits for the binary classification layer.
-template <>
-class LayerTraits<BinaryClassificationLayer>
-{
- public:
-  static const bool IsBinary = true;
-  static const bool IsOutputLayer = true;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = false;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
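
Both operations of this removed output layer are one-liners: the error is
the raw difference between activation and target, and the predicted class is
a threshold at the confidence value. A small sketch, assuming Armadillo:

    #include <armadillo>

    int main()
    {
      const double confidence = 0.5;

      arma::vec activation = {0.9, 0.2, 0.6};
      arma::vec target     = {1.0, 0.0, 0.0};

      // CalculateError(): difference between activation and target.
      arma::vec error = activation - target;

      // OutputClass(): threshold each activation at the confidence value.
      arma::vec output = activation;
      output.transform([&](double v) { return v > confidence ? 1.0 : 0.0; });

      output.print("predicted classes");  // 1, 0, 1
      error.print("error");
      return 0;
    }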
diff --git a/src/mlpack/methods/ann/layer/constant_layer.hpp b/src/mlpack/methods/ann/layer/constant_layer.hpp
deleted file mode 100644
index 31da87e..0000000
--- a/src/mlpack/methods/ann/layer/constant_layer.hpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * @file constant_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the ConstantLayer class, which outputs a constant value given
- * any input.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the constant layer. The constant layer outputs a fixed
- * constant value regardless of the input.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class ConstantLayer
-{
- public:
-  /**
-   * Create the ConstantLayer object that outputs a constant scalar value
-   * regardless of the input.
-   *
-   * @param outSize The number of output units.
-   * @param scalar The constant value used to create the constant output.
-   */
-  ConstantLayer(const size_t outSize, const double scalar)
-  {
-    constantOutput = OutputDataType(outSize, 1);
-    constantOutput.fill(scalar);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network. The forward pass fills the
-   * output with the specified constant parameter.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& /* input */, arma::Mat<eT>& output)
-  {
-    output = constantOutput;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network. The backward pass of the
-   * constant layer always returns a zero output error matrix.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Mat<eT>& /* input */,
-                const arma::Mat<eT>& /* gy */,
-                arma::Mat<eT>& g)
-  {
-    g = arma::zeros<arma::Mat<eT> >(inputParameter.n_rows,
-        inputParameter.n_cols);
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(constantOutput, "constantOutput");
-  }
-
- private:
-  //! Locally-stored constant output matrix.
-  OutputDataType constantOutput;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class ConstantLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
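
ConstantLayer is simple enough that a two-line sketch captures it: the
forward pass ignores its input and emits the stored constant vector, and the
backward pass reports a zero gradient, since the output does not depend on
the input. Assuming Armadillo:

    #include <armadillo>

    int main()
    {
      // ConstantLayer(3, 7.0): a 3x1 output filled with the scalar 7.
      arma::mat constantOutput(3, 1);
      constantOutput.fill(7.0);

      arma::mat input(3, 1, arma::fill::randn);  // ignored by Forward()
      arma::mat output = constantOutput;

      // Backward(): gradient w.r.t. the input is identically zero.
      arma::mat g = arma::zeros<arma::mat>(input.n_rows, input.n_cols);

      output.print("constant output");
      g.print("gradient");
      return 0;
    }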
diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp
deleted file mode 100644
index bbb918c..0000000
--- a/src/mlpack/methods/ann/layer/conv_layer.hpp
+++ /dev/null
@@ -1,324 +0,0 @@
-/**
- * @file conv_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the ConvLayer class.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-#include <mlpack/methods/ann/convolution_rules/border_modes.hpp>
-#include <mlpack/methods/ann/convolution_rules/naive_convolution.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the ConvLayer class. The ConvLayer class represents a
- * single layer of a neural network.
- *
- * @tparam ForwardConvolutionRule Convolution to perform forward process.
- * @tparam BackwardConvolutionRule Convolution to perform backward process.
- * @tparam GradientConvolutionRule Convolution to calculate gradient.
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename ForwardConvolutionRule = NaiveConvolution<ValidConvolution>,
-    typename BackwardConvolutionRule = NaiveConvolution<FullConvolution>,
-    typename GradientConvolutionRule = NaiveConvolution<ValidConvolution>,
-    typename InputDataType = arma::cube,
-    typename OutputDataType = arma::cube
->
-class ConvLayer
-{
- public:
-  /**
-   * Create the ConvLayer object using the specified number of input maps,
-   * output maps, filter size, stride and padding parameter.
-   *
-   * @param inMaps The number of input maps.
-   * @param outMaps The number of output maps.
-   * @param wfilter Width of the filter/kernel.
- * @param hfilter Height of the filter/kernel.
-   * @param xStride Stride of filter application in the x direction.
-   * @param yStride Stride of filter application in the y direction.
-   * @param wPad Spatial padding width of the input.
-   * @param hPad Spatial padding height of the input.
-   */
-  ConvLayer(const size_t inMaps,
-            const size_t outMaps,
-            const size_t wfilter,
-            const size_t hfilter,
-            const size_t xStride = 1,
-            const size_t yStride = 1,
-            const size_t wPad = 0,
-            const size_t hPad = 0) :
-      wfilter(wfilter),
-      hfilter(hfilter),
-      inMaps(inMaps),
-      outMaps(outMaps),
-      xStride(xStride),
-      yStride(yStride),
-      wPad(wPad),
-      hPad(hPad)
-  {
-    weights.set_size(wfilter, hfilter, inMaps * outMaps);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
-  {
-    const size_t wConv = ConvOutSize(input.n_rows, wfilter, xStride, wPad);
-    const size_t hConv = ConvOutSize(input.n_cols, hfilter, yStride, hPad);
-
-    output = arma::zeros<arma::Cube<eT> >(wConv, hConv, outMaps);
-    for (size_t outMap = 0, outMapIdx = 0; outMap < outMaps; outMap++)
-    {
-      for (size_t inMap = 0; inMap < inMaps; inMap++, outMapIdx++)
-      {
-        arma::Mat<eT> convOutput;
-        ForwardConvolutionRule::Convolution(input.slice(inMap),
-            weights.slice(outMap), convOutput);
-
-        output.slice(outMap) += convOutput;
-      }
-    }
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Cube<eT>& /* unused */,
-                const arma::Cube<eT>& gy,
-                arma::Cube<eT>& g)
-  {
-    g = arma::zeros<arma::Cube<eT> >(inputParameter.n_rows,
-                                     inputParameter.n_cols,
-                                     inputParameter.n_slices);
-
-    for (size_t outMap = 0, outMapIdx = 0; outMap < inMaps; outMap++)
-    {
-      for (size_t inMap = 0; inMap < outMaps; inMap++, outMapIdx++)
-      {
-        arma::Mat<eT> rotatedFilter;
-        Rotate180(weights.slice(outMap * outMaps + inMap), rotatedFilter);
-
-        arma::Mat<eT> output;
-        BackwardConvolutionRule::Convolution(gy.slice(inMap), rotatedFilter,
-            output);
-
-        g.slice(outMap) += output;
-      }
-    }
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the input activation.
-   *
-   * @param input The input parameter used for calculating the gradient.
-   * @param d The calculated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Gradient(const InputType& input,
-                const arma::Cube<eT>& d,
-                arma::Cube<eT>& g)
-  {
-    g = arma::zeros<arma::Cube<eT> >(weights.n_rows, weights.n_cols,
-        weights.n_slices);
-
-    for (size_t outMap = 0; outMap < outMaps; outMap++)
-    {
-      for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps)
-      {
-        arma::Cube<eT> inputSlices = input.slices(inMap, inMap);
-        arma::Cube<eT> deltaSlices = d.slices(outMap, outMap);
-
-        arma::Cube<eT> output;
-        GradientConvolutionRule::Convolution(inputSlices, deltaSlices, output);
-
-        for (size_t i = 0; i < output.n_slices; i++)
-          g.slice(s) += output.slice(i);
-      }
-    }
-  }
-
-  //! Get the weights.
-  OutputDataType const& Weights() const { return weights; }
-  //! Modify the weights.
-  OutputDataType& Weights() { return weights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  OutputDataType const& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  OutputDataType& Gradient() { return gradient; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(weights, "weights");
-    ar & data::CreateNVP(wfilter, "wfilter");
-    ar & data::CreateNVP(hfilter, "hfilter");
-    ar & data::CreateNVP(inMaps, "inMaps");
-    ar & data::CreateNVP(outMaps, "outMaps");
-    ar & data::CreateNVP(xStride, "xStride");
-    ar & data::CreateNVP(yStride, "yStride");
-    ar & data::CreateNVP(wPad, "wPad");
-    ar & data::CreateNVP(hPad, "hPad");
-  }
-
- private:
-  /*
-   * Rotates a 3rd-order tensor by 180 degrees.
-   *
-   * @param input The input data to be rotated.
-   * @param output The rotated output.
-   */
-  template<typename eT>
-  void Rotate180(const arma::Cube<eT>& input, arma::Cube<eT>& output)
-  {
-    output = arma::Cube<eT>(input.n_rows, input.n_cols, input.n_slices);
-
-    // Left-right flip, up-down flip.
-    for (size_t s = 0; s < output.n_slices; s++)
-      output.slice(s) = arma::fliplr(arma::flipud(input.slice(s)));
-  }
-
-  /*
-   * Rotates a dense matrix counterclockwise by 180 degrees.
-   *
-   * @param input The input data to be rotated.
-   * @param output The rotated output.
-   */
-  template<typename eT>
-  void Rotate180(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    // Left-right flip, up-down flip.
-    output = arma::fliplr(arma::flipud(input));
-  }
-
-  /*
-   * Return the convolution output size.
-   *
-   * @param size The size of the input (row or column).
-   * @param k The size of the filter (width or height).
-   * @param s The stride size (x or y direction).
-   * @param p The size of the padding (width or height).
-   * @return The convolution output size.
-   */
-  size_t ConvOutSize(const size_t size,
-                     const size_t k,
-                     const size_t s,
-                     const size_t p)
-  {
-    return std::floor((size + p * 2 - k) / (double) s) + 1;
-  }
-
-  //! Locally-stored filter/kernel width.
-  size_t wfilter;
-
-  //! Locally-stored filter/kernel height.
-  size_t hfilter;
-
-  //! Locally-stored number of input maps.
-  size_t inMaps;
-
-  //! Locally-stored number of output maps.
-  size_t outMaps;
-
-  //! Locally-stored stride of the filter in x-direction.
-  size_t xStride;
-
-  //! Locally-stored stride of the filter in y-direction.
-  size_t yStride;
-
-  //! Locally-stored padding width.
-  size_t wPad;
-
-  //! Locally-stored padding height.
-  size_t hPad;
-
-  //! Locally-stored weight object.
-  OutputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class ConvLayer
-
-//! Layer traits for the convolution layer.
-template<
-    typename ForwardConvolutionRule,
-    typename BackwardConvolutionRule,
-    typename GradientConvolutionRule,
-    typename InputDataType,
-    typename OutputDataType
->
-class LayerTraits<ConvLayer<ForwardConvolutionRule,
-                            BackwardConvolutionRule,
-                            GradientConvolutionRule,
-                            InputDataType,
-                            OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
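
As a concrete check of ConvOutSize(): a 28x28 input with a 5x5 filter,
stride 1, and no padding gives floor((28 + 0 - 5) / 1) + 1 = 24 rows and
columns. The sketch below, assuming Armadillo, redoes that arithmetic and
pairs it with a naive stride-1 "valid" convolution loop in the spirit of the
NaiveConvolution rule referenced above (ValidConv is an illustrative helper,
not mlpack's API):

    #include <armadillo>
    #include <cmath>
    #include <iostream>

    // Output size of a strided, padded convolution along one dimension.
    size_t ConvOutSize(size_t size, size_t k, size_t s, size_t p)
    {
      return (size_t) std::floor((size + p * 2 - k) / (double) s) + 1;
    }

    // Naive 'valid' 2D convolution (stride 1, no padding): slide the
    // 180-degree-rotated filter over the input, summing elementwise products.
    arma::mat ValidConv(const arma::mat& in, const arma::mat& filter)
    {
      arma::mat f = arma::fliplr(arma::flipud(filter));
      arma::mat out(in.n_rows - f.n_rows + 1, in.n_cols - f.n_cols + 1);
      for (size_t i = 0; i < out.n_rows; ++i)
        for (size_t j = 0; j < out.n_cols; ++j)
          out(i, j) = arma::accu(
              in.submat(i, j, i + f.n_rows - 1, j + f.n_cols - 1) % f);
      return out;
    }

    int main()
    {
      std::cout << ConvOutSize(28, 5, 1, 0) << std::endl;  // prints 24

      arma::mat image(28, 28, arma::fill::randn);
      arma::mat kernel(5, 5, arma::fill::randn);
      arma::mat feature = ValidConv(image, kernel);
      std::cout << feature.n_rows << "x" << feature.n_cols << std::endl;
      return 0;
    }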
diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
deleted file mode 100644
index fdb14cb..0000000
--- a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
+++ /dev/null
@@ -1,361 +0,0 @@
-/**
- * @file dropconnect_layer.hpp
- * @author Palash Ahuja
- *
- * Definition of the DropConnectLayer class, which implements a regularizer
- * that randomly sets connections to zero, preventing units from co-adapting.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-#include "empty_layer.hpp"
-#include <mlpack/methods/ann/network_util.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The DropConnect layer is a regularizer that randomly sets connection
- * values to zero with probability ratio and scales the remaining elements
- * by the factor 1 / (1 - ratio) when deterministic is false. In
- * deterministic mode (during testing), the layer simply computes the output
- * without masking. The output is computed using the given input layer; if
- * no input layer is given, a linear layer is used by default.
- *
- * Note:
- * During training you should set deterministic to false and during testing
- * you should set deterministic to true.
- *
- *  For more information, see the following.
- *
- * @code
- * @inproceedings{WanICML2013,
- *   title={Regularization of Neural Networks using DropConnect},
- *   booktitle = {Proceedings of the 30th International Conference on Machine
- *                Learning (ICML-13)},
- *   author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and
- *             Rob Fergus},
- *   year = {2013}
- * }
- * @endcode
- *
- * @tparam InputLayer Layer used instead of the internal linear layer.
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template<
-    typename InputLayer = EmptyLayer<arma::mat, arma::mat>,
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class DropConnectLayer
-{
- public:
-  /**
-   * Create the DropConnectLayer object as a linear layer using the specified
-   * input size, output size, and ratio.
-   *
-   * @param inSize The number of input units.
-   * @param outSize The number of output units.
-   * @param ratio The probability of setting a value to zero.
-   */
-  DropConnectLayer (const size_t inSize,
-                    const size_t outSize,
-                    const double ratio = 0.5) :
-      inSize(inSize),
-      outSize(outSize),
-      ratio(ratio),
-      scale(1.0 / (1 - ratio)),
-      uselayer(false)
-  {
-    weights.set_size(outSize, inSize);
-  }
-
-  /**
-   * Create the DropConnectLayer object using the specified input layer and
-   * ratio.
-   *
-   * @param inputLayer The layer object that DropConnect is applied to.
-   * @param ratio The probability of setting a connection to zero.
-   */
-  template<typename InputLayerType>
-  DropConnectLayer(InputLayerType &&inputLayer,
-                   const double ratio = 0.5) :
-      baseLayer(std::forward<InputLayerType>(inputLayer)),
-      ratio(ratio),
-      scale(1.0 / (1 - ratio)),
-      uselayer(true)
-  {
-    static_assert(std::is_same<typename std::decay<InputLayerType>::type,
-                  InputLayer>::value,
-                  "The type of the inputLayer must be InputLayerType");
-  }
-
-  /**
-   * Ordinary feed forward pass of the DropConnect layer.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT> &input, arma::Mat<eT> &output)
-  {
-    // The DropConnect mask will not be multiplied in the deterministic mode
-    // (during testing).
-    if (deterministic)
-    {
-      if (uselayer)
-      {
-        baseLayer.Forward(input, output);
-      }
-      else
-      {
-        output = weights * input;
-      }
-    }
-    else
-    {
-      if (uselayer)
-      {
-        // Scale with input / (1 - ratio) and set values to zero with
-        // probability ratio.
-        mask = arma::randu<arma::Mat<eT> >(baseLayer.Weights().n_rows,
-            baseLayer.Weights().n_cols);
-        mask.transform([&](double val) { return (val > ratio); });
-
-        // Save weights for denoising.
-        denoise = baseLayer.Weights();
-
-        baseLayer.Weights() = baseLayer.Weights() % mask;
-
-        baseLayer.Forward(input, output);
-      }
-      else
-      {
-        // Scale with input / (1 - ratio) and set values to zero with
-        // probability ratio.
-        mask = arma::randu<arma::Mat<eT> >(weights.n_rows, weights.n_cols);
-        mask.transform([&](double val) { return (val > ratio); });
-
-        // Save weights for denoising.
-        denoise = weights;
-
-        weights = weights % mask;
-        output = weights * input;
-      }
-
-      output = output * scale;
-    }
-  }
-
-  /**
-   * Ordinary feed backward pass of the DropConnect layer.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType>
-  void Backward(const DataType& input, const DataType& gy, DataType& g)
-  {
-    if (uselayer)
-    {
-      baseLayer.Backward(input, gy, g);
-    }
-    else
-    {
-      g = weights.t() * gy;
-    }
-  }
-
-  /**
-   * Calculate the gradient using the output delta and the input activation.
-   *
-   * @param input The propagated input.
-   * @param d The calculated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT, typename GradientDataType>
-  void Gradient(const InputType& input,
-                const arma::Mat<eT>& d,
-                GradientDataType& g)
-  {
-    if (uselayer)
-    {
-      baseLayer.Gradient(input, d, g);
-
-      // Denoise the weights.
-      baseLayer.Weights() = denoise;
-    }
-    else
-    {
-      g = d * input.t();
-
-      // Denoise the weights.
-      weights = denoise;
-    }
-  }
-
-  //! Get the weights.
-  OutputDataType const& Weights() const
-  {
-    if (uselayer)
-      return baseLayer.Weights();
-
-    return weights;
-  }
-
-  //! Modify the weights.
-  OutputDataType& Weights()
-  {
-    if (uselayer)
-      return baseLayer.Weights();
-
-    return weights;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const
-  {
-    if (uselayer)
-      return baseLayer.InputParameter();
-
-    return inputParameter;
-  }
-
-  //! Modify the input parameter.
-  InputDataType& InputParameter()
-  {
-    if (uselayer)
-      return baseLayer.InputParameter();
-
-    return inputParameter;
-  }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const
-  {
-    if (uselayer)
-      return baseLayer.OutputParameter();
-
-    return outputParameter;
-  }
-
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter()
-  {
-    if (uselayer)
-      return baseLayer.OutputParameter();
-
-    return outputParameter;
-  }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const
-  {
-    if (uselayer)
-      return baseLayer.Delta();
-
-    return delta;
-  }
-
-  //! Modify the delta.
-  OutputDataType& Delta()
-  {
-    if (uselayer)
-      return baseLayer.Delta();
-
-    return delta;
-  }
-
-  //! Get the gradient.
-  OutputDataType const& Gradient() const
-  {
-    if (uselayer)
-      return baseLayer.Gradient();
-
-    return gradient;
-  }
-
-  //! Modify the gradient.
-  OutputDataType& Gradient()
-  {
-    if (uselayer)
-      return baseLayer.Gradient();
-
-    return gradient;
-  }
-
-  //! The value of the deterministic parameter.
-  bool Deterministic() const { return deterministic; }
-
-  //! Modify the value of the deterministic parameter.
-  bool& Deterministic() { return deterministic; }
-
-  //! The probability of setting a value to zero.
-  double Ratio() const { return ratio; }
-
-  //! Modify the probability of setting a value to zero.
-  void Ratio(const double r)
-  {
-    ratio = r;
-    scale = 1.0 / (1.0 - ratio);
-  }
-
- private:
-  //! Locally-stored layer object.
-  InputLayer baseLayer;
-
-  //! Locally-stored number of input units.
-  size_t inSize;
-
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! The probability of setting a value to zero.
-  double ratio;
-
-  //! The scale fraction.
-  double scale;
-
-  //! If true, the given base layer is used instead of the internal weights.
-  bool uselayer;
-
-  //! Locally-stored weight object.
-  OutputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Locally-stored mask object.
-  OutputDataType mask;
-
-  //! If true, dropout and scaling are disabled; see the notes above.
-  bool deterministic;
-
-  //! Denoise mask for the weights.
-  OutputDataType denoise;
-}; // class DropConnectLayer.
-
-}  // namespace ann
-}  // namespace mlpack
-
-#endif
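
DropConnect differs from dropout in what it masks: the weights rather than
the activations. The removed layer samples a fresh binary mask over the
weight matrix on each training pass, saves the full weights so Gradient()
can restore ("denoise") them afterwards, and rescales by 1 / (1 - ratio). A
compact sketch of that sequence, assuming Armadillo:

    #include <armadillo>

    int main()
    {
      arma::arma_rng::set_seed(42);

      const double ratio = 0.5;               // probability of zeroing a weight
      const double scale = 1.0 / (1.0 - ratio);

      arma::mat weights(4, 3, arma::fill::randn);
      arma::mat input(3, 1, arma::fill::randn);

      // Training mode: binary mask over the weights, not the inputs.
      arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
      mask.transform([&](double v) { return v > ratio ? 1.0 : 0.0; });

      arma::mat denoise = weights;             // saved for restoring later
      arma::mat output = ((weights % mask) * input) * scale;

      // Test mode (deterministic): the plain, unmasked linear map.
      arma::mat testOutput = denoise * input;

      output.print("train output");
      testOutput.print("test output");
      return 0;
    }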
diff --git a/src/mlpack/methods/ann/layer/dropout_layer.hpp b/src/mlpack/methods/ann/layer/dropout_layer.hpp
deleted file mode 100644
index 3ed0bd6..0000000
--- a/src/mlpack/methods/ann/layer/dropout_layer.hpp
+++ /dev/null
@@ -1,252 +0,0 @@
-/**
- * @file dropout_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the DropoutLayer class, which implements a regularizer that
- * randomly sets units to zero, preventing units from co-adapting.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The dropout layer is a regularizer that randomly sets input values to zero
- * with probability ratio and scales the remaining elements by the factor
- * 1 / (1 - ratio). In deterministic mode (during testing), the layer just
- * scales the output by 1 / (1 - ratio) if rescale is true.
- *
- * Note: During training you should set deterministic to false and during
- * testing you should set deterministic to true.
- *
- * For more information, see the following.
- *
- * @code
- * @article{Hinton2012,
- *   author  = {Geoffrey E. Hinton and Nitish Srivastava and Alex Krizhevsky
- *              and Ilya Sutskever and Ruslan Salakhutdinov},
- *   title   = {Improving neural networks by preventing co-adaptation of feature
- *              detectors},
- *   journal = {CoRR},
- *   volume  = {abs/1207.0580},
- *   year    = {2012},
- * }
- * @endcode
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class DropoutLayer
-{
- public:
-
-  /**
-   * Create the DropoutLayer object using the specified ratio and rescale
-   * parameter.
-   *
-   * @param ratio The probability of setting a value to zero.
-   * @param rescale If true, the input is rescaled when deterministic is false.
-   */
-  DropoutLayer(const double ratio = 0.5,
-               const bool rescale = true) :
-      ratio(ratio),
-      scale(1.0 / (1.0 - ratio)),
-      rescale(rescale)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of the dropout layer.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    // The dropout mask will not be multiplied in the deterministic mode
-    // (during testing).
-    if (deterministic)
-    {
-      if (!rescale)
-      {
-        output = input;
-      }
-      else
-      {
-        output = input * scale;
-      }
-    }
-    else
-    {
-      // Scale with input / (1 - ratio) and set values to zero with probability
-      // ratio.
-      mask = arma::randu<arma::Mat<eT> >(input.n_rows, input.n_cols);
-      mask.transform( [&](double val) { return (val > ratio); } );
-      output = input % mask * scale;
-    }
-  }
-
-  /**
-   * Ordinary feed forward pass of the dropout layer.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
-  {
-    // The dropout mask will not be multiplied in the deterministic mode
-    // (during testing).
-    if (deterministic)
-    {
-      if (!rescale)
-      {
-        output = input;
-      }
-      else
-      {
-        output = input * scale;
-      }
-    }
-    else
-    {
-      // Scale with input / (1 - ratio) and set values to zero with probability
-      // ratio.
-      mask = arma::randu<arma::Cube<eT> >(input.n_rows, input.n_cols,
-          input.n_slices);
-      mask.transform( [&](double val) { return (val > ratio); } );
-      output = input % mask * scale;
-    }
-  }
-
-  /**
-   * Ordinary feed backward pass of the dropout layer.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType>
-  void Backward(const DataType& /* unused */,
-                const DataType& gy,
-                DataType& g)
-  {
-    g = gy % mask * scale;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! The value of the deterministic parameter.
-  bool Deterministic() const { return deterministic; }
-  //! Modify the value of the deterministic parameter.
-  bool& Deterministic() { return deterministic; }
-
-  //! The probability of setting a value to zero.
-  double Ratio() const { return ratio; }
-
-  //! Modify the probability of setting a value to zero.
-  void Ratio(const double r)
-  {
-    ratio = r;
-    scale = 1.0 / (1.0 - ratio);
-  }
-
-  //! The value of the rescale parameter.
-  bool Rescale() const { return rescale; }
-  //! Modify the value of the rescale parameter.
-  bool& Rescale() { return rescale; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(ratio, "ratio");
-    ar & data::CreateNVP(rescale, "rescale");
-  }
-
- private:
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Locally-stored mask object.
-  OutputDataType mask;
-
-  //! The probability of setting a value to zero.
-  double ratio;
-
-  //! The scale fraction.
-  double scale;
-
-  //! If true, dropout and scaling are disabled; see the notes above.
-  bool deterministic;
-
-  //! If true, the input is rescaled when deterministic is false.
-  bool rescale;
-}; // class DropoutLayer
-
-//! Layer traits for the dropout layer.
-template <
-  typename InputDataType,
-  typename OutputDataType
->
-class LayerTraits<DropoutLayer<InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-/**
- * Standard Dropout-Layer2D.
- */
-template <
-    typename InputDataType = arma::cube,
-    typename OutputDataType = arma::cube
->
-using DropoutLayer2D = DropoutLayer<InputDataType, OutputDataType>;
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
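
The dropout forward and backward passes share one mask: training zeroes each
input with probability ratio and rescales survivors by 1 / (1 - ratio) so
the expected activation matches test time, and the backward pass reuses the
identical mask and scale on the error. A minimal sketch, assuming Armadillo:

    #include <armadillo>

    int main()
    {
      arma::arma_rng::set_seed(1);

      const double ratio = 0.5;
      const double scale = 1.0 / (1.0 - ratio);

      arma::mat input(4, 4, arma::fill::randn);

      // Forward (training): mask, then rescale the surviving entries.
      arma::mat mask = arma::randu<arma::mat>(input.n_rows, input.n_cols);
      mask.transform([&](double v) { return v > ratio ? 1.0 : 0.0; });
      arma::mat output = input % mask * scale;

      // Backward: the same mask and scale applied to the incoming error,
      // i.e. the g = gy % mask * scale line from DropoutLayer::Backward().
      arma::mat gy = arma::ones<arma::mat>(4, 4);
      arma::mat g = gy % mask * scale;

      output.print("dropped-out activations");
      return 0;
    }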
diff --git a/src/mlpack/methods/ann/layer/empty_layer.hpp b/src/mlpack/methods/ann/layer/empty_layer.hpp
deleted file mode 100644
index cf5a70e..0000000
--- a/src/mlpack/methods/ann/layer/empty_layer.hpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * @file empty_layer.hpp
- * @author Palash Ahuja
- *
- * Definition of the EmptyLayer class, a placeholder layer that does nothing.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_EMPTY_LAYER_HPP
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the EmptyLayer class. The EmptyLayer class represents a
- * single layer which is mainly used as a placeholder.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class EmptyLayer
-{
- public:
-  /**
-   * Create the EmptyLayer object. All of its methods do nothing.
-   */
-  EmptyLayer() { /* Nothing to do here. */ }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename InputType, typename OutputType>
-  void Forward(const InputType& /* input */, OutputType& /* output */)
-  {
-    /* Nothing to do here. */
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename ErrorType, typename GradientType>
-  void Backward(const InputType& /* input */,
-                const ErrorType& /* gy */,
-                GradientType& /* g */)
-  {
-    /* Nothing to do here. */
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the input activation.
-   *
-   * @param input The propagated input.
-   * @param error The calculated error.
-   * @param gradient The calculated gradient.
-   */
-  template<typename InputType, typename ErrorType, typename GradientType>
-  void Gradient(const InputType& /* input */,
-                const ErrorType& /* error */,
-                GradientType& /* gradient */)
-  {
-    /* Nothing to do here. */
-  }
-
-  //! Get the weights.
-  OutputDataType const& Weights() const { return weights; }
-
-  //! Modify the weights.
-  OutputDataType& Weights() { return weights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  OutputDataType const& Gradient() const { return gradient; }
-
-  //! Modify the gradient.
-  OutputDataType& Gradient() { return gradient; }
-
-  //! Locally-stored weight object.
-  OutputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class EmptyLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
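
EmptyLayer is the null object that lets templated code (the removed
DropConnectLayer above, for instance) default its InputLayer parameter to
something with the right interface but no behavior. A tiny sketch of the
same pattern; the EmptySketch and Wrapper names are illustrative only:

    #include <iostream>

    // Null object: satisfies the interface, does nothing.
    struct EmptySketch
    {
      template<typename InputType, typename OutputType>
      void Forward(const InputType& /* input */, OutputType& /* output */)
      { /* Nothing to do here. */ }
    };

    // Generic code can always call layer.Forward() without checking
    // whether a real layer was supplied.
    template<typename Layer = EmptySketch>
    struct Wrapper
    {
      Layer layer;

      void Run()
      {
        double in = 1.0, out = 0.0;
        layer.Forward(in, out);
      }
    };

    int main()
    {
      Wrapper<> w;  // falls back to the placeholder layer
      w.Run();
      std::cout << "placeholder layer invoked" << std::endl;
      return 0;
    }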
diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
deleted file mode 100644
index 3f1e9df..0000000
--- a/src/mlpack/methods/ann/layer/glimpse_layer.hpp
+++ /dev/null
@@ -1,484 +0,0 @@
-/**
- * @file glimpse_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the GlimpseLayer class, which takes an input image and a
- * location to extract a retina-like representation of the input image at
- * different increasing scales.
- *
- * For more information, see the following.
- *
- * @code
- * @article{CoRR2014,
- *   author  = {Volodymyr Mnih and Nicolas Heess and Alex Graves and
- *              Koray Kavukcuoglu},
- *   title   = {Recurrent Models of Visual Attention},
- *   journal = {CoRR},
- *   volume  = {abs/1406.6247},
- *   year    = {2014},
- * }
- * @endcode
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
-#include <algorithm>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The glimpse layer returns a retina-like representation
- * (down-scaled cropped images) of increasing scale around a given location in a
- * given image.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::cube,
-    typename OutputDataType = arma::cube
->
-class GlimpseLayer
-{
- public:
-
-  /**
-   * Create the GlimpseLayer object using the specified input size, glimpse
-   * size, depth, and scale parameters.
-   *
-   * @param inSize The size of the input units.
-   * @param size The used glimpse size (height = width).
-   * @param depth The number of patches to crop per glimpse.
-   * @param scale The scaling factor used to create the increasing retina-like
-   *        representation.
-   */
-  GlimpseLayer(const size_t inSize,
-               const size_t size,
-               const size_t depth = 3,
-               const size_t scale = 2) :
-      inSize(inSize),
-      size(size),
-      depth(depth),
-      scale(scale)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of the glimpse layer.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
-  {
-    output = arma::Cube<eT>(size, size, depth * input.n_slices);
-
-    inputDepth = input.n_slices / inSize;
-
-    for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++)
-    {
-      for (size_t depthIdx = 0, glimpseSize = size;
-          depthIdx < depth; depthIdx++, glimpseSize *= scale)
-      {
-        size_t padSize = std::floor((glimpseSize - 1) / 2);
-
-        arma::Cube<eT> inputPadded = arma::zeros<arma::Cube<eT> >(
-            input.n_rows + padSize * 2, input.n_cols + padSize * 2,
-            input.n_slices / inSize);
-
-        inputPadded.tube(padSize, padSize, padSize + input.n_rows - 1,
-            padSize + input.n_cols - 1) = input.subcube(0, 0,
-            inputIdx * inputDepth, input.n_rows - 1, input.n_cols - 1,
-            (inputIdx + 1) * inputDepth - 1);
-
-        size_t h = inputPadded.n_rows - glimpseSize;
-        size_t w = inputPadded.n_cols - glimpseSize;
-
-        size_t x = std::min(h, (size_t) std::max(0.0,
-            (location(0, inputIdx) + 1) / 2.0 * h));
-        size_t y = std::min(w, (size_t) std::max(0.0,
-            (location(1, inputIdx) + 1) / 2.0 * w));
-
-        if (depthIdx == 0)
-        {
-          for (size_t j = (inputIdx + depthIdx), paddedSlice = 0;
-              j < output.n_slices; j += (inSize * depth), paddedSlice++)
-          {
-            output.slice(j) = inputPadded.subcube(x, y,
-                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
-                paddedSlice);
-          }
-        }
-        else
-        {
-          for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0;
-              j < output.n_slices; j += (inSize * depth), paddedSlice++)
-          {
-            arma::Mat<eT> poolingInput = inputPadded.subcube(x, y,
-                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
-                paddedSlice);
-
-            if (scale == 2)
-            {
-              Pooling(glimpseSize / size, poolingInput, output.slice(j));
-            }
-            else
-            {
-              ReSampling(poolingInput, output.slice(j));
-            }
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Ordinary feed backward pass of the glimpse layer.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename ErrorType, typename eT>
-  void Backward(const InputType& input,
-                const ErrorType& gy,
-                arma::Cube<eT>& g)
-  {
-    // Generate a cube using the backpropagated error matrix.
-    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
-        input.n_cols, input.n_slices);
-
-    for (size_t s = 0, j = 0; s < mappedError.n_slices; s += gy.n_cols, j++)
-    {
-      for (size_t i = 0; i < gy.n_cols; i++)
-      {
-        arma::Col<eT> temp = gy.col(i).subvec(
-            j * input.n_rows * input.n_cols,
-            (j + 1) * input.n_rows * input.n_cols - 1);
-
-        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
-            input.n_rows, input.n_cols);
-      }
-    }
-
-    g = arma::zeros<arma::cube>(inputParameter.n_rows, inputParameter.n_cols,
-        inputParameter.n_slices);
-
-    for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++)
-    {
-      for (size_t depthIdx = 0, glimpseSize = size;
-          depthIdx < depth; depthIdx++, glimpseSize *= scale)
-      {
-        size_t padSize = std::floor((glimpseSize - 1) / 2);
-
-        arma::Cube<eT> inputPadded = arma::zeros<arma::Cube<eT> >(
-            inputParameter.n_rows + padSize * 2, inputParameter.n_cols +
-            padSize * 2, inputParameter.n_slices / inSize);
-
-        size_t h = inputPadded.n_rows - glimpseSize;
-        size_t w = inputPadded.n_cols - glimpseSize;
-
-        size_t x = std::min(h, (size_t) std::max(0.0,
-            (location(0, inputIdx) + 1) / 2.0 * h));
-        size_t y = std::min(w, (size_t) std::max(0.0,
-            (location(1, inputIdx) + 1) / 2.0 * w));
-
-        if (depthIdx == 0)
-        {
-          for (size_t j = (inputIdx + depthIdx), paddedSlice = 0;
-              j < mappedError.n_slices; j += (inSize * depth), paddedSlice++)
-          {
-            inputPadded.subcube(x, y,
-            paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
-            paddedSlice) = mappedError.slice(j);
-          }
-        }
-        else
-        {
-          for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0;
-              j < mappedError.n_slices; j += (inSize * depth), paddedSlice++)
-          {
-            arma::Mat<eT> poolingOutput = inputPadded.subcube(x, y,
-                 paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
-                 paddedSlice);
-
-            if (scale == 2)
-            {
-              Unpooling(inputParameter.slice(paddedSlice), mappedError.slice(j),
-                  poolingOutput);
-            }
-            else
-            {
-              DownwardReSampling(inputParameter.slice(paddedSlice),
-                  mappedError.slice(j), poolingOutput);
-            }
-
-            inputPadded.subcube(x, y,
-                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
-                paddedSlice) = poolingOutput;
-          }
-        }
-
-        g += inputPadded.tube(padSize, padSize, padSize +
-            inputParameter.n_rows - 1, padSize + inputParameter.n_cols - 1);
-      }
-    }
-
-    Transform(g);
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Set the location: the x and y coordinates of the center of the output
-  //! glimpse.
-  void Location(const arma::mat& location)
-  {
-    this->location = location;
-  }
-
- private:
-  /*
-   * Transform the given input by changing rows to columns.
-   *
-   * @param w The input matrix used to perform the transformation.
-   */
-  void Transform(arma::mat& w)
-  {
-    arma::mat t = w;
-
-    for (size_t i = 0, k = 0; i < w.n_elem; k++)
-    {
-      for (size_t j = 0; j < w.n_cols; j++, i++)
-      {
-        w(k, j) = t(i);
-      }
-    }
-  }
-
-  /*
-   * Transform the given input by changing rows to columns.
-   *
-   * @param w The input matrix used to perform the transformation.
-   */
-  void Transform(arma::cube& w)
-  {
-    for (size_t i = 0; i < w.n_slices; i++)
-    {
-      arma::mat t = w.slice(i);
-      Transform(t);
-      w.slice(i) = t;
-    }
-  }
-
-  /**
-   * Apply pooling to the input and store the results in the output parameter.
-   *
-   * @param kSize The kernel size used to perform the pooling operation.
-   * @param input The input on which to apply the pooling rule.
-   * @param output The pooled result.
-   */
-  template<typename eT>
-  void Pooling(const size_t kSize,
-               const arma::Mat<eT>& input,
-               arma::Mat<eT>& output)
-  {
-    const size_t rStep = kSize;
-    const size_t cStep = kSize;
-
-    for (size_t j = 0; j < input.n_cols; j += cStep)
-    {
-      for (size_t i = 0; i < input.n_rows; i += rStep)
-      {
-        output(i / rStep, j / cStep) += pooling.Pooling(
-            input(arma::span(i, i + rStep - 1), arma::span(j, j + cStep - 1)));
-      }
-    }
-  }
-
-  /**
-   * Apply unpooling to the input and store the results.
-   *
-   * @param input The input on which to apply the unpooling rule.
-   * @param error The error used to perform the unpooling operation.
-   * @param output The unpooled result.
-   */
-  template<typename eT>
-  void Unpooling(const arma::Mat<eT>& input,
-                 const arma::Mat<eT>& error,
-                 arma::Mat<eT>& output)
-  {
-    const size_t rStep = input.n_rows / error.n_rows;
-    const size_t cStep = input.n_cols / error.n_cols;
-
-    arma::Mat<eT> unpooledError;
-    for (size_t j = 0; j < input.n_cols; j += cStep)
-    {
-      for (size_t i = 0; i < input.n_rows; i += rStep)
-      {
-        const arma::Mat<eT>& inputArea = input(arma::span(i, i + rStep - 1),
-                                               arma::span(j, j + cStep - 1));
-
-        pooling.Unpooling(inputArea, error(i / rStep, j / cStep),
-            unpooledError);
-
-        output(arma::span(i, i + rStep - 1),
-            arma::span(j, j + cStep - 1)) += unpooledError;
-      }
-    }
-  }
-
-  /**
-   * Apply ReSampling to the input and store the results in the output
-   * parameter.
-   *
-   * @param input The input on which to apply the ReSampling rule.
-   * @param output The resampled result.
-   */
-  template<typename eT>
-  void ReSampling(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    double wRatio = (double) (input.n_rows - 1) / (size - 1);
-    double hRatio = (double) (input.n_cols - 1) / (size - 1);
-
-    double iWidth = input.n_rows - 1;
-    double iHeight = input.n_cols - 1;
-
-    for (size_t y = 0; y < size; y++)
-    {
-      for (size_t x = 0; x < size; x++)
-      {
-        double ix = wRatio * x;
-        double iy = hRatio * y;
-
-        // Get the 4 nearest neighbors.
-        double ixNw = std::floor(ix);
-        double iyNw = std::floor(iy);
-        double ixNe = ixNw + 1;
-        double iySw = iyNw + 1;
-
-        // Compute the bilinear weight (overlap area) for each neighbor.
-        double se = (ix - ixNw) * (iy - iyNw);
-        double sw = (ixNe - ix) * (iy - iyNw);
-        double ne = (ix - ixNw) * (iySw - iy);
-        double nw = (ixNe - ix) * (iySw - iy);
-
-        // Calculate the weighted sum.
-        output(y, x) = input(iyNw, ixNw) * nw +
-            input(iyNw, std::min(ixNe,  iWidth)) * ne +
-            input(std::min(iySw, iHeight), ixNw) * sw +
-            input(std::min(iySw, iHeight), std::min(ixNe, iWidth)) * se;
-      }
-    }
-  }
-
-  /**
-   * Apply DownwardReSampling to the input and store the results into the output
-   * parameter.
-   *
-   * @param input The input on which to apply the DownwardReSampling rule.
-   * @param error The error used to perform the DownwardReSampling operation.
-   * @param output The DownwardReSampled result.
-   */
-  template<typename eT>
-  void DownwardReSampling(const arma::Mat<eT>& input,
-                          const arma::Mat<eT>& error,
-                          arma::Mat<eT>& output)
-  {
-    double iWidth = input.n_rows - 1;
-    double iHeight = input.n_cols - 1;
-
-    double wRatio = iWidth / (size - 1);
-    double hRatio = iHeight / (size - 1);
-
-    for (size_t y = 0; y < size; y++)
-    {
-      for (size_t x = 0; x < size; x++)
-      {
-        double ix = wRatio * x;
-        double iy = hRatio * y;
-
-        // Get the 4 nearest neighbors.
-        double ixNw = std::floor(ix);
-        double iyNw = std::floor(iy);
-        double ixNe = ixNw + 1;
-        double iySw = iyNw + 1;
-
-        // Compute the bilinear weight (overlap area) for each neighbor.
-        double se = (ix - ixNw) * (iy - iyNw);
-        double sw = (ixNe - ix) * (iy - iyNw);
-        double ne = (ix - ixNw) * (iySw - iy);
-        double nw = (ixNe - ix) * (iySw - iy);
-
-        double ograd = error(y, x);
-
-        output(iyNw, ixNw) = output(iyNw, ixNw) + nw * ograd;
-        output(iyNw, std::min(ixNe, iWidth)) = output(iyNw,
-            std::min(ixNe, iWidth)) + ne * ograd;
-        output(std::min(iySw, iHeight), ixNw) = output(std::min(iySw, iHeight),
-            ixNw) + sw * ograd;
-        output(std::min(iySw, iHeight), std::min(ixNe, iWidth)) = output(
-            std::min(iySw, iHeight), std::min(ixNe, iWidth)) + se * ograd;
-      }
-    }
-  }
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Locally-stored depth of the input.
-  size_t inputDepth;
-
-  //! The size of the input units.
-  size_t inSize;
-
-  //! The used glimpse size (height = width).
-  size_t size;
-
-  //! The number of patches to crop per glimpse.
-  size_t depth;
-
-  //! The scale fraction.
-  size_t scale;
-
-  //! The x and y coordinate of the center of the output glimpse.
-  arma::mat location;
-
-  //! Locally-stored object to perform the mean pooling operation.
-  MeanPooling pooling;
-}; // class GlimpseLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
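
For reference before it disappears: the bilinear weighting used by
ReSampling() and DownwardReSampling() above can be checked in isolation.
A minimal standalone sketch (plain Armadillo; the helper name and the
explicit row/column bookkeeping are ours, and size > 1 is assumed):

    #include <armadillo>
    #include <cmath>

    // Bilinear resampling of `input` to a (size x size) patch, mirroring
    // the four-neighbor weighting in GlimpseLayer::ReSampling().
    void BilinearResample(const arma::mat& input, const size_t size,
                          arma::mat& output)
    {
      output.set_size(size, size);

      for (size_t y = 0; y < size; ++y)
      {
        for (size_t x = 0; x < size; ++x)
        {
          // Map output coordinates into the input.
          const double iy = (double) (input.n_rows - 1) * y / (size - 1);
          const double ix = (double) (input.n_cols - 1) * x / (size - 1);

          // The four nearest neighbors.
          const size_t y0 = (size_t) std::floor(iy);
          const size_t x0 = (size_t) std::floor(ix);
          const size_t y1 = std::min((size_t) input.n_rows - 1, y0 + 1);
          const size_t x1 = std::min((size_t) input.n_cols - 1, x0 + 1);

          // Fractional offsets double as the interpolation weights.
          const double fy = iy - y0;
          const double fx = ix - x0;

          // Weighted sum over the four nearest neighbors.
          output(y, x) = input(y0, x0) * (1 - fy) * (1 - fx) +
                         input(y0, x1) * (1 - fy) * fx +
                         input(y1, x0) * fy * (1 - fx) +
                         input(y1, x1) * fy * fx;
        }
      }
    }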
diff --git a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp b/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp
deleted file mode 100644
index c707017..0000000
--- a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp
+++ /dev/null
@@ -1,259 +0,0 @@
-/**
- * @file hard_tanh_layer.hpp
- * @author Dhawal Arora
- *
- * Definition and implementation of the HardTanHLayer layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The Hard Tanh activation function, defined by
- *
- * @f{eqnarray*}{
- * f(x) &=& \left\{
- *   \begin{array}{lr}
- *     maxValue & : x > maxValue \\
- *     minValue & : x \le minValue \\
- *     x        & : otherwise
- *   \end{array}
- * \right. \\
- * f'(x) &=& \left\{
- *   \begin{array}{lr}
- *     0 & : x > maxValue \\
- *     0 & : x \le minValue \\
- *     1 & : otherwise
- *   \end{array}
- * \right.
- * @f}
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class HardTanHLayer
-{
- public:
-  /**
-   * Create the HardTanHLayer object using the specified parameters. The range
-   * of the linear region can be adjusted by specifying the maxValue and
-   * minValue. Default (maxValue = 1, minValue = -1).
-   *
-   * @param maxValue Range of the linear region maximum value.
-   * @param minValue Range of the linear region minimum value.
-   */
-  HardTanHLayer(const double maxValue = 1, const double minValue = -1) :
-      maxValue(maxValue), minValue(minValue)
-  {
-     // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename InputType, typename OutputType>
-  void Forward(const InputType& input, OutputType& output)
-  {
-    Fn(input, output);
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType>
-  void Backward(const DataType& input,
-                const DataType& gy,
-                DataType& g)
-  {
-    DataType derivative;
-    Deriv(input, derivative);
-    g = gy % derivative;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Cube<eT>& input,
-                const arma::Mat<eT>& gy,
-                arma::Cube<eT>& g)
-  {
-    // Generate a cube using the backpropagated error matrix.
-    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
-        input.n_cols, input.n_slices);
-
-    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++)
-    {
-      for (size_t i = 0; i < gy.n_cols; i++)
-      {
-        arma::Col<eT> temp = gy.col(i).subvec(
-            j * input.n_rows * input.n_cols,
-            (j + 1) * input.n_rows * input.n_cols - 1);
-
-        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
-            input.n_rows, input.n_cols);
-      }
-    }
-
-    arma::Cube<eT> derivative;
-    Deriv(input, derivative);
-    g = mappedError % derivative;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the maximum value.
-  double const& MaxValue() const { return maxValue; }
-  //! Modify the maximum value.
-  double& MaxValue() { return maxValue; }
-
-  //! Get the minimum value.
-  double const& MinValue() const { return minValue; }
-  //! Modify the minimum value.
-  double& MinValue() { return minValue; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(maxValue, "maxValue");
-    ar & data::CreateNVP(minValue, "minValue");
-  }
-
- private:
-  /**
-   * Computes the HardTanH function.
-   *
-   * @param x Input data.
-   * @return f(x).
-   */
-  double Fn(const double x)
-  {
-    if (x > maxValue)
-      return maxValue;
-    else if (x < minValue)
-      return minValue;
-    return x;
-  }
-
-  /**
-   * Computes the HardTanH function using a dense matrix as input.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename eT>
-  void Fn(const arma::Mat<eT>& x, arma::Mat<eT>& y)
-  {
-    y = x;
-    y.transform( [&](eT val) { return std::min(
-        std::max( val, minValue ), maxValue ); } );
-  }
-
-  /**
-   * Computes the HardTanH function using a 3rd-order tensor as input.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename eT>
-  void Fn(const arma::Cube<eT>& x, arma::Cube<eT>& y)
-  {
-    y = x;
-    for (size_t s = 0; s < x.n_slices; s++)
-      Fn(x.slice(s), y.slice(s));
-  }
-
-  /**
-   * Computes the first derivative of the HardTanH function.
-   *
-   * @param x Input data.
-   * @return f'(x)
-   */
-  double Deriv(const double x)
-  {
-    return (x > maxValue || x < minValue) ? 0 : 1;
-  }
-
-  /**
-   * Computes the first derivative of the HardTanH function.
-   *
-   * @param x Input activations.
-   * @param y The resulting derivatives.
-   */
-  template<typename InputType, typename OutputType>
-  void Deriv(const InputType& x, OutputType& y)
-  {
-    y = x;
-
-    for (size_t i = 0; i < x.n_elem; i++)
-      y(i) = Deriv(x(i));
-  }
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Maximum value for the HardTanH function.
-  double maxValue;
-
-  //! Minimum value for the HardTanH function.
-  double minValue;
-}; // class HardTanHLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
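
A minimal usage sketch of the layer removed above, written against the
pre-removal API (the main() driver is ours; it assumes the header is still
on the include path):

    #include <mlpack/methods/ann/layer/hard_tanh_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      arma::mat input = arma::randn<arma::mat>(10, 1);
      arma::mat output, gradient;

      HardTanHLayer<> layer(1.0, -1.0);  // Linear on [-1, 1], clamped outside.
      layer.Forward(input, output);      // output = min(max(input, -1), 1).

      // Unit error in; the gradient is zero wherever the input was clamped.
      arma::mat gy = arma::ones<arma::mat>(10, 1);
      layer.Backward(input, gy, gradient);

      return 0;
    }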
diff --git a/src/mlpack/methods/ann/layer/layer_traits.hpp b/src/mlpack/methods/ann/layer/layer_traits.hpp
deleted file mode 100644
index a8671d6..0000000
--- a/src/mlpack/methods/ann/layer/layer_traits.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * @file layer_traits.hpp
- * @author Marcus Edel
- *
- * This provides the LayerTraits class, a template class to get information
- * about various layers.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_LAYER_TRAITS_HPP
-#define MLPACK_METHODS_ANN_LAYER_LAYER_TRAITS_HPP
-
-#include <mlpack/core/util/sfinae_utility.hpp>
-
-namespace mlpack {
-namespace ann {
-
-/**
- * This is a template class that can provide information about various layers.
- * By default, this class will provide the weakest possible assumptions on
- * layer, and each layer should override values as necessary.  If a layer
- * doesn't need to override a value, then there's no need to write a LayerTraits
- * specialization for that class.
- */
-template<typename LayerType>
-class LayerTraits
-{
- public:
-  /**
-   * This is true if the layer is a binary layer.
-   */
-  static const bool IsBinary = false;
-
-  /**
-   * This is true if the layer is an output layer.
-   */
-  static const bool IsOutputLayer = false;
-
-  /**
-   * This is true if the layer is a bias layer.
-   */
-  static const bool IsBiasLayer = false;
-
-  /**
-   * This is true if the layer is an LSTM layer.
-   */
-  static const bool IsLSTMLayer = false;
-
-  /**
-   * This is true if the layer is a connection layer.
-   */
-  static const bool IsConnection = false;
-};
-
-// This gives us a HasGradientCheck<T, U> type (where U is a function pointer)
-// we can use with SFINAE to catch when a type has a Gradient(...) function.
-HAS_MEM_FUNC(Gradient, HasGradientCheck);
-
-// This gives us a HasDeterministicCheck<T, U> type (where U is a function
-// pointer) we can use with SFINAE to catch when a type has a Deterministic()
-// function.
-HAS_MEM_FUNC(Deterministic, HasDeterministicCheck);
-
-// This gives us a HasRecurrentParameterCheck<T, U> type (where U is a function
-// pointer) we can use with SFINAE to catch when a type has a
-// RecurrentParameter() function.
-HAS_MEM_FUNC(RecurrentParameter, HasRecurrentParameterCheck);
-
-// This gives us a HasSeqLenCheck<T, U> type (where U is a function pointer) we
-// can use with SFINAE to catch when a type has a SeqLen() function.
-HAS_MEM_FUNC(SeqLen, HasSeqLenCheck);
-
-// This gives us a HasWeightsCheck<T, U> type (where U is a function pointer) we
-// can use with SFINAE to catch when a type has a Weights() function.
-HAS_MEM_FUNC(Weights, HasWeightsCheck);
-
-// This gives us a HasLocationCheck<T, U> type (where U is a function pointer)
-// we can use with SFINAE to catch when a type has a Location() function.
-HAS_MEM_FUNC(Location, HasLocationCheck);
-
-// This gives us a HasRewardCheck<T, U> type (where U is a function pointer) we
-// can use with SFINAE to catch when a type has a Reward() function.
-HAS_MEM_FUNC(Reward, HasRewardCheck);
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
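
A sketch of how these traits were consumed at compile time, using the
LayerTraits specialization for LinearLayer that is removed later in this
same commit (the static_asserts are ours):

    #include <mlpack/methods/ann/layer/layer_traits.hpp>
    #include <mlpack/methods/ann/layer/linear_layer.hpp>

    using namespace mlpack::ann;

    // LinearLayer specializes LayerTraits and reports itself as a connection
    // layer; any trait a layer does not override keeps the weakest default.
    static_assert(LayerTraits<LinearLayer<> >::IsConnection,
        "LinearLayer should report IsConnection");
    static_assert(!LayerTraits<LinearLayer<> >::IsLSTMLayer,
        "LinearLayer is not an LSTM layer");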
diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
deleted file mode 100644
index a87792e..0000000
--- a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/**
- * @file leaky_relu_layer.hpp
- * @author Dhawal Arora
- *
- * Definition and implementation of the LeakyReLULayer class. The leaky
- * rectifier nonlinearity was introduced in: Andrew L. Maas, Awni Y. Hannun,
- * and Andrew Y. Ng, "Rectifier Nonlinearities Improve Neural Network
- * Acoustic Models", 2013.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The LeakyReLU activation function, defined by
- *
- * @f{eqnarray*}{
- * f(x) &=& \max(x, alpha*x) \\
- * f'(x) &=& \left\{
- *   \begin{array}{lr}
- *     1 & : x > 0 \\
- *     alpha & : x \le 0
- *   \end{array}
- * \right.
- * @f}
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class LeakyReLULayer
-{
- public:
-  /**
-   * Create the LeakyReLULayer object using the specified parameters.
-   * The non-zero gradient can be adjusted by specifying the parameter
-   * alpha in the range 0 to 1. Default (alpha = 0.03).
-   *
-   * @param alpha Non-zero gradient for negative inputs.
-   */
-  LeakyReLULayer(const double alpha = 0.03) : alpha(alpha)
-  {
-     // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename InputType, typename OutputType>
-  void Forward(const InputType& input, OutputType& output)
-  {
-    Fn(input, output);
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType>
-  void Backward(const DataType& input,
-                const DataType& gy,
-                DataType& g)
-  {
-    DataType derivative;
-    Deriv(input, derivative);
-    g = gy % derivative;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Cube<eT>& input,
-                const arma::Mat<eT>& gy,
-                arma::Cube<eT>& g)
-  {
-    // Generate a cube using the backpropagated error matrix.
-    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
-        input.n_cols, input.n_slices);
-
-    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++)
-    {
-      for (size_t i = 0; i < gy.n_cols; i++)
-      {
-        arma::Col<eT> temp = gy.col(i).subvec(
-            j * input.n_rows * input.n_cols,
-            (j + 1) * input.n_rows * input.n_cols - 1);
-
-        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
-            input.n_rows, input.n_cols);
-      }
-    }
-
-    arma::Cube<eT> derivative;
-    Deriv(input, derivative);
-    g = mappedError % derivative;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the non zero gradient.
-  double const& Alpha() const { return alpha; }
-  //! Modify the non zero gradient.
-  double& Alpha() { return alpha; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(alpha, "alpha");
-  }
-
- private:
-  /**
-   * Computes the LeakyReLU function.
-   *
-   * @param x Input data.
-   * @return f(x).
-   */
-  double Fn(const double x)
-  {
-    return std::max(x, alpha * x);
-  }
-
-  /**
-   * Computes the Leaky ReLU function using a dense matrix as input.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename eT>
-  void Fn(const arma::Mat<eT>& x, arma::Mat<eT>& y)
-  {
-    y = arma::max(x, alpha * x);
-  }
-
-  /**
-   * Computes the LeakyReLU function using a 3rd-order tensor as input.
-   *
-   * @param x Input data.
-   * @param y The resulting output activation.
-   */
-  template<typename eT>
-  void Fn(const arma::Cube<eT>& x, arma::Cube<eT>& y)
-  {
-    y = x;
-    for (size_t s = 0; s < x.n_slices; s++)
-      Fn(x.slice(s), y.slice(s));
-  }
-
-  /**
-   * Computes the first derivative of the LeakyReLU function.
-   *
-   * @param x Input data.
-   * @return f'(x)
-   */
-  double Deriv(const double x)
-  {
-    return (x >= 0) ? 1 : alpha;
-  }
-
-  /**
-   * Computes the first derivative of the LeakyReLU function.
-   *
-   * @param x Input activations.
-   * @param y The resulting derivatives.
-   */
-  template<typename InputType, typename OutputType>
-  void Deriv(const InputType& x, OutputType& y)
-  {
-    y = x;
-
-    for (size_t i = 0; i < x.n_elem; i++)
-      y(i) = Deriv(x(i));
-  }
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Leakiness parameter in the range 0 < alpha < 1.
-  double alpha;
-
-}; // class LeakyReLULayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
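
A minimal usage sketch of the removed layer against the pre-removal API
(driver code ours):

    #include <mlpack/methods/ann/layer/leaky_relu_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      arma::mat input = arma::randn<arma::mat>(5, 1);
      arma::mat output, gradient;

      LeakyReLULayer<> layer(0.03);  // f(x) = max(x, 0.03 * x).
      layer.Forward(input, output);

      // Backward pass: the derivative is 1 for positive inputs and alpha
      // otherwise.
      arma::mat gy = arma::ones<arma::mat>(5, 1);
      layer.Backward(input, gy, gradient);

      return 0;
    }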
diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp
deleted file mode 100644
index b3b3dbf..0000000
--- a/src/mlpack/methods/ann/layer/linear_layer.hpp
+++ /dev/null
@@ -1,289 +0,0 @@
-/**
- * @file linear_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the LinearLayer class, also known as a fully-connected or
- * affine layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the LinearLayer class. The LinearLayer class represents a
- * single layer of a neural network.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class LinearLayer
-{
- public:
-  /**
-   * Create the LinearLayer object using the specified number of units.
-   *
-   * @param inSize The number of input units.
-   * @param outSize The number of output units.
-   */
-  LinearLayer(const size_t inSize, const size_t outSize) :
-      inSize(inSize),
-      outSize(outSize)
-  {
-    weights.set_size(outSize, inSize);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    output = weights * input;
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Cube<eT>& input, arma::Mat<eT>& output)
-  {
-    arma::Mat<eT> data(input.n_elem, 1);
-
-    for (size_t s = 0, c = 0; s < input.n_slices / data.n_cols; s++)
-    {
-      for (size_t i = 0; i < data.n_cols; i++, c++)
-      {
-        data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) *
-            input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise(
-            input.slice(c), 1));
-      }
-    }
-
-    output = weights * data;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Backward(const InputType& /* unused */,
-                const arma::Mat<eT>& gy,
-                arma::Mat<eT>& g)
-  {
-    g = weights.t() * gy;
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the input activation.
-   *
-   * @param input The propagated input.
-   * @param error The calculated error.
-   * @param gradient The calculated gradient.
-   */
-  template<typename InputType, typename ErrorType, typename GradientType>
-  void Gradient(const InputType& input,
-                const ErrorType& error,
-                GradientType& gradient)
-  {
-    GradientDelta(input, error, gradient);
-  }
-
-  //! Get the weights.
-  OutputDataType const& Weights() const { return weights; }
-  //! Modify the weights.
-  OutputDataType& Weights() { return weights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  OutputDataType const& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  OutputDataType& Gradient() { return gradient; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(weights, "weights");
-  }
-
- private:
-  /*
-   * Calculate the gradient using the output delta (3rd order tensor) and the
-   * input activation (3rd order tensor).
-   *
-   * @param input The input parameter used for calculating the gradient.
-   * @param d The output delta.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void GradientDelta(const arma::Cube<eT>& input,
-                     const arma::Mat<eT>& d,
-                     arma::Cube<eT>& g)
-  {
-    g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
-    arma::Mat<eT> data = arma::Mat<eT>(d.n_cols,
-        input.n_elem / d.n_cols);
-
-    for (size_t s = 0, c = 0; s < input.n_slices / data.n_rows; s++)
-    {
-      for (size_t i = 0; i < data.n_rows; i++, c++)
-      {
-        data.row(i).subvec(s * input.n_rows * input.n_cols,
-            (s + 1) * input.n_rows * input.n_cols - 1) =
-            arma::vectorise(input.slice(c), 1);
-      }
-    }
-
-    g.slice(0) = d * data / d.n_cols;
-  }
-
-  /*
-   * Calculate the gradient (3rd order tensor) using the output delta
-   * (dense matrix) and the input activation (dense matrix).
-   *
-   * @param input The input parameter used for calculating the gradient.
-   * @param d The output delta.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void GradientDelta(const arma::Mat<eT>& input,
-                     const arma::Mat<eT>& d,
-                     arma::Cube<eT>& g)
-  {
-    g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
-    Gradient(input, d, g.slice(0));
-  }
-
-  /*
-   * Calculate the gradient (dense matrix) using the output delta
-   * (dense matrix) and the input activation (3rd order tensor).
-   *
-   * @param input The input parameter used for calculating the gradient.
-   * @param d The output delta.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void GradientDelta(const arma::Cube<eT>& input,
-                     const arma::Mat<eT>& d,
-                     arma::Mat<eT>& g)
-  {
-    arma::Cube<eT> grad = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
-    Gradient(input, d, grad);
-    g = grad.slice(0);
-  }
-
-  /*
-   * Calculate the gradient (dense matrix) using the output delta
-   * (dense matrix) and the input activation (dense matrix).
-   *
-   * @param input The input parameter used for calculating the gradient.
-   * @param d The output delta.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void GradientDelta(const arma::Mat<eT>& input,
-                     const arma::Mat<eT>& d,
-                     arma::Mat<eT>& g)
-  {
-    g = d * input.t();
-  }
-
-  //! Locally-stored number of input units.
-  size_t inSize;
-
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! Locally-stored weight object.
-  OutputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class LinearLayer
-
-/**
- * Linear Mapping layer to map between 3rd order tensors and dense matrices.
- */
-template <
-    typename InputDataType = arma::cube,
-    typename OutputDataType = arma::mat
->
-using LinearMappingLayer = LinearLayer<InputDataType, OutputDataType>;
-
-//! Layer traits for the linear layer.
-template<
-    typename InputDataType,
-    typename OutputDataType
->
-class LayerTraits<LinearLayer<InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
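
A minimal usage sketch of the removed LinearLayer (driver code ours):

    #include <mlpack/methods/ann/layer/linear_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      LinearLayer<> layer(4, 2);  // 4 input units, 2 output units.
      layer.Weights().randu();    // The weights have shape (outSize x inSize).

      arma::mat input = arma::randn<arma::mat>(4, 1);
      arma::mat output;
      layer.Forward(input, output);  // output = weights * input.

      // Weight gradient for a given output delta: gradient = delta * input^T.
      arma::mat delta = arma::ones<arma::mat>(2, 1);
      arma::mat gradient;
      layer.Gradient(input, delta, gradient);

      return 0;
    }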
diff --git a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
deleted file mode 100644
index 2b417e3..0000000
--- a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/**
- * @file log_softmax_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the LogSoftmaxLayer class.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the log softmax layer. The log softmax layer computes the
- * logarithm of the softmax of its inputs. This layer is
- * meant to be used in combination with the negative log likelihood layer
- * (NegativeLogLikelihoodLayer), which expects that the input contains
- * log-probabilities for each class.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class LogSoftmaxLayer
-{
- public:
-  /**
-   * Create the LogSoftmaxLayer object.
-   */
-  LogSoftmaxLayer() { /* Nothing to do here. */ }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1);
-    output = (maxInput - input);
-
-    // Fast approximation of exp(-x) for positive x; the accuracy is about
-    // 0.00001 lower than the exact computation. Credits go to Leon Bottou.
-    output.transform( [](double x)
-    {
-      //! Fast approximation of exp(-x) for x positive.
-      static constexpr double A0 = 1.0;
-      static constexpr double A1 = 0.125;
-      static constexpr double A2 = 0.0078125;
-      static constexpr double A3 = 0.00032552083;
-      static constexpr double A4 = 1.0172526e-5;
-
-      if (x < 13.0)
-      {
-        double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4)));
-        y *= y;
-        y *= y;
-        y *= y;
-        y = 1 / y;
-
-        return y;
-      }
-
-      return 0.0;
-    } );
-
-    output = input - (maxInput + std::log(arma::accu(output)));
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Mat<eT>& input,
-                const arma::Mat<eT>& gy,
-                arma::Mat<eT>& g)
-  {
-    g = gy - arma::exp(input) * arma::accu(gy);
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
- private:
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class LogSoftmaxLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
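
A quick numerical sanity check of the removed layer (driver code ours):

    #include <mlpack/methods/ann/layer/log_softmax_layer.hpp>
    #include <iostream>

    int main()
    {
      using namespace mlpack::ann;

      arma::mat input = arma::randn<arma::mat>(3, 1);
      arma::mat output;

      LogSoftmaxLayer<> layer;
      layer.Forward(input, output);

      // The outputs are log-probabilities, so exp(output) should sum to ~1,
      // up to the error of the fast exp(-x) approximation in Forward().
      std::cout << arma::accu(arma::exp(output)) << std::endl;

      return 0;
    }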
diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp
deleted file mode 100644
index 6ccd2fc..0000000
--- a/src/mlpack/methods/ann/layer/lstm_layer.hpp
+++ /dev/null
@@ -1,418 +0,0 @@
-/**
- * @file lstm_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the LSTMLayer class, which implements an LSTM network
- * layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * An implementation of an LSTM network layer.
- *
- * This class allows specification of the type of the activation functions used
- * for the gates and cells and also of the type of the function used to
- * initialize and update the peephole weights.
- *
- * @tparam GateActivationFunction Activation function used for the gates.
- * @tparam StateActivationFunction Activation function used for the state.
- * @tparam OutputActivationFunction Activation function used for the output.
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    class GateActivationFunction = LogisticFunction,
-    class StateActivationFunction = TanhFunction,
-    class OutputActivationFunction = TanhFunction,
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class LSTMLayer
-{
- public:
-  /**
-   * Create the LSTMLayer object using the specified parameters.
-   *
-   * @param outSize The number of output units.
-   * @param peepholes The flag used to indicate if peephole connections should
-   *        be used (Default: false).
-   */
-  LSTMLayer(const size_t outSize, const bool peepholes = false) :
-      outSize(outSize),
-      peepholes(peepholes),
-      seqLen(1),
-      offset(0)
-  {
-    if (peepholes)
-    {
-      peepholeWeights.set_size(outSize, 3);
-      peepholeDerivatives = arma::zeros<OutputDataType>(outSize, 3);
-    }
-    else
-    {
-      peepholeWeights.set_size(0, 0);
-    }
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    if (inGate.n_cols < seqLen)
-    {
-      inGate = arma::zeros<InputDataType>(outSize, seqLen);
-      inGateAct = arma::zeros<InputDataType>(outSize, seqLen);
-      inGateError = arma::zeros<InputDataType>(outSize, seqLen);
-      outGate = arma::zeros<InputDataType>(outSize, seqLen);
-      outGateAct = arma::zeros<InputDataType>(outSize, seqLen);
-      outGateError = arma::zeros<InputDataType>(outSize, seqLen);
-      forgetGate = arma::zeros<InputDataType>(outSize, seqLen);
-      forgetGateAct = arma::zeros<InputDataType>(outSize, seqLen);
-      forgetGateError = arma::zeros<InputDataType>(outSize, seqLen);
-      state = arma::zeros<InputDataType>(outSize, seqLen);
-      stateError = arma::zeros<InputDataType>(outSize, seqLen);
-      cellAct = arma::zeros<InputDataType>(outSize, seqLen);
-    }
-
-    // Split up the input activation into three of its four parts (inGate,
-    // forgetGate, outGate); the cell input is read out directly below.
-    inGate.col(offset) = input.submat(0, 0, outSize - 1, 0);
-
-    forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0);
-    outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0);
-
-    if (peepholes && offset > 0)
-    {
-      inGate.col(offset) += peepholeWeights.col(0) % state.col(offset - 1);
-      forgetGate.col(offset) += peepholeWeights.col(1) %
-          state.col(offset - 1);
-    }
-
-    arma::Col<eT> inGateActivation = inGateAct.unsafe_col(offset);
-    GateActivationFunction::fn(inGate.unsafe_col(offset), inGateActivation);
-
-    arma::Col<eT> forgetGateActivation = forgetGateAct.unsafe_col(offset);
-    GateActivationFunction::fn(forgetGate.unsafe_col(offset),
-        forgetGateActivation);
-
-    arma::Col<eT> cellActivation = cellAct.unsafe_col(offset);
-    StateActivationFunction::fn(input.submat(outSize * 2, 0,
-        (outSize * 3) - 1, 0), cellActivation);
-
-    state.col(offset) = inGateAct.col(offset) % cellActivation;
-
-    if (offset > 0)
-      state.col(offset) += forgetGateAct.col(offset) % state.col(offset - 1);
-
-    if (peepholes)
-      outGate.col(offset) += peepholeWeights.col(2) % state.col(offset);
-
-    arma::Col<eT> outGateActivation = outGateAct.unsafe_col(offset);
-    GateActivationFunction::fn(outGate.unsafe_col(offset), outGateActivation);
-
-    OutputActivationFunction::fn(state.unsafe_col(offset), output);
-    output = outGateAct.col(offset) % output;
-
-    offset = (offset + 1) % seqLen;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Backward(const InputType& /* unused */,
-                const arma::Mat<eT>& gy,
-                arma::Mat<eT>& g)
-  {
-    queryOffset = seqLen - offset - 1;
-
-    arma::Col<eT> outGateDerivative;
-    GateActivationFunction::deriv(outGateAct.unsafe_col(queryOffset),
-        outGateDerivative);
-
-    arma::Col<eT> stateActivation;
-    StateActivationFunction::fn(state.unsafe_col(queryOffset), stateActivation);
-
-    outGateError.col(queryOffset) = outGateDerivative % gy % stateActivation;
-
-    arma::Col<eT> stateDerivative;
-    StateActivationFunction::deriv(stateActivation, stateDerivative);
-
-    stateError.col(queryOffset) = gy % outGateAct.col(queryOffset) %
-        stateDerivative;
-
-    if (queryOffset < (seqLen - 1))
-    {
-      stateError.col(queryOffset) += stateError.col(queryOffset + 1) %
-          forgetGateAct.col(queryOffset + 1);
-
-      if (peepholes)
-      {
-        stateError.col(queryOffset) += inGateError.col(queryOffset + 1) %
-            peepholeWeights.col(0);
-        stateError.col(queryOffset) += forgetGateError.col(queryOffset + 1) %
-            peepholeWeights.col(1);
-      }
-    }
-
-    if (peepholes)
-    {
-      stateError.col(queryOffset) += outGateError.col(queryOffset) %
-          peepholeWeights.col(2);
-    }
-
-    arma::Col<eT> cellDerivative;
-    StateActivationFunction::deriv(cellAct.col(queryOffset), cellDerivative);
-
-    arma::Col<eT> cellError = inGateAct.col(queryOffset) % cellDerivative %
-        stateError.col(queryOffset);
-
-    if (queryOffset > 0)
-    {
-      arma::Col<eT> forgetGateDerivative;
-      GateActivationFunction::deriv(forgetGateAct.col(queryOffset),
-          forgetGateDerivative);
-
-      forgetGateError.col(queryOffset) = forgetGateDerivative %
-          stateError.col(queryOffset) % state.col(queryOffset - 1);
-    }
-
-    arma::Col<eT> inGateDerivative;
-    GateActivationFunction::deriv(inGateAct.col(queryOffset), inGateDerivative);
-
-    inGateError.col(queryOffset) = inGateDerivative %
-        stateError.col(queryOffset) % cellAct.col(queryOffset);
-
-    if (peepholes)
-    {
-      peepholeDerivatives.col(2) += outGateError.col(queryOffset) %
-          state.col(queryOffset);
-
-      if (queryOffset > 0)
-      {
-        peepholeDerivatives.col(0) += inGateError.col(queryOffset) %
-            state.col(queryOffset - 1);
-        peepholeDerivatives.col(1) += forgetGateError.col(queryOffset) %
-            state.col(queryOffset - 1);
-      }
-    }
-
-    g = arma::zeros<arma::Mat<eT> >(outSize * 4, 1);
-    g.submat(0, 0, outSize - 1, 0) = inGateError.col(queryOffset);
-    g.submat(outSize, 0, (outSize * 2) - 1, 0) =
-        forgetGateError.col(queryOffset);
-    g.submat(outSize * 2, 0, (outSize * 3) - 1, 0) = cellError;
-    g.submat(outSize * 3, 0, (outSize * 4) - 1, 0) =
-        outGateError.col(queryOffset);
-
-    offset = (offset + 1) % seqLen;
-  }
-
-  /**
-   * Calculate the peephole weight gradient of the LSTM layer.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT, typename GradientDataType>
-  void Gradient(const InputType& /* input */,
-                const arma::Mat<eT>& /* gy */,
-                GradientDataType& /* g */)
-  {
-    if (peepholes && offset == 0)
-    {
-      peepholeGradient.col(0) = arma::trans((peepholeWeights.col(0).t() *
-          (inGateError.col(queryOffset) % peepholeDerivatives.col(0))) *
-          inGate.col(queryOffset).t());
-
-      peepholeGradient.col(1) = arma::trans((peepholeWeights.col(1).t() *
-          (forgetGateError.col(queryOffset) % peepholeDerivatives.col(1))) *
-          forgetGate.col(queryOffset).t());
-
-      peepholeGradient.col(2) = arma::trans((peepholeWeights.col(2).t() *
-          (outGateError.col(queryOffset) % peepholeDerivatives.col(2))) *
-          outGate.col(queryOffset).t());
-
-      peepholeDerivatives.zeros();
-    }
-  }
-
-  //! Get the peephole weights.
-  OutputDataType const& Weights() const { return peepholeWeights; }
-  //! Modify the peephole weights.
-  OutputDataType& Weights() { return peepholeWeights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the peephole gradient.
-  OutputDataType const& Gradient() const { return peepholeGradient; }
-  //! Modify the peephole gradient.
-  OutputDataType& Gradient() { return peepholeGradient; }
-
-  //! Get the sequence length.
-  size_t SeqLen() const { return seqLen; }
-  //! Modify the sequence length.
-  size_t& SeqLen() { return seqLen; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(peepholes, "peepholes");
-
-    if (peepholes)
-    {
-      ar & data::CreateNVP(peepholeWeights, "peepholeWeights");
-
-      if (Archive::is_loading::value)
-      {
-        peepholeDerivatives = arma::zeros<OutputDataType>(
-            peepholeWeights.n_rows, 3);
-      }
-    }
-  }
-
- private:
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! Locally-stored peephole indication flag.
-  bool peepholes;
-
-  //! Locally-stored length of the input sequence.
-  size_t seqLen;
-
-  //! Locally-stored sequence offset.
-  size_t offset;
-
-  //! Locally-stored query offset.
-  size_t queryOffset;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Locally-stored ingate object.
-  InputDataType inGate;
-
-  //! Locally-stored ingate activation object.
-  InputDataType inGateAct;
-
-  //! Locally-stored ingate error object.
-  InputDataType inGateError;
-
-  //! Locally-stored outgate object.
-  InputDataType outGate;
-
-  //! Locally-stored outgate activation object.
-  InputDataType outGateAct;
-
-  //! Locally-stored outgate error object.
-  InputDataType outGateError;
-
-  //! Locally-stored forget object.
-  InputDataType forgetGate;
-
-  //! Locally-stored forget activation object.
-  InputDataType forgetGateAct;
-
-  //! Locally-stored forget error object.
-  InputDataType forgetGateError;
-
-  //! Locally-stored state object.
-  InputDataType state;
-
-  //! Locally-stored state error object.
-  InputDataType stateError;
-
-  //! Locally-stored cell activation object.
-  InputDataType cellAct;
-
-  //! Locally-stored peephole weight object.
-  OutputDataType peepholeWeights;
-
-  //! Locally-stored derivatives object.
-  OutputDataType peepholeDerivatives;
-
-  //! Locally-stored peephole gradient object.
-  OutputDataType peepholeGradient;
-}; // class LSTMLayer
-
-//! Layer traits for the LSTM layer.
-template<
-    class GateActivationFunction,
-    class StateActivationFunction,
-    class OutputActivationFunction,
-    typename InputDataType,
-    typename OutputDataType
->
-class LayerTraits<LSTMLayer<GateActivationFunction,
-                            StateActivationFunction,
-                            OutputActivationFunction,
-                            InputDataType,
-                            OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = true;
-  static const bool IsConnection = false;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
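
A minimal usage sketch of the removed LSTMLayer (driver code ours; it also
assumes the activation-function headers removed earlier in this commit are
available, since the default gate/state activations live there):

    #include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
    #include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
    #include <mlpack/methods/ann/layer/lstm_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      const size_t outSize = 4;
      LSTMLayer<> lstm(outSize);  // Peephole connections are off by default.
      lstm.SeqLen() = 3;          // Length of the sequence to process.

      // Each step consumes the concatenated gate pre-activations
      // (inGate, forgetGate, cell input, outGate): 4 * outSize rows.
      arma::mat output;
      for (size_t t = 0; t < 3; ++t)
      {
        arma::mat gateInput = arma::randn<arma::mat>(4 * outSize, 1);
        lstm.Forward(gateInput, output);  // output is (outSize x 1).
      }

      return 0;
    }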
diff --git a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp b/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp
deleted file mode 100644
index 7705b52..0000000
--- a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * @file multiclass_classification_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the MulticlassClassificationLayer class, which implements a
- * multiclass classification layer that can be used as an output layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * An implementation of a multiclass classification layer that can be used as
- * an output layer.
- *
- * A convenience typedef is given:
- *
- *  - ClassificationLayer
- */
-class MulticlassClassificationLayer
-{
- public:
-  /**
-   * Create the MulticlassClassificationLayer object.
-   */
-  MulticlassClassificationLayer()
-  {
-    // Nothing to do here.
-  }
-
-  /*
-   * Calculate the error using the specified input activation and the target.
-   * The error is stored into the given error parameter.
-   *
-   * @param inputActivations Input data used for evaluating the network.
-   * @param target Target data used for evaluating the network.
-   * @param error The calculated error with respect to the input activation
-   *        and the given target.
-   */
-  template<typename DataType>
-  void CalculateError(const DataType& inputActivations,
-                      const DataType& target,
-                      DataType& error)
-  {
-    error = inputActivations - target;
-  }
-
-  /*
-   * Calculate the output class using the specified input activation.
-   *
-   * @param inputActivations Input data used to calculate the output class.
-   * @param output Output class of the input activation.
-   */
-  template<typename DataType>
-  void OutputClass(const DataType& inputActivations, DataType& output)
-  {
-    output = inputActivations;
-  }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& /* ar */, const unsigned int /* version */)
-  {
-    // Nothing to serialize.
-  }
-}; // class MulticlassClassificationLayer
-
-//! Layer traits for the multiclass classification layer.
-template <>
-class LayerTraits<MulticlassClassificationLayer>
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = true;
-  static const bool IsBiasLayer = false;
-  static const bool IsConnection = false;
-};
-
-/**
- * Convenience alias: ClassificationLayer.
- */
-using ClassificationLayer = MulticlassClassificationLayer;
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
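
A minimal usage sketch of the removed output layer (driver code ours):

    #include <mlpack/methods/ann/layer/multiclass_classification_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      // ClassificationLayer is the alias for MulticlassClassificationLayer.
      ClassificationLayer outputLayer;

      arma::mat activations("0.1; 0.7; 0.2");  // Network output.
      arma::mat target("0; 1; 0");             // One-hot target.
      arma::mat error;

      // error = activations - target.
      outputLayer.CalculateError(activations, target, error);

      return 0;
    }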
diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
deleted file mode 100644
index afa0f42..0000000
--- a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/**
- * @file multiply_constant_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the MultiplyConstantLayer class, which multiplies the input by
- * a (non-learnable) constant.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the multiply constant layer. The multiply constant layer
- * multiplies the input by a (non-learnable) constant.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class MultiplyConstantLayer
-{
- public:
-  /**
-   * Create the MultiplyConstantLayer object.
-   *
-   * @param scalar The constant scalar value to multiply the input by.
-   */
-  MultiplyConstantLayer(const double scalar) : scalar(scalar)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network. Multiply the input by the
-   * specified constant scalar value.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename InputType, typename OutputType>
-  void Forward(const InputType& input, OutputType& output)
-  {
-    output = input * scalar;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network. The backward pass
-   * multiplies the error with the specified constant scalar value.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType>
-  void Backward(const DataType& /* input */, const DataType& gy, DataType& g)
-  {
-    g = gy * scalar;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(scalar, "scalar");
-  }
-
- private:
-  //! Locally-stored constant scalar value.
-  const double scalar;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class MultiplyConstantLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
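
As context for the removal: both passes of MultiplyConstantLayer reduce to scalar multiplication. A minimal standalone sketch of the same arithmetic, written against plain Armadillo rather than the mlpack layer API (the values are illustrative):

    #include <armadillo>

    int main()
    {
      const double scalar = 2.5;  // the layer's fixed, non-learnable constant
      arma::mat input = arma::randu<arma::mat>(3, 4);

      // Forward pass: output = input * scalar.
      arma::mat output = input * scalar;

      // Backward pass: the backpropagated error is scaled by the same constant.
      arma::mat gy = arma::ones<arma::mat>(3, 4);
      arma::mat g = gy * scalar;

      output.print("output");
      g.print("g");
      return 0;
    }
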
diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
deleted file mode 100644
index 6c08698..0000000
--- a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/**
- * @file negative_log_likelihood_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the NegativeLogLikelihoodLayer class.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the negative log likelihood layer. The negative log
- * likelihood layer expects that the input contains log-probabilities for each
- * class. The layer also expects a class index, in the range between 1 and the
- * number of classes, as target when calling the Forward function.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class NegativeLogLikelihoodLayer
-{
- public:
-  /**
-   * Create the NegativeLogLikelihoodLayer object.
-   */
-  NegativeLogLikelihoodLayer() { /* Nothing to do here. */ }
-
-  /**
-   * Ordinary feed forward pass of a neural network. The negative log
-   * likelihood layer expects that the input contains log-probabilities for
-   * each class. The layer also expects a class index, in the range between 1
-   * and the number of classes, as target when calling the Forward function.
-   *
-   * @param input Input data that contains the log-probabilities for each class.
-   * @param target The target vector, that contains the class index in the range
-   *        between 1 and the number of classes.
-   */
-  template<typename eT>
-  double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
-  {
-    double output = 0;
-
-    for (size_t i = 0; i < input.n_cols; ++i)
-    {
-      // An unsigned value is always >= 0; an out-of-range target (including
-      // target(i) == 0) wraps around and is caught by the upper bound check.
-      const size_t currentTarget = target(i) - 1;
-      Log::Assert(currentTarget < input.n_rows,
-          "Target class out of range.");
-
-      output -= input(currentTarget, i);
-    }
-
-    return output;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network. The negative log
-   * likelihood layer expects that the input contains log-probabilities for
-   * each class. The layer also expects a class index, in the range between 1
-   * and the number of classes, as target when calling the Forward function.
-   *
-   * @param input The propagated input activation.
-   * @param target The target vector, that contains the class index in the range
-   *        between 1 and the number of classes.
-   * @param output The calculated error.
-   */
-  template<typename eT>
-  void Backward(const arma::Mat<eT>& input,
-                const arma::Mat<eT>& target,
-                arma::Mat<eT>& output)
-  {
-    output = arma::zeros<arma::Mat<eT> >(input.n_rows, input.n_cols);
-    for (size_t i = 0; i < input.n_cols; ++i)
-    {
-      const size_t currentTarget = target(i) - 1;
-      Log::Assert(currentTarget < input.n_rows,
-          "Target class out of range.");
-
-      output(currentTarget, i) = -1;
-    }
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
- private:
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class NegativeLogLikelihoodLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
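
The removed Forward()/Backward() pair above amounts to indexing log-probabilities by 1-based targets. A standalone sketch of the same computation in plain Armadillo (matrix values and targets are illustrative):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      // Log-probabilities for 3 classes and 2 samples (one sample per column).
      arma::mat logProbs = arma::log(arma::mat("0.7 0.2; 0.2 0.5; 0.1 0.3"));
      arma::rowvec target("1 2");  // 1-based class indices, as the layer expects

      double loss = 0;
      arma::mat grad = arma::zeros<arma::mat>(logProbs.n_rows, logProbs.n_cols);
      for (size_t i = 0; i < logProbs.n_cols; ++i)
      {
        const size_t c = target(i) - 1;  // convert to 0-based
        loss -= logProbs(c, i);          // forward: negative log-likelihood sum
        grad(c, i) = -1;                 // backward: -1 at each target entry
      }

      std::cout << "loss = " << loss << std::endl;
      return 0;
    }
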
diff --git a/src/mlpack/methods/ann/layer/one_hot_layer.hpp b/src/mlpack/methods/ann/layer/one_hot_layer.hpp
deleted file mode 100644
index 63200b2..0000000
--- a/src/mlpack/methods/ann/layer/one_hot_layer.hpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * @file one_hot_layer.hpp
- * @author Shangtong Zhang
- *
- * Definition of the OneHotLayer class, which implements a standard network
- * layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * An implementation of a one hot classification layer that can be used as
- * output layer.
- */
-class OneHotLayer
-{
- public:
-  /**
-   * Create the OneHotLayer object.
-   */
-  OneHotLayer()
-  {
-    // Nothing to do here.
-  }
-
-  /*
-   * Calculate the error using the specified input activation and the target.
-   * The error is stored into the given error parameter.
-   *
-   * @param inputActivations Input data used for evaluating the network.
-   * @param target Target data used for evaluating the network.
-   * @param error The calculated error with respect to the input activation and
-   * the given target.
-   */
-  template<typename DataType>
-  void CalculateError(const DataType& inputActivations,
-                      const DataType& target,
-                      DataType& error)
-  {
-    error = inputActivations - target;
-  }
-
-  /*
-   * Calculate the output class using the specified input activation.
-   *
-   * @param inputActivations Input data used to calculate the output class.
-   * @param output Output class of the input activation.
-   */
-  template<typename DataType>
-  void OutputClass(const DataType& inputActivations, DataType& output)
-  {
-    output = inputActivations;
-    output.zeros();
-
-    arma::uword maxIndex = 0;
-    inputActivations.max(maxIndex);
-    output(maxIndex) = 1;
-  }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& /* ar */, const unsigned int /* version */)
-  {
-    /* Nothing to do here */
-  }
-}; // class OneHotLayer
-
-//! Layer traits for the one-hot classification layer.
-template <>
-class LayerTraits<OneHotLayer>
-{
- public:
-  static const bool IsBinary = true;
-  static const bool IsOutputLayer = true;
-  static const bool IsBiasLayer = false;
-  static const bool IsConnection = false;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
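
OutputClass() above zeroes the output and marks the argmax; in isolation that is just:

    #include <armadillo>

    int main()
    {
      arma::vec activations("0.1 0.7 0.2");  // illustrative input activations

      // Zero everything, then place a 1 at the maximum activation.
      arma::vec output = arma::zeros<arma::vec>(activations.n_elem);
      arma::uword maxIndex = 0;
      activations.max(maxIndex);
      output(maxIndex) = 1;

      output.print("one-hot output");  // prints 0 1 0
      return 0;
    }
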
diff --git a/src/mlpack/methods/ann/layer/pooling_layer.hpp b/src/mlpack/methods/ann/layer/pooling_layer.hpp
deleted file mode 100644
index e8a205f..0000000
--- a/src/mlpack/methods/ann/layer/pooling_layer.hpp
+++ /dev/null
@@ -1,267 +0,0 @@
-/**
- * @file pooling_layer.hpp
- * @author Marcus Edel
- * @author Nilay Jain
- *
- * Definition of the PoolingLayer class, which applies a pooling rule over
- * sliding windows of its input.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the pooling layer. The pooling layer applies the given
- * pooling rule (e.g. mean pooling) over sliding windows of its input.
- *
- * @tparam PoolingRule Pooling function applied to each window.
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename PoolingRule = MeanPooling,
-    typename InputDataType = arma::cube,
-    typename OutputDataType = arma::cube
->
-class PoolingLayer
-{
- public:
-  /**
-   * Create the PoolingLayer object using the specified window size and stride.
-   *
-   * @param kSize Size of the pooling window.
-   * @param stride The stride of the pooling operation.
-   * @param pooling The pooling strategy.
-   */
-  PoolingLayer(const size_t kSize,
-               const size_t stride = 1,
-               PoolingRule pooling = PoolingRule()) :
-      kSize(kSize),
-      stride(stride),
-      pooling(pooling)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    // Pooling() accumulates into the output, so size and zero it first.
-    output = arma::zeros<arma::Mat<eT> >((input.n_rows - kSize) / stride + 1,
-        (input.n_cols - kSize) / stride + 1);
-    Pooling(input, output);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
-  {
-    output = arma::zeros<arma::Cube<eT> >((input.n_rows - kSize) / stride + 1,
-        (input.n_cols - kSize) / stride + 1, input.n_slices);
-
-    for (size_t s = 0; s < input.n_slices; s++)
-      Pooling(input.slice(s), output.slice(s));
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, using 3rd-order tensors as
-   * input, calculating the function f(x) by propagating x backwards through
-   * f, using the results from the feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Cube<eT>& /* unused */,
-                const arma::Cube<eT>& gy,
-                arma::Cube<eT>& g)
-  {
-    g = arma::zeros<arma::Cube<eT> >(inputParameter.n_rows,
-        inputParameter.n_cols, inputParameter.n_slices);
-
-    for (size_t s = 0; s < gy.n_slices; s++)
-    {
-      Unpooling(inputParameter.slice(s), gy.slice(s), g.slice(s));
-    }
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, using 3rd-order tensors as
-   * input, calculating the function f(x) by propagating x backwards through
-   * f, using the results from the feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Cube<eT>& /* unused */,
-                const arma::Mat<eT>& gy,
-                arma::Cube<eT>& g)
-  {
-    // Generate a cube from the error matrix.
-    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(outputParameter.n_rows,
-        outputParameter.n_cols, outputParameter.n_slices);
-
-    for (size_t s = 0, j = 0; s < mappedError.n_slices; s += gy.n_cols, j++)
-    {
-      for (size_t i = 0; i < gy.n_cols; i++)
-      {
-        arma::Col<eT> temp = gy.col(i).subvec(
-            j * outputParameter.n_rows * outputParameter.n_cols,
-            (j + 1) * outputParameter.n_rows * outputParameter.n_cols - 1);
-
-        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
-            outputParameter.n_rows, outputParameter.n_cols);
-      }
-    }
-
-    Backward(inputParameter, mappedError, g);
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  InputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  InputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(kSize, "kSize");
-    ar & data::CreateNVP(pooling, "pooling");
-    ar & data::CreateNVP(stride, "stride");
-  }
-
- private:
-  /**
-   * Apply pooling to the input and store the results.
-   *
-   * @param input The input to apply the pooling rule to.
-   * @param output The pooled result.
-   */
-  template<typename eT>
-  void Pooling(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    const size_t rStep = kSize;
-    const size_t cStep = kSize;
-
-    for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += stride)
-    {
-      for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += stride)
-      {
-        output(i, j) += pooling.Pooling(input(
-            arma::span(rowidx, rowidx + rStep - 1),
-            arma::span(colidx, colidx + cStep - 1)));
-      }
-    }
-  }
-
-  /**
-   * Apply unpooling to the input and store the results.
-   *
-   * @param input The input to apply the unpooling rule to.
-   * @param error The backpropagated error for this window.
-   * @param output The unpooled result.
-   */
-  template<typename eT>
-  void Unpooling(const arma::Mat<eT>& input,
-                 const arma::Mat<eT>& error,
-                 arma::Mat<eT>& output)
-  {
-    const size_t rStep = input.n_rows / error.n_rows;
-    const size_t cStep = input.n_cols / error.n_cols;
-
-    arma::Mat<eT> unpooledError;
-    for (size_t j = 0; j < input.n_cols; j += cStep)
-    {
-      for (size_t i = 0; i < input.n_rows; i += rStep)
-      {
-        const arma::Mat<eT>& inputArea = input(arma::span(i, i + rStep - 1),
-            arma::span(j, j + cStep - 1));
-
-        pooling.Unpooling(inputArea, error(i / rStep, j / cStep),
-            unpooledError);
-
-        output(arma::span(i, i + rStep - 1),
-            arma::span(j, j + cStep - 1)) += unpooledError;
-      }
-    }
-  }
-
-  //! Locally-stored size of the pooling window.
-  size_t kSize;
-
-  //! Locally-stored stride value by which we move filter.
-  size_t stride;
-
-  //! Locally-stored pooling strategy.
-  PoolingRule pooling;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class PoolingLayer
-
-//! Layer traits for the pooling layer.
-template<
-    typename PoolingRule,
-    typename InputDataType,
-    typename OutputDataType
->
-class LayerTraits<PoolingLayer<PoolingRule, InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
-
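
The pooling math being deleted is compact enough to restate. A self-contained mean-pooling sketch in plain Armadillo, using the same output-size formula (n - kSize) / stride + 1 as the removed Forward() (window size and input are illustrative):

    #include <armadillo>

    int main()
    {
      const size_t kSize = 2, stride = 2;
      arma::mat input = arma::randu<arma::mat>(4, 4);

      const size_t outRows = (input.n_rows - kSize) / stride + 1;
      const size_t outCols = (input.n_cols - kSize) / stride + 1;
      arma::mat output(outRows, outCols);

      // Slide a kSize x kSize window with the given stride and average it.
      for (size_t j = 0, c = 0; j < outCols; ++j, c += stride)
        for (size_t i = 0, r = 0; i < outRows; ++i, r += stride)
          output(i, j) = arma::mean(arma::vectorise(input(
              arma::span(r, r + kSize - 1), arma::span(c, c + kSize - 1))));

      output.print("mean-pooled output");
      return 0;
    }
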
diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/recurrent_layer.hpp
deleted file mode 100644
index 5e231a7..0000000
--- a/src/mlpack/methods/ann/layer/recurrent_layer.hpp
+++ /dev/null
@@ -1,192 +0,0 @@
-/**
- * @file recurrent_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the RecurrentLayer class.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the RecurrentLayer class. Recurrent layers can be used
- * similarly to feed-forward layers, except that the input is stored in the
- * recurrentParameter instead of the inputParameter.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class RecurrentLayer
-{
- public:
-  /**
-   * Create the RecurrentLayer object using the specified number of units.
-   *
-   * @param inSize The number of input units.
-   * @param outSize The number of output units.
-   */
-  RecurrentLayer(const size_t inSize, const size_t outSize) :
-      inSize(inSize),
-      outSize(outSize),
-      recurrentParameter(arma::zeros<InputDataType>(inSize, 1))
-  {
-    weights.set_size(outSize, inSize);
-  }
-
-  /**
-   * Create the RecurrentLayer object using the specified number of units.
-   *
-   * @param outSize The number of output units.
-   */
-  RecurrentLayer(const size_t outSize) :
-      inSize(outSize),
-      outSize(outSize),
-      recurrentParameter(arma::zeros<InputDataType>(outSize, 1))
-  {
-    weights.set_size(outSize, inSize);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    output = input + weights * recurrentParameter;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Backward(const InputType& /* unused */,
-                const arma::Mat<eT>& gy,
-                arma::mat& g)
-  {
-    g = (weights).t() * gy;
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the input activation.
-   *
-   * @param input The propagated input activation.
-   * @param d The calculated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT, typename GradientDataType>
-  void Gradient(const InputType& /* input */,
-                const arma::Mat<eT>& d,
-                GradientDataType& g)
-  {
-    g = d * recurrentParameter.t();
-  }
-
-  //! Get the weights.
-  OutputDataType const& Weights() const { return weights; }
-  //! Modify the weights.
-  OutputDataType& Weights() { return weights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the recurrent parameter.
-  InputDataType const& RecurrentParameter() const { return recurrentParameter; }
-  //! Modify the recurrent parameter.
-  InputDataType& RecurrentParameter() { return recurrentParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  OutputDataType const& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  OutputDataType& Gradient() { return gradient; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(recurrentParameter, "recurrentParameter");
-    ar & data::CreateNVP(weights, "weights");
-  }
-
- private:
-  //! Locally-stored number of input units.
-  size_t inSize;
-
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! Locally-stored weight object.
-  OutputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Locally-stored recurrent parameter object.
-  InputDataType recurrentParameter;
-}; // class RecurrentLayer
-
-//! Layer traits for the recurrent layer.
-template<typename InputDataType, typename OutputDataType>
-class LayerTraits<RecurrentLayer<InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
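
The removed Forward() adds a learned transform of the previous state to the current input. A sketch of that recurrence in plain Armadillo; note that feeding the activation back into the state each step is an assumption here, since the removed class leaves the recurrent update to the surrounding network:

    #include <armadillo>

    int main()
    {
      const size_t size = 3;
      arma::mat weights = arma::randu<arma::mat>(size, size);
      arma::vec recurrentParameter = arma::zeros<arma::vec>(size);

      for (size_t t = 0; t < 3; ++t)
      {
        arma::vec input = arma::randu<arma::vec>(size);
        // Forward pass: output = input + W * recurrent state.
        arma::vec output = input + weights * recurrentParameter;
        recurrentParameter = output;  // assumed state update between steps
        output.print("output at step");
      }
      return 0;
    }
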
diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
deleted file mode 100644
index 655e443..0000000
--- a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
+++ /dev/null
@@ -1,139 +0,0 @@
-/**
- * @file reinforce_normal_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the ReinforceNormalLayer class, which implements the REINFORCE
- * algorithm for the normal distribution.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the reinforce normal layer. The reinforce normal layer
- * implements the REINFORCE algorithm for the normal distribution.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class ReinforceNormalLayer
-{
- public:
-  /**
-   * Create the ReinforceNormalLayer object.
-   *
-   * @param stdev Standard deviation used during the forward and backward pass.
-   */
-  ReinforceNormalLayer(const double stdev) : stdev(stdev)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    if (!deterministic)
-    {
-      // Sample standard normal noise, scale it by the standard deviation, and
-      // center it at the input (the mean).
-      output = arma::randn<arma::Mat<eT> >(input.n_rows, input.n_cols) *
-          stdev + input;
-    }
-    else
-    {
-      // Use maximum a posteriori.
-      output = input;
-    }
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType>
-  void Backward(const DataType& input,
-                const DataType& /* gy */,
-                DataType& g)
-  {
-    g = (input - inputParameter) / std::pow(stdev, 2.0);
-
-    // Multiply by reward and multiply by -1.
-    g *= -reward;
-  }
-
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the value of the deterministic parameter.
-  bool Deterministic() const { return deterministic; }
-  //! Modify the value of the deterministic parameter.
-  bool& Deterministic() { return deterministic; }
-
-  //! Get the value of the reward parameter.
-  double Reward() const { return reward; }
-  //! Modify the value of the reward parameter.
-  double& Reward() { return reward; }
-
- private:
-  //! Standard deviation used during the forward and backward pass.
-  const double stdev;
-
-  //! Locally-stored reward parameter.
-  double reward;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! If true use maximum a posteriori during the forward pass.
-  bool deterministic;
-}; // class ReinforceNormalLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
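
The two passes above are a normal-distribution sample and the corresponding REINFORCE gradient. A standalone sketch of the same formulas (stdev, mean, and reward values are illustrative):

    #include <armadillo>

    int main()
    {
      const double stdev = 0.5;
      arma::vec mean("0.0 1.0");  // the layer's input acts as the mean

      // Stochastic forward pass: scaled standard normal noise around the mean.
      arma::vec sample = arma::randn<arma::vec>(mean.n_elem) * stdev + mean;

      // Backward pass: gradient of the log-density, scaled by -reward.
      const double reward = 1.0;
      arma::vec g = -reward * (sample - mean) / (stdev * stdev);

      sample.print("sample");
      g.print("gradient");
      return 0;
    }
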
diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/softmax_layer.hpp
deleted file mode 100644
index a2d3323..0000000
--- a/src/mlpack/methods/ann/layer/softmax_layer.hpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/**
- * @file softmax_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the SoftmaxLayer class.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the softmax layer. The softmax layer normalizes its
- * input into a probability distribution via a numerically stable softmax.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class SoftmaxLayer
-{
- public:
-  /**
-   * Create the SoftmaxLayer object.
-   */
-  SoftmaxLayer()
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    output = arma::trunc_exp(input -
-        arma::repmat(arma::max(input), input.n_rows, 1));
-    output /= arma::accu(output);
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  void Backward(const arma::Mat<eT>& /* unused */,
-                const arma::Mat<eT>& gy,
-                arma::Mat<eT>& g)
-  {
-    g = gy;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  InputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  InputDataType& Delta() { return delta; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& /* ar */, const unsigned int /* version */)
-  {
-    /* Nothing to do here */
-  }
-
- private:
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class SoftmaxLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
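
The removed Forward() is the standard max-shifted softmax. In isolation, for a single activation vector:

    #include <armadillo>

    int main()
    {
      arma::vec input("1.0 2.0 3.0");  // illustrative activations

      // Subtract the maximum before exponentiating (numerical stability),
      // then normalize to a probability distribution.
      arma::vec output = arma::trunc_exp(input - input.max());
      output /= arma::accu(output);

      output.print("softmax");
      return 0;
    }
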
diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
deleted file mode 100644
index c3b723f..0000000
--- a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/**
- * @file sparse_bias_layer.hpp
- * @author Tham Ngap Wei
- *
- * Definition of the SparseBiasLayer class.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * An implementation of a bias layer designed for the sparse autoencoder.
- * The SparseBiasLayer class represents a single layer of a neural network.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class SparseBiasLayer
-{
- public:
-  /**
-   * Create the SparseBiasLayer object using the specified number of units and
-   * batch size.
-   *
-   * @param outSize The number of output units.
-   * @param batchSize The batch size used to train the network.
-   */
-  SparseBiasLayer(const size_t outSize, const size_t batchSize) :
-      outSize(outSize),
-      batchSize(batchSize)
-  {
-    weights.set_size(outSize, 1);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    output = input + arma::repmat(weights, 1, input.n_cols);
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename DataType, typename ErrorType>
-  void Backward(const DataType& /* unused */,
-                const ErrorType& gy,
-                ErrorType& g)
-  {
-    g = gy;
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the bias.
-   *
-   * @param input The propagated input.
-   * @param d The calculated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Gradient(const InputType& /* input */,
-                const arma::Mat<eT>& d,
-                InputDataType& g)
-  {
-    g = arma::sum(d, 1) / static_cast<typename InputDataType::value_type>(
-        batchSize);
-  }
-
-  //! Get the batch size.
-  size_t BatchSize() const { return batchSize; }
-  //! Modify the batch size.
-  size_t& BatchSize() { return batchSize; }
-
-  //! Get the weights.
-  InputDataType const& Weights() const { return weights; }
-  //! Modify the weights.
-  InputDataType& Weights() { return weights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  InputDataType const& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  InputDataType& Gradient() { return gradient; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(weights, "weights");
-    ar & data::CreateNVP(batchSize, "batchSize");
-  }
-
- private:
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! The batch size used to train the network.
-  size_t batchSize;
-
-  //! Locally-stored weight object.
-  InputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  InputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class SparseBiasLayer
-
-//! Layer traits for the bias layer.
-template<typename InputDataType, typename OutputDataType>
-class LayerTraits<SparseBiasLayer<InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = true;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
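
The bias layer above broadcasts one bias vector across a batch and averages the error over the batch in the gradient. The same two formulas as a standalone Armadillo sketch (sizes are illustrative):

    #include <armadillo>

    int main()
    {
      const size_t outSize = 3, batchSize = 4;
      arma::vec bias = arma::randu<arma::vec>(outSize);
      arma::mat input = arma::randu<arma::mat>(outSize, batchSize);

      // Forward pass: broadcast the bias across all columns (samples).
      arma::mat output = input + arma::repmat(bias, 1, input.n_cols);

      // Gradient: sum the error over the batch and divide by the batch size.
      arma::mat d = arma::ones<arma::mat>(outSize, batchSize);
      arma::vec g = arma::sum(d, 1) / double(batchSize);

      g.print("bias gradient");
      return 0;
    }
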
diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
deleted file mode 100644
index 6b1d9d1..0000000
--- a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/**
- * @file sparse_input_layer.hpp
- * @author Tham Ngap Wei
- *
- * Definition of the sparse input class, which serves as the first layer
- * of the sparse autoencoder.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-#include <type_traits>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the SparseInputLayer. The SparseInputLayer class
- * represents the first layer of the sparse autoencoder.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
-    >
-class SparseInputLayer
-{
- public:
-  /**
-   * Create the SparseInputLayer object using the specified number of units.
-   *
-   * @param inSize The number of input units.
-   * @param outSize The number of output units.
-   * @param lambda L2-regularization parameter.
-   */
-  SparseInputLayer(const size_t inSize,
-                   const size_t outSize,
-                   const double lambda = 0.0001) :
-    inSize(inSize),
-    outSize(outSize),
-    lambda(lambda)
-  {
-    weights.set_size(outSize, inSize);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    output = weights * input;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Backward(const InputType& /* unused */,
-                const arma::Mat<eT>& gy,
-                arma::Mat<eT>& g)
-  {
-    g = gy;
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the input activation.
-   *
-   * @param input The propagated input.
-   * @param d The calculated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT, typename GradientDataType>
-  void Gradient(const InputType& input,
-                const arma::Mat<eT>& d,
-                GradientDataType& g)
-  {
-    g = d * input.t() / static_cast<typename InputType::value_type>(
-        input.n_cols) + lambda * weights;
-  }
-
-  //! Get the weights.
-  OutputDataType const& Weights() const { return weights; }
-  //! Modify the weights.
-  OutputDataType& Weights() { return weights; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  OutputDataType const& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  OutputDataType& Gradient() { return gradient; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(weights, "weights");
-    ar & data::CreateNVP(lambda, "lambda");
-  }
-
- private:
-  //! Locally-stored number of input units.
-  size_t inSize;
-
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! L2-regularization parameter.
-  double lambda;
-
-  //! Locally-stored weight object.
-  OutputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class SparseInputLayer
-
-//! Layer traits for the SparseInputLayer.
-template<typename InputDataType, typename OutputDataType>
-class LayerTraits<SparseInputLayer<InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
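
The gradient in the removed layer is a batch-averaged outer product plus an L2 (weight decay) term. A standalone sketch of that formula (sizes and the unit error are illustrative):

    #include <armadillo>

    int main()
    {
      const size_t inSize = 4, outSize = 3, batch = 5;
      const double lambda = 0.0001;  // L2-regularization strength
      arma::mat weights = arma::randu<arma::mat>(outSize, inSize);
      arma::mat input = arma::randu<arma::mat>(inSize, batch);

      // Forward pass: a plain linear map.
      arma::mat output = weights * input;

      // Gradient: batch-averaged outer product plus the L2 term.
      arma::mat d = arma::ones<arma::mat>(outSize, batch);
      arma::mat g = d * input.t() / double(input.n_cols) + lambda * weights;

      g.print("weight gradient");
      return 0;
    }
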
diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
deleted file mode 100644
index 33a2a72..0000000
--- a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
+++ /dev/null
@@ -1,227 +0,0 @@
-/**
- * @file sparse_output_layer.hpp
- * @author Tham Ngap Wei
- *
- * This is the fourth layer of the sparse autoencoder.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the SparseOutputLayer class. The SparseOutputLayer class
- * represents the fourth layer of the sparse autoencoder.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::mat,
-    typename OutputDataType = arma::mat
->
-class SparseOutputLayer
-{
- public:
-  /**
-   * Create the SparseOutputLayer object using the specified number of units.
-   *
-   * @param inSize The number of input units.
-   * @param outSize The number of output units.
-   * @param lambda L2-regularization parameter.
-   * @param beta KL divergence parameter.
-   * @param rho Sparsity parameter.
-   */
-  SparseOutputLayer(const size_t inSize,
-                    const size_t outSize,
-                    const double lambda = 0.0001,
-                    const double beta = 3,
-                    const double rho = 0.01) :
-    inSize(inSize),
-    outSize(outSize),
-    lambda(lambda),
-    beta(beta),
-    rho(rho)
-  {
-    weights.set_size(outSize, inSize);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data used for evaluating the specified function.
-   * @param output Resulting output activation.
-   */
-  template<typename eT>
-  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
-  {
-    output = weights * input;
-    // Average activations of the hidden layer.
-    rhoCap = arma::sum(input, 1) / static_cast<double>(input.n_cols);
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Backward(const InputType& input,
-                const arma::Mat<eT>& gy,
-                arma::Mat<eT>& g)
-  {
-    const arma::mat klDivGrad = beta * (-(rho / rhoCap) + (1 - rho) /
-          (1 - rhoCap));
-
-    // NOTE: if the Armadillo version is recent enough, find_nonfinite() can
-    // be used to zero out non-finite entries and prevent overflow:
-    // klDivGrad.elem(arma::find_nonfinite(klDivGrad)).zeros();
-    g = weights.t() * gy +
-        arma::repmat(klDivGrad, 1, input.n_cols);
-  }
-
-  /*
-   * Calculate the gradient using the output delta and the input activation.
-   *
-   * @param input The propagated input.
-   * @param d The calculated error.
-   * @param g The calculated gradient.
-   */
-  template<typename InputType, typename eT>
-  void Gradient(const InputType& input,
-                const arma::Mat<eT>& d,
-                arma::Mat<eT>& g)
-  {
-    g = d * input.t() / static_cast<typename InputType::value_type>(
-        input.n_cols) + lambda * weights;
-  }
-
-  //! Sets the KL divergence parameter.
-  void Beta(const double b)
-  {
-    beta = b;
-  }
-
-  //! Gets the KL divergence parameter.
-  double Beta() const
-  {
-    return beta;
-  }
-
-  //! Sets the sparsity parameter.
-  void Rho(const double r)
-  {
-    rho = r;
-  }
-
-  //! Gets the sparsity parameter.
-  double Rho() const
-  {
-    return rho;
-  }
-
-  //! Get the weights.
-  OutputDataType const& Weights() const { return weights; }
-  //! Modify the weights.
-  OutputDataType& Weights() { return weights; }
-
-  //! Get the RhoCap.
-  OutputDataType const& RhoCap() const { return rhoCap; }
-  //! Modify the RhoCap.
-  OutputDataType& RhoCap() { return rhoCap; }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the gradient.
-  OutputDataType const& Gradient() const { return gradient; }
-  //! Modify the gradient.
-  OutputDataType& Gradient() { return gradient; }
-
-  /**
-   * Serialize the layer.
-   */
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */)
-  {
-    ar & data::CreateNVP(weights, "weights");
-    ar & data::CreateNVP(lambda, "lambda");
-    ar & data::CreateNVP(beta, "beta");
-    ar & data::CreateNVP(rho, "rho");
-  }
-
- private:
-  //! Locally-stored number of input units.
-  size_t inSize;
-
-  //! Locally-stored number of output units.
-  size_t outSize;
-
-  //! L2-regularization parameter.
-  double lambda;
-
-  //! KL divergence parameter.
-  double beta;
-
-  //! Sparsity parameter.
-  double rho;
-
-  //! Locally-stored weight object.
-  OutputDataType weights;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored gradient object.
-  OutputDataType gradient;
-
-  //! Average activations of the hidden layer.
-  OutputDataType rhoCap;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-}; // class SparseOutputLayer
-
-//! Layer traits for the SparseOutputLayer.
-template<typename InputDataType, typename OutputDataType>
-class LayerTraits<SparseOutputLayer<InputDataType, OutputDataType> >
-{
- public:
-  static const bool IsBinary = false;
-  static const bool IsOutputLayer = false;
-  static const bool IsBiasLayer = false;
-  static const bool IsLSTMLayer = false;
-  static const bool IsConnection = true;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
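
The sparsity term in Backward() above is the derivative of the KL divergence between the target activation rho and the observed average activation rho-hat. That term alone, as a standalone Armadillo sketch (beta, rho, and the activations are illustrative):

    #include <armadillo>

    int main()
    {
      const double beta = 3.0, rho = 0.01;

      // Average activation of each hidden unit over the batch ("rho hat").
      arma::mat hidden = arma::randu<arma::mat>(3, 10);
      arma::vec rhoCap = arma::sum(hidden, 1) / double(hidden.n_cols);

      // Gradient of the KL-divergence sparsity penalty.
      arma::vec klDivGrad = beta * (-(rho / rhoCap) + (1 - rho) / (1 - rhoCap));

      klDivGrad.print("KL sparsity gradient");
      return 0;
    }
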
diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
deleted file mode 100644
index 5b4da8e..0000000
--- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
+++ /dev/null
@@ -1,171 +0,0 @@
-/**
- * @file vr_class_reward_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the VRClassRewardLayer class, which implements the variance
- * reduced classification reinforcement layer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the variance reduced classification reinforcement layer.
- * This layer is meant to be used in combination with the reinforce normal
- * layer (ReinforceNormalLayer), and expects a reward of 1 for success and 0
- * otherwise.
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::field<arma::mat>,
-    typename OutputDataType = arma::field<arma::mat>
->
-class VRClassRewardLayer
-{
- public:
-  /**
-   * Create the VRClassRewardLayer object.
-   *
-   * @param scale Parameter used to scale the reward.
-   * @param sizeAverage Take the average over all batches.
-   */
-  VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) :
-      scale(scale),
-      sizeAverage(sizeAverage)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data that contains the log-probabilities for each class.
-   * @param target The target vector, that contains the class index in the range
-   *        between 1 and the number of classes.
-   */
-  template<typename eT>
-  double Forward(const arma::field<arma::Mat<eT> >& input,
-                 const arma::Mat<eT>& target)
-  {
-    return Forward(input(0, 0), target);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data that contains the log-probabilities for each class.
-   * @param target The target vector, that contains the class index in the range
-   *        between 1 and the number of classes.
-   */
-  template<typename eT>
-  double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
-  {
-    reward = 0;
-    arma::uword index = 0;
-
-    for (size_t i = 0; i < input.n_cols; i++)
-    {
-      input.unsafe_col(i).max(index);
-      // Accumulate the scaled reward over the batch: 1 per correct argmax.
-      reward += ((index + 1) == target(i)) * scale;
-    }
-
-    if (sizeAverage)
-    {
-      return -reward / input.n_cols;
-    }
-
-    return -reward;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f, using the results from the
-   * feed forward pass.
-   *
-   * @param input The propagated input activation.
-   * @param gy The backpropagated error.
-   * @param g The calculated gradient.
-   */
-  template<typename eT>
-  double Backward(const arma::field<arma::Mat<eT> >& input,
-                const arma::Mat<eT>& /* gy */,
-                arma::field<arma::Mat<eT> >& g)
-  {
-    g = arma::field<arma::Mat<eT> >(2, 1);
-    g(0, 0) = arma::zeros(input(0, 0).n_rows, input(0, 0).n_cols);
-
-    double vrReward = reward - arma::as_scalar(input(1, 0));
-    if (sizeAverage)
-    {
-      vrReward /= input(0, 0).n_cols;
-    }
-
-    // input is a field; the batch size is the number of columns of
-    // input(0, 0), not of the enclosing field.
-    const double norm = sizeAverage ? 2.0 / input(0, 0).n_cols : 2.0;
-
-    g(1, 0) = norm * (input(1, 0) - reward);
-
-    return vrReward;
-  }
-
-  //! Get the input parameter.
-  InputDataType const& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  OutputDataType const& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  OutputDataType const& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the value of the deterministic parameter.
-  bool Deterministic() const { return deterministic; }
-  //! Modify the value of the deterministic parameter.
-  bool& Deterministic() { return deterministic; }
-
- private:
-  //! Locally-stored value to scale the reward.
-  const double scale;
-
-  //! If true take the average over all batches.
-  const bool sizeAverage;
-
-  //! Locally stored reward parameter.
-  double reward;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! If true, deterministic evaluation is used.
-  bool deterministic;
-}; // class VRClassRewardLayer
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
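
Forward() above rewards correct argmax predictions against 1-based targets. A standalone sketch of that reward computation, including the batch averaging (and using the accumulated form of the reward noted above):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      const double scale = 1.0;

      // Log-probabilities for 3 classes and 2 samples; 1-based targets.
      arma::mat input = arma::log(arma::mat("0.7 0.2; 0.2 0.5; 0.1 0.3"));
      arma::rowvec target("1 2");

      double reward = 0;
      arma::uword index = 0;
      for (size_t i = 0; i < input.n_cols; ++i)
      {
        input.unsafe_col(i).max(index);
        reward += ((index + 1) == target(i)) * scale;  // 1 per correct argmax
      }

      // With sizeAverage, the negative reward is averaged over the batch.
      std::cout << "averaged loss = " << -reward / input.n_cols << std::endl;
      return 0;
    }
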
diff --git a/src/mlpack/methods/ann/network_traits.hpp b/src/mlpack/methods/ann/network_traits.hpp
deleted file mode 100644
index 5aa91e8..0000000
--- a/src/mlpack/methods/ann/network_traits.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * @file network_traits.hpp
- * @author Marcus Edel
- *
- * NetworkTraits class, a template class to get information about various
- * networks.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP
-#define MLPACK_METHODS_ANN_NETWORK_TRAITS_HPP
-
-namespace mlpack {
-namespace ann {
-
-/**
- * This is a template class that can provide information about various
- * networks. By default, this class will provide the weakest possible
- * assumptions on networks, and each network should override values as
- * necessary. If a network doesn't need to override a value, then there's no
- * need to write a NetworkTraits specialization for that class.
- */
-template<typename NetworkType>
-class NetworkTraits
-{
- public:
-  /**
-   * This is true if the network is a feed forward neural network.
-   */
-  static const bool IsFNN = false;
-
-  /**
-   * This is true if the network is a recurrent neural network.
-   */
-  static const bool IsRNN = false;
-
-  /**
-   * This is true if the network is a convolutional neural network.
-   */
-  static const bool IsCNN = false;
-
-  /**
-   * This is true if the network is a sparse autoencoder.
-   */
-  static const bool IsSAE = false;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
-
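
A network opts into these traits by specializing NetworkTraits for its own type; a minimal sketch, using a hypothetical MyRNN class that is not part of mlpack:

    class MyRNN { /* ... */ };

    namespace mlpack {
    namespace ann {

    // Override only the values that differ from the weakest assumptions.
    template<>
    class NetworkTraits<MyRNN>
    {
     public:
      static const bool IsFNN = false;
      static const bool IsRNN = true;  // MyRNN is a recurrent network.
      static const bool IsCNN = false;
      static const bool IsSAE = false;
    };

    } // namespace ann
    } // namespace mlpack
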
diff --git a/src/mlpack/methods/ann/network_util.hpp b/src/mlpack/methods/ann/network_util.hpp
deleted file mode 100644
index 93bdf04..0000000
--- a/src/mlpack/methods/ann/network_util.hpp
+++ /dev/null
@@ -1,247 +0,0 @@
-/**
- * @file network_util.hpp
- * @author Marcus Edel
- *
- * Neural network utilities.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_HPP
-#define MLPACK_METHODS_ANN_NETWORK_UTIL_HPP
-
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-/**
- * Neural network utility functions.
- */
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Auxiliary function to get the number of weights of the specified network.
- *
- * @param network The network used for specifying the number of weights.
- * @return The number of weights.
- */
-template<size_t I = 0, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), size_t>::type
-NetworkSize(std::tuple<Tp...>& network);
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), size_t>::type
-NetworkSize(std::tuple<Tp...>& network);
-
-/**
- * Auxiliary function to get the number of weights of the specified layer.
- *
- * @param layer The layer used for specifying the number of weights.
- * @param output The layer output parameter.
- * @return The number of weights.
- */
-template<typename T, typename P>
-typename std::enable_if<
-    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerSize(T& layer, P& output);
-
-template<typename T, typename P>
-typename std::enable_if<
-    HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerSize(T& layer, P& output);
-
-/**
- * Auxiliary function to set the weights of the specified network.
- *
- * @param weights The weights used to set the weights of the network.
- * @param network The network used to set the weights.
- * @param offset The memory offset of the weights.
- */
-template<size_t I = 0, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), void>::type
-NetworkWeights(arma::mat& weights,
-               std::tuple<Tp...>& network,
-               size_t offset = 0);
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), void>::type
-NetworkWeights(arma::mat& weights,
-               std::tuple<Tp...>& network,
-               size_t offset = 0);
-
-/**
- * Auxiliary function to set the weights of the specified layer.
- *
- * @param layer The layer used to set the weights.
- * @param weights The weights used to set the weights of the layer.
- * @param offset The memory offset of the weights.
- * @param output The output parameter of the layer.
- * @return The number of weights.
- */
-template<typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
-LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::mat& output);
-
-template<typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::cube&(T::*)()>::value, size_t>::type
-LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::cube& output);
-
-template<typename T, typename P>
-typename std::enable_if<
-    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerWeights(T& layer, arma::mat& weights, size_t offset, P& output);
-
-/**
- * Auxiliary function to set the gradients of the specified network.
- *
- * @param gradients The gradients used to set the gradient of the network.
- * @param network The network used to set the gradients.
- * @param offset The memory offset of the gradients.
- * @return The number of gradients.
- */
-template<size_t I = 0, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), void>::type
-NetworkGradients(arma::mat& gradients,
-                 std::tuple<Tp...>& network,
-                 size_t offset = 0);
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), void>::type
-NetworkGradients(arma::mat& gradients,
-                 std::tuple<Tp...>& network,
-                 size_t offset = 0);
-
-/**
- * Auxiliary function to set the gradients of the specified layer.
- *
- * @param layer The layer used to set the gradients.
- * @param gradients The gradients used to set the gradient of the layer.
- * @param offset The memory offset of the gradients.
- * @param output The output parameter of the layer.
- * @return The number of gradients.
- */
-template<typename T>
-typename std::enable_if<
-    HasGradientCheck<T, arma::mat&(T::*)()>::value, size_t>::type
-LayerGradients(T& layer,
-               arma::mat& gradients,
-               size_t offset,
-               arma::mat& output);
-
-template<typename T>
-typename std::enable_if<
-    HasGradientCheck<T, arma::cube&(T::*)()>::value, size_t>::type
-LayerGradients(T& layer,
-               arma::mat& gradients,
-               size_t offset,
-               arma::cube& output);
-
-template<typename T, typename P>
-typename std::enable_if<
-    !HasGradientCheck<T, P&(T::*)()>::value, size_t>::type
-LayerGradients(T& layer, arma::mat& gradients, size_t offset, P& output);
-
-/**
- * Auxiliary function to get the input size of the specified network.
- *
- * @param network The network used for specifying the input size.
- * @return The input size.
- */
-template<size_t I = 0, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), size_t>::type
-NetworkInputSize(std::tuple<Tp...>& network);
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), size_t>::type
-NetworkInputSize(std::tuple<Tp...>& network);
-
-/**
- * Auxiliary function to get the input size of the specified layer.
- *
- * @param layer The layer used for specifying the input size.
- * @param output The layer output parameter.
- * @return The input size.
- */
-template<typename T, typename P>
-typename std::enable_if<
-    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerInputSize(T& layer, P& output);
-
-template<typename T, typename P>
-typename std::enable_if<
-    HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerInputSize(T& layer, P& output);
-
-/**
- * Auxiliary function to set the weights of the specified network using a given
- * initialize rule.
- *
- * @param initializeRule The rule used to initialize the network weights.
- * @param weights The weights used to set the weights of the network.
- * @param network The network used to set the weights.
- * @param offset The memory offset of the weights.
- */
-template<size_t I = 0, typename InitializationRuleType, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), void>::type
-NetworkWeights(InitializationRuleType& initializeRule,
-               arma::mat& weights,
-               std::tuple<Tp...>& network,
-               size_t offset = 0);
-
-template<size_t I, typename InitializationRuleType, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), void>::type
-NetworkWeights(InitializationRuleType& initializeRule,
-               arma::mat& weights,
-               std::tuple<Tp...>& network,
-               size_t offset = 0);
-
-/**
- * Auxiliary function to set the weights of the specified layer using the given
- * initialize rule.
- *
- * @param initializeRule The rule used to initialize the layer weights.
- * @param layer The layer used to set the weights.
- * @param weights The weights used to set the weights of the layer.
- * @param offset The memory offset of the weights.
- * @param output The output parameter of the layer.
- * @return The number of weights.
- */
-template<typename InitializationRuleType, typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
-LayerWeights(InitializationRuleType& initializeRule,
-             T& layer,
-             arma::mat& weights,
-             size_t offset,
-             arma::mat& output);
-
-template<typename InitializationRuleType, typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::cube&(T::*)()>::value, size_t>::type
-LayerWeights(InitializationRuleType& initializeRule,
-             T& layer,
-             arma::mat& weights,
-             size_t offset,
-             arma::cube& output);
-
-template<typename InitializationRuleType, typename T, typename P>
-typename std::enable_if<
-    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerWeights(InitializationRuleType& initializeRule,
-             T& layer,
-             arma::mat& weights,
-             size_t offset,
-             P& output);
-
-} // namespace ann
-} // namespace mlpack
-
-// Include implementation.
-#include "network_util_impl.hpp"
-
-#endif
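
All of the declarations above follow the same compile-time recursion idiom: the overload constrained by I < sizeof...(Tp) handles element I and recurses, while the overload constrained by I == sizeof...(Tp) terminates the recursion. A self-contained sketch of the idiom (CountLayers is illustrative, not mlpack API):

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <type_traits>

    // Terminating overload: selected once I has walked past the last element.
    template<size_t I = 0, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp), size_t>::type
    CountLayers(std::tuple<Tp...>& /* network */) { return 0; }

    // Recursive overload: selected while I indexes a valid element.
    template<size_t I = 0, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp), size_t>::type
    CountLayers(std::tuple<Tp...>& network)
    {
      return 1 + CountLayers<I + 1, Tp...>(network);
    }

    int main()
    {
      std::tuple<int, double, char> network;
      std::cout << CountLayers(network) << std::endl;  // prints 3
    }
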
diff --git a/src/mlpack/methods/ann/network_util_impl.hpp b/src/mlpack/methods/ann/network_util_impl.hpp
deleted file mode 100644
index 3203457..0000000
--- a/src/mlpack/methods/ann/network_util_impl.hpp
+++ /dev/null
@@ -1,286 +0,0 @@
-/**
- * @file network_util_impl.hpp
- * @author Marcus Edel
- *
- * Implementation of the network auxiliary functions.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP
-#define MLPACK_METHODS_ANN_NETWORK_UTIL_IMPL_HPP
-
-#include "network_util_impl.hpp"
-
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann {
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), size_t>::type
-NetworkSize(std::tuple<Tp...>& /* unused */)
-{
-  return 0;
-}
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), size_t>::type
-NetworkSize(std::tuple<Tp...>& network)
-{
-  return LayerSize(std::get<I>(network), std::get<I>(
-      network).OutputParameter()) + NetworkSize<I + 1, Tp...>(network);
-}
-
-template<typename T, typename P>
-typename std::enable_if<
-  HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerSize(T& layer, P& /* unused */)
-{
-  return layer.Weights().n_elem;
-}
-
-template<typename T, typename P>
-typename std::enable_if<
-  !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerSize(T& /* unused */, P& /* unused */)
-{
-  return 0;
-}
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), void>::type
-NetworkWeights(arma::mat& weights,
-               std::tuple<Tp...>& network,
-               size_t offset)
-{
-  NetworkWeights<I + 1, Tp...>(weights, network,
-      offset + LayerWeights(std::get<I>(network), weights,
-      offset, std::get<I>(network).OutputParameter()));
-}
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), void>::type
-NetworkWeights(arma::mat& /* unused */,
-               std::tuple<Tp...>& /* unused */,
-               size_t /* unused */)
-{
-  /* Nothing to do here */
-}
-
-template<typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
-LayerWeights(T& layer,
-             arma::mat& weights,
-             size_t offset,
-             arma::mat& /* unused */)
-{
-  layer.Weights() = arma::mat(weights.memptr() + offset,
-      layer.Weights().n_rows, layer.Weights().n_cols, false, false);
-
-  return layer.Weights().n_elem;
-}
-
-template<typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::cube&(T::*)()>::value, size_t>::type
-LayerWeights(T& layer,
-             arma::mat& weights,
-             size_t offset,
-             arma::cube& /* unused */)
-{
-  layer.Weights() = arma::cube(weights.memptr() + offset,
-      layer.Weights().n_rows, layer.Weights().n_cols,
-      layer.Weights().n_slices, false, false);
-
-  return layer.Weights().n_elem;
-}
-
-template<typename T, typename P>
-typename std::enable_if<
-    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerWeights(T& /* unused */,
-             arma::mat& /* unused */,
-             size_t /* unused */,
-             P& /* unused */)
-{
-  return 0;
-}
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), void>::type
-NetworkGradients(arma::mat& gradients,
-                 std::tuple<Tp...>& network,
-                 size_t offset)
-{
-  NetworkGradients<I + 1, Tp...>(gradients, network,
-      offset + LayerGradients(std::get<I>(network), gradients,
-      offset, std::get<I>(network).OutputParameter()));
-}
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), void>::type
-NetworkGradients(arma::mat& /* unused */,
-                 std::tuple<Tp...>& /* unused */,
-                 size_t /* unused */)
-{
-  /* Nothing to do here */
-}
-
-template<typename T>
-typename std::enable_if<
-    HasGradientCheck<T, arma::mat&(T::*)()>::value, size_t>::type
-LayerGradients(T& layer,
-               arma::mat& gradients,
-               size_t offset,
-               arma::mat& /* unused */)
-{
-  layer.Gradient() = arma::mat(gradients.memptr() + offset,
-      layer.Weights().n_rows, layer.Weights().n_cols, false, false);
-
-  return layer.Weights().n_elem;
-}
-
-template<typename T>
-typename std::enable_if<
-    HasGradientCheck<T, arma::cube&(T::*)()>::value, size_t>::type
-LayerGradients(T& layer,
-               arma::mat& gradients,
-               size_t offset,
-               arma::cube& /* unused */)
-{
-  layer.Gradient() = arma::cube(gradients.memptr() + offset,
-      layer.Weights().n_rows, layer.Weights().n_cols,
-      layer.Weights().n_slices, false, false);
-
-  return layer.Weights().n_elem;
-}
-
-template<typename T, typename P>
-typename std::enable_if<
-    !HasGradientCheck<T, P&(T::*)()>::value, size_t>::type
-LayerGradients(T& /* unused */,
-               arma::mat& /* unused */,
-               size_t /* unused */,
-               P& /* unused */)
-{
-  return 0;
-}
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), size_t>::type
-NetworkInputSize(std::tuple<Tp...>& /* unused */)
-{
-  return 0;
-}
-
-template<size_t I, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), size_t>::type
-NetworkInputSize(std::tuple<Tp...>& network)
-{
-  const size_t inputSize = LayerInputSize(std::get<I>(network), std::get<I>(
-      network).OutputParameter());
-
-  if (inputSize)
-  {
-    return inputSize;
-  }
-
-  return NetworkInputSize<I + 1, Tp...>(network);
-}
-
-template<typename T, typename P>
-typename std::enable_if<
-  HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerInputSize(T& layer, P& /* unused */)
-{
-  return layer.Weights().n_cols;
-}
-
-template<typename T, typename P>
-typename std::enable_if<
-  !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerInputSize(T& /* unused */, P& /* unused */)
-{
-  return 0;
-}
-
-template<size_t I, typename InitializationRuleType, typename... Tp>
-typename std::enable_if<I < sizeof...(Tp), void>::type
-NetworkWeights(InitializationRuleType& initializeRule,
-               arma::mat& weights,
-               std::tuple<Tp...>& network,
-               size_t offset)
-{
-  NetworkWeights<I + 1, InitializationRuleType, Tp...>(initializeRule, weights,
-      network, offset + LayerWeights(initializeRule, std::get<I>(network),
-      weights, offset, std::get<I>(network).OutputParameter()));
-}
-
-template<size_t I, typename InitializationRuleType, typename... Tp>
-typename std::enable_if<I == sizeof...(Tp), void>::type
-NetworkWeights(InitializationRuleType& /* initializeRule */,
-               arma::mat& /* weights */,
-               std::tuple<Tp...>& /* network */,
-               size_t /* offset */)
-{
-  /* Nothing to do here */
-}
-
-template<typename InitializationRuleType, typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
-LayerWeights(InitializationRuleType& initializeRule,
-             T& layer,
-             arma::mat& weights,
-             size_t offset,
-             arma::mat& /* output */)
-{
-  layer.Weights() = arma::mat(weights.memptr() + offset,
-      layer.Weights().n_rows, layer.Weights().n_cols, false, false);
-
-  initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows,
-      layer.Weights().n_cols);
-
-  return layer.Weights().n_elem;
-}
-
-template<typename InitializationRuleType, typename T>
-typename std::enable_if<
-    HasWeightsCheck<T, arma::cube&(T::*)()>::value, size_t>::type
-LayerWeights(InitializationRuleType& initializeRule,
-             T& layer,
-             arma::mat& weights,
-             size_t offset,
-             arma::cube& /* output */)
-{
-  layer.Weights() = arma::cube(weights.memptr() + offset,
-      layer.Weights().n_rows, layer.Weights().n_cols,
-      layer.Weights().n_slices, false, false);
-
-  initializeRule.Initialize(layer.Weights(), layer.Weights().n_rows,
-      layer.Weights().n_cols);
-
-  return layer.Weights().n_elem;
-}
-
-template<typename InitializationRuleType, typename T, typename P>
-typename std::enable_if<
-    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
-LayerWeights(InitializationRuleType& /* initializeRule */,
-             T& /* layer */,
-             arma::mat& /* weights */,
-             size_t /* offset */,
-             P& /* output */)
-{
-  return 0;
-}
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
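
The LayerWeights() and LayerGradients() overloads above depend on Armadillo's advanced matrix constructor, which makes the layer's weight matrix share memory with a slice of one flat parameter vector instead of copying it, so the optimizer can update every layer through a single vector. A standalone sketch of that aliasing:

    #include <armadillo>

    int main()
    {
      // One flat parameter vector for the whole network.
      arma::mat parameters(6, 1, arma::fill::zeros);

      // Alias a 2x3 weight matrix onto the flat storage; copy_aux_mem = false
      // and strict = false, as in LayerWeights() above.
      arma::mat weights(parameters.memptr(), 2, 3, false, false);

      weights(1, 2) = 3.14;             // writes through to 'parameters'
      parameters.print("parameters:");  // the last element is now 3.14
    }
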
diff --git a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt b/src/mlpack/methods/ann/performance_functions/CMakeLists.txt
deleted file mode 100644
index c64f726..0000000
--- a/src/mlpack/methods/ann/performance_functions/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  mse_function.hpp
-  sse_function.hpp
-  cee_function.hpp
-  sparse_function.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/performance_functions/cee_function.hpp b/src/mlpack/methods/ann/performance_functions/cee_function.hpp
deleted file mode 100644
index 3424452..0000000
--- a/src/mlpack/methods/ann/performance_functions/cee_function.hpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * @file cee_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the cross-entropy error performance
- * function.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The cross-entropy error performance function measures the network's
- * performance according to the cross-entropy error. The log in the
- * cross-entropy takes into account the closeness of a prediction and is a
- * more granular way to calculate the error.
- *
- * @tparam Layer The layer that is connected with the output layer.
- */
-template<
-    class Layer = LinearLayer< >
->
-class CrossEntropyErrorFunction
-{
- public:
-  /**
-   * Computes the cross-entropy error function.
-   *
-   * @param network Network of type FFN, CNN or RNN.
-   * @param target Target data.
-   * @param error The calculated error (placeholder).
-   * @return cross-entropy error.
-   */
-  template<typename DataType, typename... Tp>
-  static double Error(const std::tuple<Tp...>& network,
-                      const DataType& target, const DataType &error)
-  {
-    return Error(std::get<sizeof...(Tp) - 1>(network).OutputParameter(),
-                 target, error);
-  }
-
-  /**
-   * Computes the cross-entropy error function.
-   *
-   * @param input Input data.
-   * @param target Target data.
-   * @return cross-entropy error.
-   */
-  template<typename DataType>
-  static double Error(const DataType& input,
-                      const DataType& target,
-                      const DataType& /* error */)
-  {
-    if (LayerTraits<Layer>::IsBinary)
-      return -arma::dot(arma::trunc_log(arma::abs(target - input)), target);
-
-    return -arma::dot(arma::trunc_log(input), target);
-  }
-
-}; // class CrossEntropyErrorFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
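
A small numeric check of the non-binary branch above (a sketch, not mlpack API): for a one-hot target, the error reduces to the negative log of the probability assigned to the true class.

    #include <armadillo>
    #include <iostream>

    int main()
    {
      arma::vec input = {0.7, 0.2, 0.1};   // predicted distribution
      arma::vec target = {1.0, 0.0, 0.0};  // one-hot target

      // Same expression as the non-binary branch; trunc_log avoids -inf.
      const double error = -arma::dot(arma::trunc_log(input), target);
      std::cout << error << std::endl;  // -log(0.7), approximately 0.3567
    }
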
diff --git a/src/mlpack/methods/ann/performance_functions/mse_function.hpp b/src/mlpack/methods/ann/performance_functions/mse_function.hpp
deleted file mode 100644
index d2f1933..0000000
--- a/src/mlpack/methods/ann/performance_functions/mse_function.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- * @file mse_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the mean squared error performance function.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The mean squared error performance function measures the network's
- * performance according to the mean of squared errors.
- */
-class MeanSquaredErrorFunction
-{
- public:
-  /**
-   * Computes the mean squared error function.
-   *
-   * @param network Network of type FFN, CNN or RNN.
-   * @param target Target data.
-   * @param error The calculated error (placeholder).
-   * @return mean of squared errors.
-   */
-  template<typename DataType, typename... Tp>
-  static double Error(const std::tuple<Tp...>& network,
-                      const DataType& target, const DataType &error)
-  {
-    return Error(std::get<sizeof...(Tp) - 1>(network).OutputParameter(),
-                 target, error);
-  }
-
-  /**
-   * Computes the mean squared error function.
-   *
-   * @param input Input data.
-   * @param target Target data.
-   * @return mean of squared errors.
-   */
-  template<typename DataType>
-  static double Error(const DataType& input,
-                      const DataType& target,
-                      const DataType& /* error */)
-  {
-    return arma::mean(arma::mean(arma::square(target - input)));
-  }
-
-}; // class MeanSquaredErrorFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
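
For a quick check of the expression used above (illustrative sketch):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      arma::mat input = {{1.0, 2.0}, {3.0, 4.0}};
      arma::mat target = {{1.0, 2.5}, {3.0, 3.0}};

      // Mean over all squared differences, as in Error() above.
      const double mse = arma::mean(arma::mean(arma::square(target - input)));
      std::cout << mse << std::endl;  // (0 + 0.25 + 0 + 1) / 4 = 0.3125
    }
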
diff --git a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp b/src/mlpack/methods/ann/performance_functions/sparse_function.hpp
deleted file mode 100644
index 145a0b6..0000000
--- a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp
+++ /dev/null
@@ -1,141 +0,0 @@
-/**
- * @file sparse_function.hpp
- * @author Siddharth Agrawal
- * @author Tham Ngap Wei
- *
- * Definition and implementation of the sparse performance function.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-
-#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The cost function design for the sparse autoencoder.
- */
-template<typename DataType = arma::mat>
-class SparseErrorFunction
-{
- public:
-  /**
-   * Construct the sparse error function with the given parameters.
-   *
-   * @param lambda L2-regularization parameter.
-   * @param beta KL divergence parameter.
-   * @param rho Sparsity parameter.
-   */
-  SparseErrorFunction(const double lambda = 0.0001,
-                      const double beta = 3,
-                      const double rho = 0.01) :
-    lambda(lambda), beta(beta), rho(rho)
-  {
-    // Nothing to do here.
-  }
-
-  SparseErrorFunction(SparseErrorFunction &&layer) noexcept
-  {
-    *this = std::move(layer);
-  }
-
-  SparseErrorFunction& operator=(SparseErrorFunction &&layer) noexcept
-  {
-    lambda = layer.lambda;
-    beta = layer.beta;
-    rho = layer.rho;
-
-    return *this;
-  }
-
-  //! Get the KL divergence parameter.
-  double Beta() const { return beta; }
-  //! Modify the KL divergence parameter.
-  void Beta(double value) { beta = value; }
-
-  //! Get the L2-regularization parameter.
-  double Lambda() const { return lambda; }
-  //! Modify the L2-regularization parameter.
-  void Lambda(double value) { lambda = value; }
-
-  //! Get the sparsity parameter.
-  double Rho() const { return rho; }
-  //! Modify the sparsity parameter.
-  void Rho(double value) { rho = value; }
-
-  /**
-   * Computes the cost of sparse autoencoder.
-   *
-   * @param network Network of type FFN, CNN or RNN.
-   * @param target Target data.
-   * @param error Difference between the output and the input.
-   * @return The sparse autoencoder cost.
-   */
-  template<typename InType, typename Tp>
-  double Error(const Tp& network,
-               const InType& target, const InType &error)
-  {
-    return Error(std::get<0>(network).Weights(), std::get<3>(network).Weights(),
-        std::get<3>(network).RhoCap(), target, error);
-  }
-
-  /**
-   * Computes the cost of sparse autoencoder.
-   *
-   * @param w1 Weights of the hidden layer.
-   * @param w2 Weights of the output layer.
-   * @param rhoCap Average activations of the hidden layer.
-   * @param target Target data.
-   * @param error Difference between the output and the input.
-   * @return The sparse autoencoder cost.
-   */
-  template<typename InType>
-  double Error(const InType& w1, const InType& w2,
-               const InType& rhoCap, const InType& target,
-               const InType& error)
-  {
-    // Calculate squared L2-norms of w1 and w2.
-    const double wL2SquaredNorm =
-        arma::accu(w1 % w1) + arma::accu(w2 % w2);
-
-    // Calculate the reconstruction error, the regularization cost and the KL
-    // divergence cost terms. 'sumOfSquaresError' is the average squared
-    // L2-norm of the reconstruction error. 'weightDecay' is the squared
-    // L2-norm of the weights w1 and w2. 'klDivergence' is the cost of the
-    // hidden layer activations not being low. It is given by the formula:
-    // KL = sum_over_hSize(rho*log(rho/rhoCap) + (1-rho)*log((1-rho)/(1-rhoCap)))
-    const double sumOfSquaresError =
-        0.5 * arma::accu(error % error) / target.n_cols;
-
-    const double weightDecay = 0.5 * lambda * wL2SquaredNorm;
-    const double klDivergence =
-        beta * arma::accu(rho * arma::trunc_log(rho / rhoCap) + (1 - rho) *
-                          arma::trunc_log((1 - rho) / (1 - rhoCap)));
-
-    // The cost is the sum of the terms calculated above.
-    return sumOfSquaresError + weightDecay + klDivergence;
-  }
-
- private:
-  //! Locally stored L2-regularization parameter.
-  double lambda;
-
-  //! Locally stored KL divergence parameter.
-  double beta;
-
-  //! Locally stored sparsity parameter.
-  double rho;
-
-}; // class SparseErrorFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
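
Written out, the cost assembled above is the following (a restatement of the comments, with n = target.n_cols, e the reconstruction error, and \hat{\rho}_j the average activation rhoCap of hidden unit j):

    J = \frac{1}{2n} \sum_i \|e_i\|_2^2
        + \frac{\lambda}{2} \left( \|W_1\|_F^2 + \|W_2\|_F^2 \right)
        + \beta \sum_j \left[ \rho \log\frac{\rho}{\hat{\rho}_j}
          + (1 - \rho) \log\frac{1 - \rho}{1 - \hat{\rho}_j} \right]
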
diff --git a/src/mlpack/methods/ann/performance_functions/sse_function.hpp b/src/mlpack/methods/ann/performance_functions/sse_function.hpp
deleted file mode 100644
index 34055fb..0000000
--- a/src/mlpack/methods/ann/performance_functions/sse_function.hpp
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * @file sse_function.hpp
- * @author Marcus Edel
- *
- * Definition and implementation of the sum squared error performance function.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP
-#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * The sum squared error performance function measures the network's performance
- * according to the sum of squared errors.
- */
-class SumSquaredErrorFunction
-{
- public:
-  /**
-   * Computes the sum squared error function.
-   *
-   * @param network Network of type FFN, CNN or RNN.
-   * @param target Target data.
-   * @param error The calculated error (placeholder).
-   * @return sum of squared errors.
-   */
-  template<typename DataType, typename... Tp>
-  static double Error(const std::tuple<Tp...>& network,
-                      const DataType& target,
-                      const DataType &error)
-  {
-    return Error(std::get<sizeof...(Tp) - 1>(network).OutputParameter(),
-                 target, error);
-  }
-
-  /**
-   * Computes the sum squared error function.
-   *
-   * @param input Input data.
-   * @param target Target data.
-   * @return sum of squared errors.
-   */
-  template<typename DataType>
-  static double Error(const DataType& input,
-                      const DataType& target,
-                      const DataType&)
-  {
-    return arma::accu(arma::square(target - input));
-  }
-
-}; // class SumSquaredErrorFunction
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt b/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt
deleted file mode 100644
index 99b6b80..0000000
--- a/src/mlpack/methods/ann/pooling_rules/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  max_pooling.hpp
-  mean_pooling.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp
deleted file mode 100644
index f50b041..0000000
--- a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * @file max_pooling.hpp
- * @author Shangtong Zhang
- *
- * Definition of the MaxPooling class, which implements max pooling.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP
-#define MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/*
- * The max pooling rule for convolutional neural networks. Takes the maximum
- * value within the receptive block.
- */
-class MaxPooling
-{
- public:
-  /*
-   * Return the maximum value within the receptive block.
-   *
-   * @param input Input used to perform the pooling operation.
-   */
-  template<typename MatType>
-  double Pooling(const MatType& input)
-  {
-    return input.max();
-  }
-
-  /*
-   * Assign the unpooled value to the position of the maximum element.
-   *
-   * @param input Input used to perform the pooling operation.
-   * @param value The unpooled value.
-   * @param output The unpooled output data.
-   */
-  template<typename MatType>
-  void Unpooling(const MatType& input, const double value, MatType& output)
-  {
-    // Route the unpooled value back to the position of the maximum element;
-    // all other positions receive zero.
-    output = arma::zeros<MatType>(input.n_rows, input.n_cols);
-    arma::uword maxIndex = 0;
-    input.max(maxIndex);
-    output(maxIndex) = value;
-  }
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp
deleted file mode 100644
index 7ab88c3..0000000
--- a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * @file mean_pooling.hpp
- * @author Shangtong Zhang
- *
- * Definition of the MeanPooling class, which implements mean pooling.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP
-#define MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/*
- * The mean pooling rule for convolutional neural networks. Averages all
- * values within the receptive block.
- */
-class MeanPooling
-{
- public:
-  /*
-   * Return the average value within the receptive block.
-   *
-   * @param input Input used to perform the pooling operation.
-   */
-  template<typename MatType>
-  double Pooling(const MatType& input)
-  {
-    return arma::mean(arma::mean(input));
-  }
-
-  /*
-   * Distribute the unpooled value evenly over the receptive block.
-   *
-   * @param input Input used to perform the pooling operation.
-   * @param value The unpooled value.
-   * @param output The unpooled output data.
-   */
-  template<typename MatType>
-  void Unpooling(const MatType& input, const double value, MatType& output)
-  {
-    output = MatType(input.n_rows, input.n_cols);
-    output.fill(value / input.n_elem);
-  }
-};
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
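
The two pooling rules differ only in the statistic taken over the receptive block; a quick sketch:

    #include <armadillo>
    #include <iostream>

    int main()
    {
      arma::mat block = {{1.0, 2.0}, {3.0, 4.0}};  // one receptive block

      std::cout << block.max() << std::endl;                    // MaxPooling: 4
      std::cout << arma::mean(arma::mean(block)) << std::endl;  // MeanPooling: 2.5
    }
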
diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp
deleted file mode 100644
index 6b9483c..0000000
--- a/src/mlpack/methods/ann/rnn.hpp
+++ /dev/null
@@ -1,799 +0,0 @@
-/**
- * @file rnn.hpp
- * @author Marcus Edel
- *
- * Definition of the RNN class, which implements recurrent neural networks.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_RNN_HPP
-#define MLPACK_METHODS_ANN_RNN_HPP
-
-#include <mlpack/core.hpp>
-
-#include <boost/ptr_container/ptr_vector.hpp>
-
-#include <mlpack/methods/ann/network_util.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
-#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
-#include <mlpack/core/optimizers/sgd/sgd.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of a standard recurrent neural network.
- *
- * @tparam LayerTypes Contains all layer modules used to construct the network.
- * @tparam OutputLayerType The output layer type used to evaluate the network.
- * @tparam InitializationRuleType Rule used to initialize the weight matrix.
- * @tparam PerformanceFunction Performance strategy used to calculate the error.
- */
-template <
-  typename LayerTypes,
-  typename OutputLayerType,
-  typename InitializationRuleType = NguyenWidrowInitialization,
-  class PerformanceFunction = CrossEntropyErrorFunction<>
->
-class RNN
-{
- public:
-  //! Convenience typedef for the internal model construction.
-  using NetworkType = RNN<LayerTypes,
-                          OutputLayerType,
-                          InitializationRuleType,
-                          PerformanceFunction>;
-
-  /**
-   * Create the RNN object with the given predictors and responses set (this is
-   * the set that is used to train the network) and the given optimizer.
-   * Optionally, specify which initialize rule and performance function should
-   * be used.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param optimizer Instantiated optimizer used to train the model.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType,
-           typename OutputType,
-           template<typename> class OptimizerType>
-  RNN(LayerType &&network,
-      OutputType &&outputLayer,
-      const arma::mat& predictors,
-      const arma::mat& responses,
-      OptimizerType<NetworkType>& optimizer,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Create the RNN object with the given predictors and responses set (this is
-   * the set that is used to train the network). Optionally, specify which
-   * initialize rule and performance function should be used.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType, typename OutputType>
-  RNN(LayerType &&network,
-      OutputType &&outputLayer,
-      const arma::mat& predictors,
-      const arma::mat& responses,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Create the RNN object with an empty predictors and responses set and
-   * default optimizer. Make sure to call Train(predictors, responses) when
-   * training.
-   *
-   * @param network Network modules used to construct the network.
-   * @param outputLayer Output layer used to evaluate the network.
-   * @param initializeRule Optional instantiated InitializationRule object
-   *        for initializing the network parameter.
-   * @param performanceFunction Optional instantiated PerformanceFunction
-   *        object used to calculate the error.
-   */
-  template<typename LayerType, typename OutputType>
-  RNN(LayerType &&network,
-      OutputType &&outputLayer,
-      InitializationRuleType initializeRule = InitializationRuleType(),
-      PerformanceFunction performanceFunction = PerformanceFunction());
-
-  /**
-   * Train the recurrent neural network on the given input data. By default, the
-   * SGD optimization algorithm is used, but others can be specified
-   * (such as mlpack::optimization::RMSprop).
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @tparam OptimizerType Type of optimizer to use to train the model.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::SGD
-  >
-  void Train(const arma::mat& predictors, const arma::mat& responses);
-
-  /**
-   * Train the recurrent neural network with the given instantiated optimizer.
-   * Using this overload allows configuring the instantiated optimizer before
-   * training is performed.
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @param optimizer Instantiated optimizer used to train the model.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::SGD
-  >
-  void Train(OptimizerType<NetworkType>& optimizer);
-
-  /**
-   * Train the recurrent neural network on the given input data using the given
-   * optimizer.
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @tparam OptimizerType Type of optimizer to use to train the model.
-   * @param predictors Input training variables.
-   * @param responses Outputs resulting from input training variables.
-   * @param optimizer Instantiated optimizer used to train the model.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::SGD
-  >
-  void Train(const arma::mat& predictors,
-             const arma::mat& responses,
-             OptimizerType<NetworkType>& optimizer);
-
-  /**
-   * Predict the responses to a given set of predictors. The responses will
-   * reflect the output of the given output layer as returned by the
-   * OutputClass() function.
-   *
-   * @param predictors Input predictors.
-   * @param responses Matrix to put output predictions of responses into.
-   */
-  void Predict(arma::mat& predictors, arma::mat& responses);
-
-  /**
-   * Evaluate the recurrent neural network with the given parameters. This
-   * function is usually called by the optimizer to train the model.
-   *
-   * @param parameters Matrix model parameters.
-   * @param i Index of point to use for objective function evaluation.
-   * @param deterministic Whether or not to train or test the model. Note that
-   *        some layers act differently in training or testing mode.
-   */
-  double Evaluate(const arma::mat& parameters,
-                  const size_t i,
-                  const bool deterministic = true);
-
-  /**
-   * Evaluate the gradient of the recurrent neural network with the given
-   * parameters, and with respect to only one point in the dataset. This is
-   * useful for optimizers such as SGD, which require a separable objective
-   * function.
-   *
-   * @param parameters Matrix of the model parameters to be optimized.
-   * @param i Index of point to use for objective function gradient evaluation.
-   * @param gradient Matrix to output gradient into.
-   */
-  void Gradient(const arma::mat& parameters,
-                const size_t i,
-                arma::mat& gradient);
-
-  //! Return the number of separable functions (the number of predictor points).
-  size_t NumFunctions() const { return numFunctions; }
-
-  //! Return the initial point for the optimization.
-  const arma::mat& Parameters() const { return parameter; }
-  //! Modify the initial point for the optimization.
-  arma::mat& Parameters() { return parameter; }
-
-  //! Serialize the model.
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */);
-
- private:
-  /*
-   * Predict the response of the given input matrix.
-   */
-  template <typename DataType>
-  void SinglePredict(const DataType& input, DataType& output)
-  {
-    deterministic = true;
-    seqLen = input.n_rows / inputSize;
-    ResetParameter(network);
-
-    // Iterate through the input sequence and perform the feed forward pass.
-    for (seqNum = 0; seqNum < seqLen; seqNum++)
-    {
-      // Perform the forward pass and save the activations.
-      Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1),
-          network);
-      SaveActivations(network);
-
-      // Retrieve output of the subsequence.
-      if (seqOutput)
-      {
-        DataType seqOutput;
-        OutputPrediction(seqOutput, network);
-        output = arma::join_cols(output, seqOutput);
-      }
-    }
-
-    // Retrieve output of the complete sequence.
-    if (!seqOutput)
-      OutputPrediction(output, network);
-  }
-
-  /**
-   * Reset the network by clearing the layer activations and by setting the
-   * layer status.
-   */
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& /* unused */)
-  {
-    activations.clear();
-  }
-
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& network)
-  {
-    ResetDeterministic(std::get<I>(network));
-    ResetSeqLen(std::get<I>(network));
-    ResetRecurrent(std::get<I>(network), std::get<I>(network).InputParameter());
-    std::get<I>(network).Delta().zeros();
-
-    ResetParameter<I + 1, Tp...>(network);
-  }
-
-  /**
-   * Reset the layer status by setting the current deterministic parameter
-   * for all layers that implement the Deterministic function.
-   */
-  template<typename T>
-  typename std::enable_if<
-      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& layer)
-  {
-    layer.Deterministic() = deterministic;
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& /* unused */) { /* Nothing to do here */ }
-
-  /**
-   * Reset the layer sequence length by setting the current seqLen parameter
-   * for all layers that implement the SeqLen function.
-   */
-  template<typename T>
-  typename std::enable_if<
-      HasSeqLenCheck<T, size_t&(T::*)(void)>::value, void>::type
-  ResetSeqLen(T& layer)
-  {
-    layer.SeqLen() = seqLen;
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      !HasSeqLenCheck<T, size_t&(T::*)(void)>::value, void>::type
-  ResetSeqLen(T& /* unused */) { /* Nothing to do here */ }
-
-  /**
-   * Distinguish between recurrent layer and non-recurrent layer when resetting
-   * the recurrent parameter.
-   */
-  template<typename T, typename P>
-  typename std::enable_if<
-      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  ResetRecurrent(T& layer, P& /* unused */)
-  {
-    layer.RecurrentParameter().zeros();
-  }
-
-  template<typename T, typename P>
-  typename std::enable_if<
-      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  ResetRecurrent(T& /* unused */, P& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  /**
-   * Initialize the network by setting the input size and output size.
-   */
-  template<size_t I = 0, typename InputDataType, typename TargetDataType,
-      typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp) - 1, void>::type
-  InitLayer(const InputDataType& /* unused */,
-            const TargetDataType& target,
-            std::tuple<Tp...>& /* unused */)
-  {
-    seqOutput = (outputSize < target.n_elem);
-  }
-
-  template<size_t I = 0, typename InputDataType, typename TargetDataType,
-      typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp) - 1, void>::type
-  InitLayer(const InputDataType& input,
-            const TargetDataType& target,
-            std::tuple<Tp...>& network)
-  {
-    Init(std::get<I>(network), std::get<I>(network).OutputParameter(),
-       std::get<I + 1>(network).Delta());
-
-    InitLayer<I + 1, InputDataType, TargetDataType, Tp...>(input, target,
-        network);
-  }
-
-  /**
-   * Retrieve the weight matrix for all layers that implement the Weights
-   * function to extract the input size and output size.
-   */
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      HasGradientCheck<T, P&(T::*)()>::value, void>::type
-  Init(T& layer, P& /* unused */, D& /* unused */)
-  {
-    // Initialize the input size only once.
-    if (!inputSize)
-      inputSize = layer.Weights().n_cols;
-
-    outputSize = layer.Weights().n_rows;
-  }
-
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
-  Init(T& /* unused */, P& /* unused */, D& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  /**
-   * Save the network layer activations.
-   */
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I == Max, void>::type
-  SaveActivations(std::tuple<Tp...>& /* unused */)
-  {
-    Save(I, std::get<I>(network), std::get<I>(network).InputParameter());
-    LinkRecurrent(network);
-  }
-
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I < Max, void>::type
-  SaveActivations(std::tuple<Tp...>& network)
-  {
-    Save(I, std::get<I>(network), std::get<I>(network).InputParameter());
-    SaveActivations<I + 1, Max, Tp...>(network);
-  }
-
-  /**
-   * Distinguish between recurrent layer and non-recurrent layer when storing
-   * the activations.
-   */
-  template<typename T, typename P>
-  typename std::enable_if<
-      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Save(const size_t layerNumber, T& layer, P& /* unused */)
-  {
-    if (activations.size() == layerNumber)
-    {
-      activations.push_back(new arma::mat(layer.RecurrentParameter().n_rows,
-          seqLen));
-    }
-
-    activations[layerNumber].unsafe_col(seqNum) = layer.RecurrentParameter();
-  }
-
-  template<typename T, typename P>
-  typename std::enable_if<
-      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Save(const size_t layerNumber, T& layer, P& /* unused */)
-  {
-    if (activations.size() == layerNumber)
-    {
-      activations.push_back(new arma::mat(layer.OutputParameter().n_rows,
-          seqLen));
-    }
-
-    activations[layerNumber].unsafe_col(seqNum) = layer.OutputParameter();
-  }
-
-  /**
-   * Load the network layer activations.
-   */
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename DataType, typename... Tp
-  >
-  typename std::enable_if<I == Max, void>::type
-  LoadActivations(DataType& input, std::tuple<Tp...>& network)
-  {
-    Load(I, std::get<I>(network), std::get<I>(network).InputParameter());
-    std::get<0>(network).InputParameter() = input;
-  }
-
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename DataType, typename... Tp
-  >
-  typename std::enable_if<I < Max, void>::type
-  LoadActivations(DataType& input, std::tuple<Tp...>& network)
-  {
-    Load(I, std::get<I>(network), std::get<I>(network).InputParameter());
-    LoadActivations<I + 1, Max, DataType, Tp...>(input, network);
-  }
-
-  /**
-   * Distinguish between recurrent layer and non-recurrent layer when loading
-   * the activations.
-   */
-  template<typename T, typename P>
-  typename std::enable_if<
-      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Load(const size_t layerNumber, T& layer, P& /* unused */)
-  {
-    layer.RecurrentParameter() = activations[layerNumber].unsafe_col(seqNum);
-  }
-
-  template<typename T, typename P>
-  typename std::enable_if<
-      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Load(const size_t layerNumber, T& layer, P& /* unused */)
-  {
-    layer.OutputParameter() = activations[layerNumber].unsafe_col(seqNum);
-  }
-
-  /**
-   * Run a single iteration of the feed forward algorithm, using the given
-   * input as the activation of the first layer module.
-   */
-  template<size_t I = 0, typename DataType, typename... Tp>
-  void Forward(const DataType& input, std::tuple<Tp...>& network)
-  {
-    std::get<I>(network).InputParameter() = input;
-    std::get<I>(network).Forward(std::get<I>(network).InputParameter(),
-        std::get<I>(network).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(network);
-  }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& network)
-  {
-    std::get<I>(network).Forward(std::get<I - 1>(network).OutputParameter(),
-        std::get<I>(network).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(network);
-  }
-
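
Forward() above seeds the first module's input parameter, and ForwardTail() then feeds each module's output parameter into the next module. A self-contained sketch of that chaining, where ToyLayer is illustrative and not an mlpack layer:

    #include <armadillo>
    #include <tuple>
    #include <type_traits>

    struct ToyLayer
    {
      arma::mat output;
      void Forward(const arma::mat& input, arma::mat& out) { out = 2 * input; }
      arma::mat& OutputParameter() { return output; }
    };

    // Last module: perform the forward pass and stop.
    template<size_t I = 0, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp) - 1, void>::type
    Chain(const arma::mat& input, std::tuple<Tp...>& network)
    {
      std::get<I>(network).Forward(input,
          std::get<I>(network).OutputParameter());
    }

    // Inner module: forward, then pass the output on to module I + 1.
    template<size_t I = 0, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp) - 1, void>::type
    Chain(const arma::mat& input, std::tuple<Tp...>& network)
    {
      std::get<I>(network).Forward(input,
          std::get<I>(network).OutputParameter());
      Chain<I + 1, Tp...>(std::get<I>(network).OutputParameter(), network);
    }

    int main()
    {
      std::tuple<ToyLayer, ToyLayer> network;
      arma::mat x(2, 1, arma::fill::ones);
      Chain(x, network);  // each ToyLayer doubles its input
      std::get<1>(network).OutputParameter().print("output:");  // all 4s
    }
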
-  /**
-   * Link the calculated activation with the correct layer.
-   */
-  template<
-      size_t I = 1,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I == Max, void>::type
-  LinkParameter(std::tuple<Tp ...>& /* unused */)
-  {
-    if (!LayerTraits<typename std::remove_reference<
-        decltype(std::get<I>(network))>::type>::IsBiasLayer)
-    {
-      std::get<I>(network).InputParameter() = std::get<I - 1>(
-          network).OutputParameter();
-    }
-  }
-
-  template<
-      size_t I = 1,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I < Max, void>::type
-  LinkParameter(std::tuple<Tp...>& network)
-  {
-    if (!LayerTraits<typename std::remove_reference<
-        decltype(std::get<I>(network))>::type>::IsBiasLayer)
-    {
-      std::get<I>(network).InputParameter() = std::get<I - 1>(
-          network).OutputParameter();
-    }
-
-    LinkParameter<I + 1, Max, Tp...>(network);
-  }
-
-  /**
-   * Link the calculated activation with the correct recurrent layer.
-   */
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I == Max, void>::type
-  LinkRecurrent(std::tuple<Tp ...>& /* unused */) { /* Nothing to do here */ }
-
-  template<
-      size_t I = 0,
-      size_t Max = std::tuple_size<LayerTypes>::value - 1,
-      typename... Tp
-  >
-  typename std::enable_if<I < Max, void>::type
-  LinkRecurrent(std::tuple<Tp...>& network)
-  {
-    UpdateRecurrent(std::get<I>(network), std::get<I>(network).InputParameter(),
-        std::get<I + 1>(network).OutputParameter());
-    LinkRecurrent<I + 1, Max, Tp...>(network);
-  }
-
-  /**
-   * Distinguish between recurrent layer and non-recurrent layer when updating
-   * the recurrent activations.
-   */
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  UpdateRecurrent(T& layer, P& /* unused */, D& output)
-  {
-    layer.RecurrentParameter() = output;
-  }
-
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  UpdateRecurrent(T& /* unused */, P& /* unused */, D& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  /*
-   * Calculate the output error and update the overall error.
-   */
-  template<typename DataType, typename ErrorType, typename... Tp>
-  double OutputError(const DataType& target,
-                     ErrorType& error,
-                     const std::tuple<Tp...>& network)
-  {
-    // Calculate and store the output error.
-    outputLayer.CalculateError(
-        std::get<sizeof...(Tp) - 1>(network).OutputParameter(), target, error);
-
-    // Measures the network's performance with the specified performance
-    // function.
-    return performanceFunc.Error(network, target, error);
-  }
-
-  /**
-   * Run a single iteration of the feed backward algorithm, using the given
-   * error of the output layer. Note that we iterate backward through the
-   * layer modules.
-   */
-  template<size_t I = 1, typename DataType, typename... Tp>
-  void Backward(DataType& error, std::tuple<Tp ...>& network)
-  {
-    std::get<sizeof...(Tp) - I>(network).Backward(
-        std::get<sizeof...(Tp) - I>(network).OutputParameter(), error,
-        std::get<sizeof...(Tp) - I>(network).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, network);
-  }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I == (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& /* unused */, std::tuple<Tp...>& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I < (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& error, std::tuple<Tp...>& network)
-  {
-    BackwardRecurrent(std::get<sizeof...(Tp) - I - 1>(network),
-        std::get<sizeof...(Tp) - I - 1>(network).InputParameter(),
-        std::get<sizeof...(Tp) - I + 1>(network).Delta());
-
-    std::get<sizeof...(Tp) - I>(network).Backward(
-        std::get<sizeof...(Tp) - I>(network).OutputParameter(),
-        std::get<sizeof...(Tp) - I + 1>(network).Delta(),
-        std::get<sizeof...(Tp) - I>(network).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, network);
-  }
-
-  /*
-   * Update the delta of the recurrent layer.
-   */
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  BackwardRecurrent(T& layer, P& /* unused */, D& delta)
-  {
-    if (!layer.Delta().is_empty())
-      delta += layer.Delta();
-  }
-
-  template<typename T, typename P, typename D>
-  typename std::enable_if<
-      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  BackwardRecurrent(T& /* unused */, P& /* unused */, D& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  /**
-   * Iterate through all layer modules and update the gradient using the
-   * layer-defined optimizer.
-   */
-  template<size_t I = 0, size_t Max = std::tuple_size<LayerTypes>::value - 2,
-      typename... Tp>
-  typename std::enable_if<I == Max, void>::type
-  UpdateGradients(std::tuple<Tp...>& network)
-  {
-    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
-        std::get<I + 1>(network).Delta(), std::get<I + 1>(network),
-        std::get<I + 1>(network).InputParameter(),
-        std::get<I + 1>(network).Delta());
-  }
-
-  template<size_t I = 0, size_t Max = std::tuple_size<LayerTypes>::value - 2,
-      typename... Tp>
-  typename std::enable_if<I < Max, void>::type
-  UpdateGradients(std::tuple<Tp...>& network)
-  {
-    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
-        std::get<I + 1>(network).Delta(), std::get<I + 1>(network),
-        std::get<I + 1>(network).InputParameter(),
-        std::get<I + 2>(network).Delta());
-
-    UpdateGradients<I + 1, Max, Tp...>(network);
-  }
-
-  template<typename T1, typename P1, typename D1, typename T2, typename P2,
-      typename D2>
-  typename std::enable_if<
-      HasGradientCheck<T1, P1&(T1::*)()>::value &&
-      HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
-  Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */,
-         P2& /* unused */, D2& delta2)
-  {
-    layer.Gradient(layer.InputParameter(), delta2, layer.Gradient());
-  }
-
-  template<typename T1, typename P1, typename D1, typename T2, typename P2,
-      typename D2>
-  typename std::enable_if<
-      (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
-      !HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value) ||
-      (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
-      HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value), void>::type
-  Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */,
-         P2& /* unused */, D2& /* unused */)
-  {
-    /* Nothing to do here */
-  }
-
-  template<typename T1, typename P1, typename D1, typename T2, typename P2,
-      typename D2>
-  typename std::enable_if<
-      HasGradientCheck<T1, P1&(T1::*)()>::value &&
-      !HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
-  Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */,
-         P2& /* unused */, D2& /* unused */)
-  {
-    layer.Gradient(layer.InputParameter(), delta1, layer.Gradient());
-  }
-
-  /*
-   * Calculate and store the output activation.
-   */
-  template<typename DataType, typename... Tp>
-  void OutputPrediction(DataType& output, std::tuple<Tp...>& network)
-  {
-    // Calculate and store the output prediction.
-    outputLayer.OutputClass(std::get<sizeof...(Tp) - 1>(
-        network).OutputParameter(), output);
-  }
-
-  //! Instantiated recurrent neural network.
-  LayerTypes network;
-
-  //! The output layer used to evaluate the network.
-  OutputLayerType& outputLayer;
-
-  //! Performance strategy used to calculate the error.
-  PerformanceFunction performanceFunc;
-
-  //! The current evaluation mode (training or testing).
-  bool deterministic;
-
-  //! Matrix of (trained) parameters.
-  arma::mat parameter;
-
-  //! The matrix of data points (predictors).
-  arma::mat predictors;
-
-  //! The matrix of responses to the input data points.
-  arma::mat responses;
-
-  //! Locally stored network input size.
-  size_t inputSize;
-
-  //! Locally stored network output size.
-  size_t outputSize;
-
-  //! The index of the current step within the input sequence.
-  size_t seqNum;
-
-  //! Locally stored number of samples in one input sequence.
-  size_t seqLen;
-
-  //! Locally stored parameter that indicates if the output is a sequence.
-  bool seqOutput;
-
-  //! The activation storage we are using to perform the feed backward pass.
-  boost::ptr_vector<arma::mat> activations;
-
-  //! The number of separable functions (the number of predictor points).
-  size_t numFunctions;
-
-  //! Locally stored backward error.
-  arma::mat error;
-}; // class RNN
-
-} // namespace ann
-} // namespace mlpack
-
-// Include implementation.
-#include "rnn_impl.hpp"
-
-#endif
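
For context, the RNN class removed above iterates over its std::tuple of
layers at compile time: a recursive function template paired with a
terminating overload, both selected via std::enable_if, plus capability
dispatch driven by sizeof-based member-function detectors (the
HasRecurrentParameterCheck / HasGradientCheck traits). Below is a minimal,
self-contained sketch of those two idioms; the HasReset detector and the
Dense/Output layer types are illustrative stand-ins, not mlpack API.

    #include <cstddef>
    #include <cstdio>
    #include <tuple>
    #include <type_traits>

    // Classic sizeof-trick detector: does T have a member void Reset()?
    template<typename T>
    class HasReset
    {
      template<typename U, void (U::*)()> struct Check;
      template<typename U> static char Test(Check<U, &U::Reset>*);
      template<typename U> static long Test(...);
     public:
      static const bool value = sizeof(Test<T>(0)) == sizeof(char);
    };

    // Capability dispatch: call Reset() only on layers that define it.
    template<typename T>
    typename std::enable_if<HasReset<T>::value, void>::type
    ResetIfPossible(T& layer) { layer.Reset(); }

    template<typename T>
    typename std::enable_if<!HasReset<T>::value, void>::type
    ResetIfPossible(T& /* layer */) { /* Nothing to do here */ }

    // Terminating case: index I has reached the end of the tuple.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp), void>::type
    ResetAll(std::tuple<Tp...>& /* network */) { /* Nothing to do here */ }

    // Recursive case: handle layer I, then advance to layer I + 1.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp), void>::type
    ResetAll(std::tuple<Tp...>& network)
    {
      ResetIfPossible(std::get<I>(network));
      ResetAll<I + 1, Tp...>(network);
    }

    struct Dense  { void Reset() { std::puts("Dense::Reset()"); } };
    struct Output { /* no Reset() member */ };

    int main()
    {
      std::tuple<Dense, Output, Dense> network;
      ResetAll(network);  // Resets both Dense layers; skips Output.
    }
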
diff --git a/src/mlpack/methods/ann/rnn_impl.hpp b/src/mlpack/methods/ann/rnn_impl.hpp
deleted file mode 100644
index d8d2f07..0000000
--- a/src/mlpack/methods/ann/rnn_impl.hpp
+++ /dev/null
@@ -1,357 +0,0 @@
-/**
- * @file rnn_impl.hpp
- * @author Marcus Edel
- *
- * Implementation of the RNN class, which implements recurrent neural networks.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_ANN_RNN_IMPL_HPP
-#define MLPACK_METHODS_ANN_RNN_IMPL_HPP
-
-// In case it hasn't been included yet.
-#include "rnn.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType,
-         typename OutputType,
-         template<typename> class OptimizerType
->
-RNN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::RNN(LayerType &&network,
-       OutputType &&outputLayer,
-       const arma::mat& predictors,
-       const arma::mat& responses,
-       OptimizerType<NetworkType>& optimizer,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction)),
-    predictors(predictors),
-    responses(responses),
-    numFunctions(predictors.n_cols),
-    inputSize(0),
-    outputSize(0)
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-
-  // Train the model.
-  Timer::Start("rnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("rnn_optimization");
-
-  Log::Info << "RNN::RNN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType, typename OutputType>
-RNN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::RNN(LayerType &&network,
-       OutputType &&outputLayer,
-       const arma::mat& predictors,
-       const arma::mat& responses,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction)),
-    inputSize(0),
-    outputSize(0)
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-
-  Train(predictors, responses);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename LayerType, typename OutputType>
-RNN<LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::RNN(LayerType &&network,
-       OutputType &&outputLayer,
-       InitializationRuleType initializeRule,
-       PerformanceFunction performanceFunction) :
-    network(std::forward<LayerType>(network)),
-    outputLayer(std::forward<OutputType>(outputLayer)),
-    performanceFunc(std::move(performanceFunction)),
-    inputSize(0),
-    outputSize(0)
-{
-  static_assert(std::is_same<typename std::decay<LayerType>::type,
-                  LayerTypes>::value,
-                  "The type of network must be LayerTypes.");
-
-  static_assert(std::is_same<typename std::decay<OutputType>::type,
-                OutputLayerType>::value,
-                "The type of outputLayer must be OutputLayerType.");
-
-  initializeRule.Initialize(parameter, NetworkSize(this->network), 1);
-  NetworkWeights(parameter, this->network);
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<template<typename> class OptimizerType>
-void RNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(const arma::mat& predictors, const arma::mat& responses)
-{
-  numFunctions = predictors.n_cols;
-  this->predictors = predictors;
-  this->responses = responses;
-
-  OptimizerType<NetworkType> optimizer(*this);
-
-  // Train the model.
-  Timer::Start("rnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("rnn_optimization");
-
-  Log::Info << "RNN::RNN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<template<typename> class OptimizerType>
-void RNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(const arma::mat& predictors,
-         const arma::mat& responses,
-         OptimizerType<NetworkType>& optimizer)
-{
-  numFunctions = predictors.n_cols;
-  this->predictors = predictors;
-  this->responses = responses;
-
-  // Train the model.
-  Timer::Start("rnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("rnn_optimization");
-
-  Log::Info << "RNN::RNN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<
-    template<typename> class OptimizerType
->
-void RNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Train(OptimizerType<NetworkType>& optimizer)
-{
-  // Train the model.
-  Timer::Start("rnn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("rnn_optimization");
-
-  Log::Info << "RNN::RNN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-void RNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Predict(arma::mat& predictors, arma::mat& responses)
-{
-  arma::mat responsesTemp;
-  SinglePredict(arma::mat(predictors.colptr(0), predictors.n_rows,
-      1, false, true), responsesTemp);
-
-  responses = arma::mat(responsesTemp.n_elem, predictors.n_cols);
-  responses.col(0) = responsesTemp.col(0);
-
-  for (size_t i = 1; i < predictors.n_cols; i++)
-  {
-    SinglePredict(arma::mat(predictors.colptr(i), predictors.n_rows,
-      1, false, true), responsesTemp);
-    responses.col(i) = responsesTemp.col(0);
-  }
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-double RNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Evaluate(const arma::mat& /* unused */,
-            const size_t i,
-            const bool deterministic)
-{
-  this->deterministic = deterministic;
-
-  arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows,
-      1, false, true);
-  arma::mat target = arma::mat(responses.colptr(i), responses.n_rows,
-      1, false, true);
-
-  // Initialize the activation storage only once.
-  if (activations.empty())
-    InitLayer(input, target, network);
-
-  double networkError = 0;
-  seqLen = input.n_rows / inputSize;
-  ResetParameter(network);
-
-  error = arma::mat(outputSize, outputSize < target.n_elem ? seqLen : 1);
-
-  // Iterate through the input sequence and perform the feed forward pass.
-  for (seqNum = 0; seqNum < seqLen; seqNum++)
-  {
-    // Perform the forward pass and save the activations.
-    Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1),
-        network);
-    SaveActivations(network);
-
-    // Retrieve output error of the subsequence.
-    if (seqOutput)
-    {
-      arma::mat seqError = error.unsafe_col(seqNum);
-      arma::mat seqTarget = target.submat(seqNum * outputSize, 0,
-          (seqNum + 1) * outputSize - 1, 0);
-      networkError += OutputError(seqTarget, seqError, network);
-    }
-  }
-
-  // Retrieve output error of the complete sequence.
-  if (!seqOutput)
-    return OutputError(target, error, network);
-
-  return networkError;
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-void RNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Gradient(const arma::mat& /* unused */,
-            const size_t i,
-            arma::mat& gradient)
-{
-  if (gradient.is_empty())
-  {
-    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
-  }
-  else
-  {
-    gradient.zeros();
-  }
-
-  Evaluate(parameter, i, false);
-
-  arma::mat currentGradient = arma::mat(gradient.n_rows, gradient.n_cols);
-  NetworkGradients(currentGradient, network);
-
-  const arma::mat input = arma::mat(predictors.colptr(i), predictors.n_rows,
-      1, false, true);
-
-  // Iterate through the input sequence and perform the feed backward pass.
-  // Note that seqNum is a size_t, so a seqNum >= 0 loop condition would
-  // always hold; the loop instead terminates via the seqNum == 0 break below.
-  for (seqNum = seqLen - 1; ; seqNum--)
-  {
-    // Load the network activation for the upcoming backward pass.
-    LoadActivations(input.rows(seqNum * inputSize, (seqNum + 1) *
-        inputSize - 1), network);
-
-    // Perform the backward pass.
-    if (seqOutput)
-    {
-      arma::mat seqError = error.unsafe_col(seqNum);
-      Backward(seqError, network);
-    }
-    else
-    {
-      Backward(error, network);
-    }
-
-    // Link the parameters and update the gradients.
-    LinkParameter(network);
-    UpdateGradients<>(network);
-
-    // Update the overall gradient.
-    gradient += currentGradient;
-
-    if (seqNum == 0) break;
-  }
-}
-
-template<typename LayerTypes,
-         typename OutputLayerType,
-         typename InitializationRuleType,
-         typename PerformanceFunction
->
-template<typename Archive>
-void RNN<
-LayerTypes, OutputLayerType, InitializationRuleType, PerformanceFunction
->::Serialize(Archive& ar, const unsigned int /* version */)
-{
-  ar & data::CreateNVP(parameter, "parameter");
-
-  // If we are loading, we need to initialize the weights.
-  if (Archive::is_loading::value)
-  {
-    NetworkWeights(parameter, network);
-  }
-}
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
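
The Evaluate() / Gradient() / NumFunctions() trio implemented above is the
decomposable-function interface that mlpack's optimizers consume: the
objective is a sum over per-sequence terms, and the optimizer visits one
term at a time. As a rough sketch of the driver side of that contract,
assuming only that interface (SimpleSGD below is a hypothetical
illustration, not mlpack's SGD implementation):

    #include <cstddef>
    #include <armadillo>

    // FunctionType must provide NumFunctions(), Evaluate(parameters, i,
    // deterministic), and Gradient(parameters, i, gradient), as the removed
    // RNN class does.
    template<typename FunctionType>
    double SimpleSGD(FunctionType& function,
                     arma::mat& parameters,
                     const double stepSize = 0.01,
                     const std::size_t epochs = 5)
    {
      arma::mat gradient;
      double objective = 0.0;

      for (std::size_t epoch = 0; epoch < epochs; ++epoch)
      {
        objective = 0.0;
        for (std::size_t i = 0; i < function.NumFunctions(); ++i)
        {
          // Evaluate one term of the separable objective and its gradient.
          objective += function.Evaluate(parameters, i, false);
          function.Gradient(parameters, i, gradient);

          // Plain gradient descent step on this single term.
          parameters -= stepSize * gradient;
        }
      }

      return objective;
    }
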
diff --git a/src/mlpack/methods/mvu/CMakeLists.txt b/src/mlpack/methods/mvu/CMakeLists.txt
deleted file mode 100644
index 8fbaec5..0000000
--- a/src/mlpack/methods/mvu/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# Define the files we need to compile.
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  mvu.hpp
-  mvu.cpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-
-add_cli_executable(mvu)
diff --git a/src/mlpack/methods/mvu/mvu.cpp b/src/mlpack/methods/mvu/mvu.cpp
deleted file mode 100644
index 8c02d0c..0000000
--- a/src/mlpack/methods/mvu/mvu.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * @file mvu.cpp
- * @author Ryan Curtin
- *
- * Implementation of the MVU class and its auxiliary objective function class.
- *
- * Note: this implementation of MVU does not work.  See #189.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include "mvu.hpp"
-
-//#include <mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp>
-#include <mlpack/core/optimizers/sdp/lrsdp.hpp>
-
-#include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-
-using namespace mlpack;
-using namespace mlpack::mvu;
-using namespace mlpack::optimization;
-
-MVU::MVU(const arma::mat& data) : data(data)
-{
-  // Nothing to do.
-}
-
-void MVU::Unfold(const size_t newDim,
-                 const size_t numNeighbors,
-                 arma::mat& outputData)
-{
-  // First we have to choose the output point.  We'll take a linear projection
-  // of the data for now (this is probably not a good final solution).
-//  outputData = trans(data.rows(0, newDim - 1));
-  // Following Nick's idea.
-  outputData.randu(data.n_cols, newDim);
-
-  // The number of constraints is the number of nearest neighbors plus one.
-  LRSDP<arma::sp_mat> mvuSolver(numNeighbors * data.n_cols + 1, outputData);
-
-  // Set up the objective.  Because we are maximizing the trace of (R R^T),
-  // we'll instead state it as min(-I_n * (R R^T)), meaning C() is -I_n.
-  mvuSolver.C().eye(data.n_cols, data.n_cols);
-  mvuSolver.C() *= -1;
-
-  // Now set up each of the constraints.
-  // The first constraint is trace(ones * R * R^T) = 0.
-  mvuSolver.B()[0] = 0;
-  mvuSolver.A()[0].ones(data.n_cols, data.n_cols);
-
-  // All of our other constraints will be sparse except the first.  So set that
-  // vector of modes accordingly.
-  mvuSolver.AModes().ones();
-  mvuSolver.AModes()[0] = 0;
-
-  // Now all of the other constraints.  We first have to run KNN to get the
-  // list of nearest neighbors.
-  arma::Mat<size_t> neighbors;
-  arma::mat distances;
-
-  KNN knn(data);
-  knn.Search(numNeighbors, neighbors, distances);
-
-  // Add each of the other constraints.  They are sparse constraints:
-  //   Tr(A_ij K) = d_ij;
-  //   A_ij = zeros except for 1 at (i, i), (j, j); -1 at (i, j), (j, i).
-  for (size_t i = 0; i < neighbors.n_cols; ++i)
-  {
-    for (size_t j = 0; j < numNeighbors; ++j)
-    {
-      // This is the index of the constraint.
-      const size_t index = (i * numNeighbors) + j + 1;
-
-      arma::mat& aRef = mvuSolver.A()[index];
-
-      aRef.set_size(3, 4);
-
-      // A_ij(i, i) = 1.
-      aRef(0, 0) = i;
-      aRef(1, 0) = i;
-      aRef(2, 0) = 1;
-
-      // A_ij(i, j) = -1.
-      aRef(0, 1) = i;
-      aRef(1, 1) = neighbors(j, i);
-      aRef(2, 1) = -1;
-
-      // A_ij(j, i) = -1.
-      aRef(0, 2) = neighbors(j, i);
-      aRef(1, 2) = i;
-      aRef(2, 2) = -1;
-
-      // A_ij(j, j) = 1.
-      aRef(0, 3) = neighbors(j, i);
-      aRef(1, 3) = neighbors(j, i);
-      aRef(2, 3) = 1;
-
-      // The constraint b_ij is the distance between these two points.
-      mvuSolver.B()[index] = distances(j, i);
-    }
-  }
-
-  // Now on with the solving.
-  double objective = mvuSolver.Optimize(outputData);
-
-  Log::Info << "Final objective is " << objective << "." << std::endl;
-
-  // Revert to original data format.
-  outputData = trans(outputData);
-}
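
In the file above, each sparse constraint matrix A_ij is stored in
coordinate-list form: a 3 x 4 dense matrix whose columns hold (row, column,
value) triplets. For a Gram matrix K = R R^T, the constraint
trace(A_ij K) = d_ij pins ||r_i - r_j||^2, the squared distance between the
two unfolded points, to the observed neighbor distance. A short sketch that
expands such a triplet matrix and checks the trace identity; the
TripletsToSparse helper is illustrative only, not part of mlpack.

    #include <cmath>
    #include <cstdio>
    #include <armadillo>

    // Expand a 3 x k triplet matrix (row; column; value) into an explicit
    // n x n sparse matrix.
    arma::sp_mat TripletsToSparse(const arma::mat& triplets,
                                  const arma::uword n)
    {
      arma::sp_mat a(n, n);
      for (arma::uword k = 0; k < triplets.n_cols; ++k)
        a(arma::uword(triplets(0, k)), arma::uword(triplets(1, k))) =
            triplets(2, k);
      return a;
    }

    int main()
    {
      // The constraint A_ij for i = 0, j = 2 in a 4-point problem:
      // +1 at (i, i) and (j, j); -1 at (i, j) and (j, i).
      arma::mat aRef(3, 4);
      aRef.col(0) = arma::vec({ 0, 0,  1 });
      aRef.col(1) = arma::vec({ 0, 2, -1 });
      aRef.col(2) = arma::vec({ 2, 0, -1 });
      aRef.col(3) = arma::vec({ 2, 2,  1 });

      const arma::sp_mat a = TripletsToSparse(aRef, 4);

      // With K = R R^T, trace(A K) equals ||r_0 - r_2||^2.
      arma::mat r(4, 2, arma::fill::randu);
      const arma::mat gram = r * r.t();
      const double lhs = arma::trace(arma::mat(a) * gram);
      const double rhs = std::pow(arma::norm(r.row(0) - r.row(2)), 2);
      std::printf("trace(A K) = %f, ||r_0 - r_2||^2 = %f\n", lhs, rhs);
    }
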
diff --git a/src/mlpack/methods/mvu/mvu.hpp b/src/mlpack/methods/mvu/mvu.hpp
deleted file mode 100644
index c7f173b..0000000
--- a/src/mlpack/methods/mvu/mvu.hpp
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * @file mvu.hpp
- * @author Ryan Curtin
- *
- * An implementation of Maximum Variance Unfolding.  This file defines an MVU
- * class as well as a class representing the objective function (a semidefinite
- * program) which MVU seeks to minimize.  Minimization is performed by the
- * Augmented Lagrangian optimizer (which in turn uses the L-BFGS optimizer).
- *
- * Note: this implementation of MVU does not work.  See #189.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_MVU_MVU_HPP
-#define MLPACK_METHODS_MVU_MVU_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace mvu {
-
-/**
- * The MVU class is meant to provide a good abstraction for users.  The dataset
- * needs to be provided, as well as several parameters.
- *
- * - dataset
- * - new dimensionality
- */
-class MVU
-{
- public:
-  MVU(const arma::mat& dataIn);
-
-  void Unfold(const size_t newDim,
-              const size_t numNeighbors,
-              arma::mat& outputCoordinates);
-
- private:
-  const arma::mat& data;
-};
-
-} // namespace mvu
-} // namespace mlpack
-
-#endif
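
For the record, the intended call pattern for the class declared above was
construct-then-Unfold. A short usage sketch, with the caveat repeated from
the file headers that this MVU implementation does not actually work (see
#189):

    #include <mlpack/core.hpp>
    #include "mvu.hpp"

    int main()
    {
      // 10-dimensional points, one per column (mlpack's matrix convention).
      arma::mat data(10, 500, arma::fill::randu);

      mlpack::mvu::MVU mvu(data);

      // Unfold onto 2 dimensions, holding the distances to each point's 5
      // nearest neighbors constant.
      arma::mat unfolded;
      mvu.Unfold(2, 5, unfolded);
    }
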
diff --git a/src/mlpack/methods/mvu/mvu_main.cpp b/src/mlpack/methods/mvu/mvu_main.cpp
deleted file mode 100644
index 07f0a99..0000000
--- a/src/mlpack/methods/mvu/mvu_main.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * @file mvu_main.cpp
- * @author Ryan Curtin
- *
- * Executable for MVU.
- *
- * Note: this implementation of MVU does not work.  See #189.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-#include "mvu.hpp"
-
-PROGRAM_INFO("Maximum Variance Unfolding (MVU)", "This program implements "
-    "Maximum Variance Unfolding, a nonlinear dimensionality reduction "
-    "technique.  The method minimizes dimensionality by unfolding a manifold "
-    "such that the distances to the nearest neighbors of each point are held "
-    "constant.");
-
-PARAM_STRING_IN_REQ("input_file", "Filename of input dataset.", "i");
-PARAM_INT_IN_REQ("new_dim", "New dimensionality of dataset.", "d");
-
-PARAM_STRING_OUT("output_file", "Filename to save unfolded dataset to.", "o");
-PARAM_INT_IN("num_neighbors", "Number of nearest neighbors to consider while "
-    "unfolding.", "k", 5);
-
-using namespace mlpack;
-using namespace mlpack::mvu;
-using namespace mlpack::math;
-using namespace arma;
-using namespace std;
-
-int main(int argc, char **argv)
-{
-  // Read from command line.
-  CLI::ParseCommandLine(argc, argv);
-  const string inputFile = CLI::GetParam<string>("input_file");
-  const string outputFile = CLI::GetParam<string>("output_file");
-  const int newDim = CLI::GetParam<int>("new_dim");
-  const int numNeighbors = CLI::GetParam<int>("num_neighbors");
-
-  if (!CLI::HasParam("output_file"))
-    Log::Warn << "--output_file (-o) is not specified; no results will be "
-        << "saved!" << endl;
-
-  RandomSeed(time(NULL));
-
-  // Load input dataset.
-  mat data;
-  data::Load(inputFile, data, true);
-
-  // Verify that the requested dimensionality is valid.
-  if (newDim <= 0 || newDim > (int) data.n_rows)
-  {
-    Log::Fatal << "Invalid new dimensionality (" << newDim << ").  Must be "
-      << "between 1 and the input dataset dimensionality (" << data.n_rows
-      << ")." << std::endl;
-  }
-
-  // Verify that the number of neighbors is valid.
-  if (numNeighbors <= 0 || numNeighbors > (int) data.n_cols)
-  {
-    Log::Fatal << "Invalid number of neighbors (" << numNeighbors << ").  Must "
-        << "be between 1 and the number of points in the input dataset ("
-        << data.n_cols << ")." << std::endl;
-  }
-
-  // Now run MVU.
-  MVU mvu(data);
-
-  mat output;
-  mvu.Unfold(newDim, numNeighbors, output);
-
-  // Save results to file.
-  if (CLI::HasParam("output_file"))
-    data::Save(outputFile, output, true);
-}
diff --git a/src/mlpack/methods/rmva/CMakeLists.txt b/src/mlpack/methods/rmva/CMakeLists.txt
deleted file mode 100644
index ced53a3..0000000
--- a/src/mlpack/methods/rmva/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# Define the files we need to compile
-# Anything not in this list will not be compiled into mlpack.
-set(SOURCES
-  rmva.hpp
-  rmva_impl.hpp
-)
-
-# Add directory name to sources.
-set(DIR_SRCS)
-foreach(file ${SOURCES})
-  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
-endforeach()
-# Append sources (with directory name) to list of all mlpack sources (used at
-# the parent scope).
-set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-
-add_cli_executable(rmva)
diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp
deleted file mode 100644
index 5f4f031..0000000
--- a/src/mlpack/methods/rmva/rmva.hpp
+++ /dev/null
@@ -1,963 +0,0 @@
-/**
- * @file rmva.hpp
- * @author Marcus Edel
- *
- * Definition of the RecurrentNeuralAttention class, which implements the
- * Recurrent Model for Visual Attention.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef MLPACK_METHODS_RMVA_RMVA_HPP
-#define MLPACK_METHODS_RMVA_RMVA_HPP
-
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/network_util.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
-#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
-#include <mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp>
-#include <mlpack/methods/ann/layer/vr_class_reward_layer.hpp>
-
-#include <boost/ptr_container/ptr_vector.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * This class implements the Recurrent Model for Visual Attention, using a
- * variety of possible layer implementations.
- *
- * For more information, see the following paper.
- *
- * @code
- * @article{MnihHGK14,
- *   title={Recurrent Models of Visual Attention},
- *   author={Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu},
- *   journal={CoRR},
- *   volume={abs/1406.6247},
- *   year={2014}
- * }
- * @endcode
- *
- * @tparam LocatorType Type of locator network.
- * @tparam LocationSensorType Type of location sensor network.
- * @tparam GlimpseSensorType Type of glimpse sensor network.
- * @tparam GlimpseType Type of glimpse network.
- * @tparam StartType Type of start network.
- * @tparam FeedbackType Type of feedback network.
- * @tparam TransferType Type of transfer network.
- * @tparam ClassifierType Type of classifier network.
- * @tparam RewardPredictorType Type of reward predictor network.
- * @tparam InitializationRuleType Rule used to initialize the weight matrix.
- * @tparam MatType Matrix type (arma::mat or arma::sp_mat).
- */
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType = RandomInitialization,
-  typename MatType = arma::mat
->
-class RecurrentNeuralAttention
-{
- public:
-  //! Convenience typedef for the internal model construction.
-  using NetworkType = RecurrentNeuralAttention<
-      LocatorType,
-      LocationSensorType,
-      GlimpseSensorType,
-      GlimpseType,
-      StartType,
-      FeedbackType,
-      TransferType,
-      ClassifierType,
-      RewardPredictorType,
-      InitializationRuleType,
-      MatType>;
-
-  /**
-   * Construct the RecurrentNeuralAttention object, which will construct the
-   * recurrent model for visual attention using the specified networks.
-   *
-   * @param locator The locator network.
-   * @param locationSensor The location sensor network.
-   * @param glimpseSensor The glimpse sensor network.
-   * @param glimpse The glimpse network.
-   * @param start The start network.
-   * @param feedback The feedback network.
-   * @param transfer The transfer network.
-   * @param classifier The classifier network.
-   * @param rewardPredictor The reward predictor network.
-   * @param nStep Number of steps to back-propagate through time.
-   * @param initializeRule Rule used to initialize the weight matrix.
-   */
-  template<typename TypeLocator,
-           typename TypeLocationSensor,
-           typename TypeGlimpseSensor,
-           typename TypeGlimpse,
-           typename TypeStart,
-           typename TypeFeedback,
-           typename TypeTransfer,
-           typename TypeClassifier,
-           typename TypeRewardPredictor>
-  RecurrentNeuralAttention(TypeLocator&& locator,
-                           TypeLocationSensor&& locationSensor,
-                           TypeGlimpseSensor&& glimpseSensor,
-                           TypeGlimpse&& glimpse,
-                           TypeStart&& start,
-                           TypeFeedback&& feedback,
-                           TypeTransfer&& transfer,
-                           TypeClassifier&& classifier,
-                           TypeRewardPredictor&& rewardPredictor,
-                           const size_t nStep,
-                           InitializationRuleType initializeRule =
-                              InitializationRuleType());
-  /**
-   * Train the network on the given input data using the given optimizer.
-   *
-   * This will use the existing model parameters as a starting point for the
-   * optimization. If this is not what you want, then you should access the
-   * parameters vector directly with Parameters() and modify it as desired.
-   *
-   * @tparam OptimizerType Type of optimizer to use to train the model.
-   * @param predictors Input training variables.
-   * @param responses Output values corresponding to the input training
-   * variables.
-   * @param optimizer Instantiated optimizer used to train the model.
-   */
-  template<
-      template<typename> class OptimizerType = mlpack::optimization::RMSprop
-  >
-  void Train(const arma::mat& predictors,
-             const arma::mat& responses,
-             OptimizerType<NetworkType>& optimizer);
-
-  /**
-   * Predict the responses to a given set of predictors. The responses will
-   * reflect the output of the given output layer as returned by the
-   * OutputClass() function.
-   *
-   * @param predictors Input predictors.
-   * @param responses Matrix to put output predictions of responses into.
-   */
-  void Predict(arma::mat& predictors, arma::mat& responses);
-
-  /**
-   * Evaluate the network with the given parameters. This function is usually
-   * called by the optimizer to train the model.
-   *
-   * @param parameters Matrix model parameters.
-   * @param i Index of point to use for objective function evaluation.
-   * @param deterministic Whether or not to train or test the model. Note that
-   * some layers act differently in training or testing mode.
-   */
-  double Evaluate(const arma::mat& parameters,
-                  const size_t i,
-                  const bool deterministic = true);
-
-  /**
-   * Evaluate the gradient of the network with the given parameters, and with
-   * respect to only one point in the dataset. This is useful for
-   * optimizers such as SGD, which require a separable objective function.
-   *
-   * @param parameters Matrix of the model parameters to be optimized.
-   * @param i Index of point to use for objective function gradient evaluation.
-   * @param gradient Matrix to output gradient into.
-   */
-  void Gradient(const arma::mat& parameters,
-                const size_t i,
-                arma::mat& gradient);
-
-  //! Return the number of separable functions (the number of predictor points).
-  size_t NumFunctions() const { return numFunctions; }
-
-  //! Return the initial point for the optimization.
-  const arma::mat& Parameters() const { return parameter; }
-  //! Modify the initial point for the optimization.
-  arma::mat& Parameters() { return parameter; }
-
-  //! Return the number of steps to back-propagate through time.
-  const size_t& Rho() const { return nStep; }
-  //! Modify the number of steps to back-propagate through time.
-  size_t& Rho() { return nStep; }
-
-  //! Return the current location.
-  const arma::mat& Location();
-
-  //! Serialize the model.
-  template<typename Archive>
-  void Serialize(Archive& ar, const unsigned int /* version */);
-
- private:
-  /*
-   * Predict the response of the given input matrix.
-   */
-  template <typename InputType, typename OutputType>
-  void SinglePredict(const InputType& input, OutputType& output)
-  {
-    // Get the locator input size.
-    if (!inputSize)
-    {
-      inputSize = NetworkInputSize(locator);
-    }
-
-    // Reset networks.
-    ResetParameter(locator);
-    ResetParameter(locationSensor);
-    ResetParameter(glimpseSensor);
-    ResetParameter(glimpse);
-    ResetParameter(feedback);
-    ResetParameter(transfer);
-    ResetParameter(classifier);
-    ResetParameter(rewardPredictor);
-    ResetParameter(start);
-
-    // Sample an initial starting action by forwarding zeros through the
-    // locator.
-    locatorInput.push_back(new arma::cube(arma::zeros<arma::cube>(inputSize, 1,
-        input.n_slices)));
-
-    // Forward pass through the recurrent network.
-    for (step = 0; step < nStep; step++)
-    {
-      // Locator forward pass.
-      Forward(locatorInput.back(), locator);
-
-      // Location sensor forward pass.
-      Forward(std::get<std::tuple_size<LocatorType>::value - 1>(
-          locator).OutputParameter(), locationSensor);
-
-      // Set the location parameter for all layers that implement a Location
-      // function, e.g., GlimpseLayer.
-      ResetLocation(std::get<std::tuple_size<LocatorType>::value - 1>(
-          locator).OutputParameter(), glimpseSensor);
-
-      // Glimpse sensor forward pass.
-      Forward(input, glimpseSensor);
-
-      // Concatenate the output activations of the location sensor and the
-      // glimpse sensor.
-      arma::mat concatLayerOutput = arma::join_cols(
-          std::get<std::tuple_size<LocationSensorType>::value - 1>(
-          locationSensor).OutputParameter(),
-          std::get<std::tuple_size<GlimpseSensorType>::value - 1>(
-          glimpseSensor).OutputParameter());
-
-      // Glimpse forward pass.
-      Forward(concatLayerOutput, glimpse);
-
-      if (step == 0)
-      {
-        // Start forward pass.
-        Forward(std::get<std::tuple_size<GlimpseType>::value - 1>(
-            glimpse).OutputParameter(), start);
-
-        // Transfer forward pass.
-        Forward(std::get<std::tuple_size<StartType>::value - 1>(
-            start).OutputParameter(), transfer);
-      }
-      else
-      {
-        // Feedback forward pass.
-        Forward(std::get<std::tuple_size<TransferType>::value - 1>(
-            transfer).OutputParameter(), feedback);
-
-        arma::mat feedbackLayerOutput =
-          std::get<std::tuple_size<GlimpseType>::value - 1>(
-          glimpse).OutputParameter() +
-          std::get<std::tuple_size<FeedbackType>::value - 1>(
-          feedback).OutputParameter();
-
-        // Transfer forward pass.
-        Forward(feedbackLayerOutput, transfer);
-      }
-
-      // Update the input for the next step.
-      locatorInput.push_back(new arma::cube(
-          std::get<std::tuple_size<TransferType>::value - 1>(
-          transfer).OutputParameter().memptr(), locatorInput.back().n_rows,
-          locatorInput.back().n_cols, locatorInput.back().n_slices));
-    }
-
-    // Classifier forward pass.
-    Forward(locatorInput.back().slice(0), classifier);
-
-    output = std::get<std::tuple_size<ClassifierType>::value - 1>(
-        classifier).OutputParameter();
-  }
-
-  /**
-   * Update the layer reward for all layers that implement the Reward function.
-   */
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ResetReward(const double reward, std::tuple<Tp...>& network)
-  {
-    SetReward(reward, std::get<I>(network));
-    ResetReward<I + 1, Tp...>(reward, network);
-  }
-
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ResetReward(const double /* reward */, std::tuple<Tp...>& /* network */)
-  {
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      HasRewardCheck<T, double&(T::*)()>::value, void>::type
-  SetReward(const double reward, T& layer)
-  {
-    layer.Reward() = reward;
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      !HasRewardCheck<T, double&(T::*)()>::value, void>::type
-  SetReward(const double /* reward */, T& /* layer */)
-  {
-    /* Nothing to do here */
-  }
-
-  /**
-   * Reset the network by clearing the delta and by setting the layer status.
-   */
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& /* network */) { /* Nothing to do here */ }
-
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ResetParameter(std::tuple<Tp...>& network)
-  {
-    ResetDeterministic(std::get<I>(network));
-    std::get<I>(network).Delta().zeros();
-
-    ResetParameter<I + 1, Tp...>(network);
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& layer)
-  {
-    layer.Deterministic() = deterministic;
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
-  ResetDeterministic(T& /* layer */) { /* Nothing to do here */ }
-
-  /**
-   * Reset the location by updating the location for all layers that implement
-   * the Location function.
-   */
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ResetLocation(const arma::mat& /* location */,
-                std::tuple<Tp...>& /* network */)
-  {
-    // Nothing to do here.
-  }
-
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ResetLocation(const arma::mat& location, std::tuple<Tp...>& network)
-  {
-    SetLocation(std::get<I>(network), location);
-    ResetLocation<I + 1, Tp...>(location, network);
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      HasLocationCheck<T, void(T::*)(const arma::mat&)>::value, void>::type
-  SetLocation(T& layer, const arma::mat& location)
-  {
-    layer.Location(location);
-  }
-
-  template<typename T>
-  typename std::enable_if<
-      !HasLocationCheck<T, void(T::*)(const arma::mat&)>::value, void>::type
-  SetLocation(T& /* layer */, const arma::mat& /* location */)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Save the network layer activations.
-   */
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  SaveActivations(boost::ptr_vector<MatType>& activations,
-                  std::tuple<Tp...>& network,
-                  size_t& activationCounter)
-  {
-    Save(I, activations, std::get<I>(network),
-        std::get<I>(network).InputParameter());
-
-    activationCounter++;
-    SaveActivations<I + 1, Tp...>(activations, network, activationCounter);
-  }
-
-  template<size_t I = 0, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  SaveActivations(boost::ptr_vector<MatType>& /* activations */,
-                  std::tuple<Tp...>& /* network */,
-                  size_t& /* activationCounter */)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Distinguish between recurrent layers and non-recurrent layers when
-   * storing the activations.
-   */
-  template<typename T, typename P>
-  typename std::enable_if<
-      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Save(const size_t /* layerNumber */,
-       boost::ptr_vector<MatType>& activations,
-       T& layer,
-       P& /* unused */)
-  {
-    activations.push_back(new MatType(layer.RecurrentParameter()));
-  }
-
-  template<typename T, typename P>
-  typename std::enable_if<
-      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Save(const size_t /* layerNumber */,
-       boost::ptr_vector<MatType>& activations,
-       T& layer,
-       P& /* unused */)
-  {
-    activations.push_back(new MatType(layer.OutputParameter()));
-  }
-
-  template<size_t I = 0, typename DataTypeA, typename DataTypeB, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  SaveActivations(boost::ptr_vector<DataTypeA>& activationsA,
-                  boost::ptr_vector<DataTypeB>& activationsB,
-                  size_t& dataTypeACounter,
-                  size_t& dataTypeBCounter,
-                  std::tuple<Tp...>& network)
-  {
-    Save(activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
-        std::get<I>(network), std::get<I>(network).OutputParameter());
-
-    SaveActivations<I + 1, DataTypeA, DataTypeB, Tp...>(
-        activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
-        network);
-  }
-
-  template<size_t I = 0, typename DataTypeA, typename DataTypeB, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  SaveActivations(boost::ptr_vector<DataTypeA>& /* activationsA */,
-                  boost::ptr_vector<DataTypeB>& /* activationsB */,
-                  size_t& /* dataTypeACounter */,
-                  size_t& /* dataTypeBCounter */,
-                  std::tuple<Tp...>& /* network */)
-  {
-    // Nothing to do here.
-  }
-
-  template<typename T, typename DataTypeA, typename DataTypeB>
-  void Save(boost::ptr_vector<DataTypeA>& activationsA,
-            boost::ptr_vector<DataTypeB>& /* activationsB */,
-            size_t& dataTypeACounter,
-            size_t& /* dataTypeBCounter */,
-            T& layer,
-            DataTypeA& /* unused */)
-  {
-    activationsA.push_back(new DataTypeA(layer.OutputParameter()));
-    dataTypeACounter++;
-  }
-
-  template<typename T, typename DataTypeA, typename DataTypeB>
-  void Save(boost::ptr_vector<DataTypeA>& /* activationsA */,
-            boost::ptr_vector<DataTypeB>& activationsB,
-            size_t& /* dataTypeACounter */,
-            size_t& dataTypeBCounter,
-            T& layer,
-            DataTypeB& /* unused */)
-  {
-    activationsB.push_back(new DataTypeB(layer.OutputParameter()));
-    dataTypeBCounter++;
-  }
-
-  /**
-   * Load the network layer activations.
-   */
-  template<size_t I = 0, typename DataType, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  LoadActivations(DataType& input,
-                  boost::ptr_vector<MatType>& /* activations */,
-                  size_t& /* activationCounter */,
-                  std::tuple<Tp...>& network)
-  {
-    std::get<0>(network).InputParameter() = input;
-    LinkParameter(network);
-  }
-
-  template<size_t I = 0, typename DataType, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  LoadActivations(DataType& input,
-                  boost::ptr_vector<MatType>& activations,
-                  size_t& activationCounter,
-                  std::tuple<Tp...>& network)
-  {
-    Load(--activationCounter, activations,
-        std::get<sizeof...(Tp) - I - 1>(network),
-        std::get<I>(network).InputParameter());
-
-    LoadActivations<I + 1, DataType, Tp...>(input, activations,
-        activationCounter, network);
-  }
-
-  /**
-   * Distinguish between recurrent layers and non-recurrent layers when
-   * loading the activations.
-   */
-  template<typename T, typename P>
-  typename std::enable_if<
-      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Load(const size_t layerNumber,
-       boost::ptr_vector<MatType>& activations,
-       T& layer,
-       P& /* output */)
-  {
-    layer.RecurrentParameter() = activations[layerNumber];
-  }
-
-  template<typename T, typename P>
-  typename std::enable_if<
-      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
-  Load(const size_t layerNumber,
-       boost::ptr_vector<MatType>& activations,
-       T& layer,
-       P& /* output */)
-  {
-    layer.OutputParameter() = activations[layerNumber];
-  }
-
-  template<size_t I = 0,
-           typename DataType,
-           typename DataTypeA,
-           typename DataTypeB,
-           typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  LoadActivations(DataType& input,
-                  boost::ptr_vector<DataTypeA>& activationsA,
-                  boost::ptr_vector<DataTypeB>& activationsB,
-                  size_t& dataTypeACounter,
-                  size_t& dataTypeBCounter,
-                  std::tuple<Tp...>& network)
-  {
-    Load(activationsA,
-         activationsB,
-         dataTypeACounter,
-         dataTypeBCounter,
-         std::get<sizeof...(Tp) - I - 1>(network),
-         std::get<sizeof...(Tp) - I - 1>(network).OutputParameter());
-
-    LoadActivations<I + 1, DataType, DataTypeA, DataTypeB, Tp...>(
-        input, activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
-        network);
-  }
-
-  template<size_t I = 0,
-           typename DataType,
-           typename DataTypeA,
-           typename DataTypeB,
-           typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  LoadActivations(DataType& input,
-                  boost::ptr_vector<DataTypeA>& /* activationsA */,
-                  boost::ptr_vector<DataTypeB>& /* activationsB */,
-                  size_t& /* dataTypeACounter */,
-                  size_t& /* dataTypeBCounter */,
-                  std::tuple<Tp...>& network)
-  {
-    std::get<0>(network).InputParameter() = input;
-    LinkParameter(network);
-  }
-
-  template<typename T, typename DataTypeA, typename DataTypeB>
-  void Load(boost::ptr_vector<DataTypeA>& activationsA,
-            boost::ptr_vector<DataTypeB>& /* activationsB */,
-            size_t& dataTypeACounter,
-            size_t& /* dataTypeBCounter */,
-            T& layer,
-            DataTypeA& /* output */)
-  {
-    layer.OutputParameter() = activationsA[--dataTypeACounter];
-  }
-
-  template<typename T, typename DataTypeA, typename DataTypeB>
-  void Load(boost::ptr_vector<DataTypeA>& /* activationsA */,
-            boost::ptr_vector<DataTypeB>& activationsB,
-            size_t& /* dataTypeACounter */,
-            size_t& dataTypeBCounter,
-            T& layer,
-            DataTypeB& /* output */)
-  {
-    layer.OutputParameter() = activationsB[--dataTypeBCounter];
-  }
-
-  /**
-   * Run a single iteration of the feed forward algorithm, passing the given
-   * input through each layer module of the given network.
-   */
-  template<size_t I = 0, typename DataType, typename... Tp>
-  void Forward(const DataType& input, std::tuple<Tp...>& t)
-  {
-    std::get<I>(t).InputParameter() = input;
-    std::get<I>(t).Forward(std::get<I>(t).InputParameter(),
-        std::get<I>(t).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(t);
-  }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& network)
-  {
-    LinkParameter(network);
-  }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  ForwardTail(std::tuple<Tp...>& t)
-  {
-    std::get<I>(t).Forward(std::get<I - 1>(t).OutputParameter(),
-        std::get<I>(t).OutputParameter());
-
-    ForwardTail<I + 1, Tp...>(t);
-  }
-
-  /**
-   * Run a single iteration of the backward algorithm, propagating the given
-   * error backward through each layer module of the given network.
-   */
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<sizeof...(Tp) == 1, void>::type
-  Backward(const DataType& error, std::tuple<Tp ...>& t)
-  {
-    std::get<sizeof...(Tp) - I>(t).Backward(
-      std::get<sizeof...(Tp) - I>(t).OutputParameter(), error,
-      std::get<sizeof...(Tp) - I>(t).Delta());
-  }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  Backward(const DataType& error, std::tuple<Tp ...>& t)
-  {
-    std::get<sizeof...(Tp) - I>(t).Backward(
-        std::get<sizeof...(Tp) - I>(t).OutputParameter(), error,
-        std::get<sizeof...(Tp) - I>(t).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, t);
-  }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I == (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& /* error */, std::tuple<Tp...>& t)
-  {
-    std::get<sizeof...(Tp) - I>(t).Backward(
-        std::get<sizeof...(Tp) - I>(t).OutputParameter(),
-        std::get<sizeof...(Tp) - I + 1>(t).Delta(),
-        std::get<sizeof...(Tp) - I>(t).Delta());
-  }
-
-  template<size_t I = 1, typename DataType, typename... Tp>
-  typename std::enable_if<I < (sizeof...(Tp)), void>::type
-  BackwardTail(const DataType& error, std::tuple<Tp...>& t)
-  {
-    std::get<sizeof...(Tp) - I>(t).Backward(
-        std::get<sizeof...(Tp) - I>(t).OutputParameter(),
-        std::get<sizeof...(Tp) - I + 1>(t).Delta(),
-        std::get<sizeof...(Tp) - I>(t).Delta());
-
-    BackwardTail<I + 1, DataType, Tp...>(error, t);
-  }
-
-  /**
-   * Link the calculated activation with the correct layer.
-   */
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I == sizeof...(Tp), void>::type
-  LinkParameter(std::tuple<Tp ...>& /* network */) { /* Nothing to do here */ }
-
-  template<size_t I = 1, typename... Tp>
-  typename std::enable_if<I < sizeof...(Tp), void>::type
-  LinkParameter(std::tuple<Tp...>& network)
-  {
-    if (!LayerTraits<typename std::remove_reference<
-        decltype(std::get<I>(network))>::type>::IsBiasLayer)
-    {
-      std::get<I>(network).InputParameter() = std::get<I - 1>(
-          network).OutputParameter();
-    }
-
-    LinkParameter<I + 1, Tp...>(network);
-  }
-
-  /**
-   * Iterate through all layer modules and update the gradient using the
-   * layer-defined optimizer.
-   */
-  template<typename InputType, typename ErrorType, typename... Tp>
-  void UpdateGradients(const InputType& input,
-                       const ErrorType& error,
-                       std::tuple<Tp...>& network)
-  {
-    Update(std::get<0>(network),
-           input,
-           std::get<1>(network).Delta(),
-           std::get<1>(network).OutputParameter());
-
-    UpdateGradients<1, ErrorType, Tp...>(error, network);
-  }
-
-  template<size_t I = 0, typename ErrorType, typename... Tp>
-  typename std::enable_if<I < (sizeof...(Tp) - 1), void>::type
-  UpdateGradients(const ErrorType& error, std::tuple<Tp...>& network)
-  {
-    Update(std::get<I>(network),
-           std::get<I>(network).InputParameter(),
-           std::get<I + 1>(network).Delta(),
-           std::get<I>(network).OutputParameter());
-
-    UpdateGradients<I + 1, ErrorType, Tp...>(error, network);
-  }
-
-  template<size_t I = 0, typename ErrorType, typename... Tp>
-  typename std::enable_if<I == (sizeof...(Tp) - 1), void>::type
-  UpdateGradients(const ErrorType& error, std::tuple<Tp...>& network)
-  {
-    Update(std::get<I>(network),
-           std::get<I>(network).InputParameter(),
-           error,
-           std::get<I>(network).OutputParameter());
-  }
-
-  template<typename LayerType,
-           typename InputType,
-           typename ErrorType,
-           typename GradientType>
-  typename std::enable_if<
-      HasGradientCheck<LayerType,
-          void(LayerType::*)(const InputType&,
-                             const ErrorType&,
-                             GradientType&)>::value, void>::type
-  Update(LayerType& layer,
-         const InputType& input,
-         const ErrorType& error,
-         GradientType& /* gradient */)
-  {
-    layer.Gradient(input, error, layer.Gradient());
-  }
-
-  template<typename LayerType,
-           typename InputType,
-           typename ErrorType,
-           typename GradientType>
-  typename std::enable_if<
-      !HasGradientCheck<LayerType,
-          void(LayerType::*)(const InputType&,
-                             const ErrorType&,
-                             GradientType&)>::value, void>::type
-  Update(LayerType& /* layer */,
-         const InputType& /* input */,
-         const ErrorType& /* error */,
-         GradientType& /* gradient */)
-  {
-    // Nothing to do here
-  }
-
-  //! The locator network.
-  LocatorType locator;
-
-  //! The location sensor network.
-  LocationSensorType locationSensor;
-
-  //! The glimpse sensor network.
-  GlimpseSensorType glimpseSensor;
-
-  //! The glimpse network.
-  GlimpseType glimpse;
-
-  //! The start network.
-  StartType start;
-
-  //! The feedback network.
-  FeedbackType feedback;
-
-  //! The transfer network.
-  TransferType transfer;
-
-  //! The classifier network.
-  ClassifierType classifier;
-
-  //! The reward predictor network.
-  RewardPredictorType rewardPredictor;
-
-  //! The number of steps to back-propagate through time.
-  size_t nStep;
-
-  //! Locally stored network input size.
-  size_t inputSize;
-
-  //! The current evaluation mode (training or testing).
-  bool deterministic;
-
-  //! The index of the current step.
-  size_t step;
-
-  //! The activation storage we are using to perform the feed backward pass for
-  //! the glimpse network.
-  boost::ptr_vector<arma::mat> glimpseActivations;
-
-  //! The activation storage we are using to perform the feed backward pass for
-  //! the locator network.
-  boost::ptr_vector<arma::mat> locatorActivations;
-
-  //! The activation storage we are using to perform the feed backward pass for
-  //! the feedback network.
-  boost::ptr_vector<arma::mat> feedbackActivations;
-
-  //! The activation storage we are using to save the feedback network input.
-  boost::ptr_vector<arma::mat> feedbackActivationsInput;
-
-  //! The activation storage we are using to perform the feed backward pass for
-  //! the transfer network.
-  boost::ptr_vector<arma::mat> transferActivations;
-
-  //! The activation storage we are using to perform the feed backward pass for
-  //! the location sensor network.
-  boost::ptr_vector<arma::mat> locationSensorActivations;
-
-  //! The activation storage we are using to perform the feed backward pass for
-  //! the glimpse sensor network.
-  boost::ptr_vector<arma::mat> glimpseSensorMatActivations;
-  boost::ptr_vector<arma::cube> glimpseSensorCubeActivations;
-
-  //! The activation storage we are using to perform the feed backward pass for
-  //! the locator input.
-  boost::ptr_vector<arma::cube> locatorInput;
-
-  //! The storage we are using to save the location.
-  boost::ptr_vector<arma::mat> location;
-
-  //! The current number of activations in the glimpse sensor network.
-  size_t glimpseSensorMatCounter;
-  size_t glimpseSensorCubeCounter;
-
-  //! The current number of activations in the glimpse network.
-  size_t glimpseActivationsCounter;
-
-  //! The current number of activations in the start network.
-  size_t startActivationsCounter;
-
-  //! The current number of activations in the feedback network.
-  size_t feedbackActivationsCounter;
-
-  //! The current number of activations in the transfer network.
-  size_t transferActivationsCounter;
-
-  //! The current number of activations in the locator network.
-  size_t locatorActivationsCounter;
-
-  //! The current number of activations in the location sensor network.
-  size_t locationSensorActivationsCounter;
-
-  //! The current number of activations in the glimpse sensor network.
-  size_t glimpseSensorMatActivationsCounter;
-  size_t glimpseSensorCubeActivationsCounter;
-
-  //! The current number of locations in the location storage.
-  size_t locationCounter;
-
-  //! Matrix of (trained) parameters.
-  arma::mat parameter;
-
-  //! The matrix of data points (predictors).
-  arma::mat predictors;
-
-  //! The matrix of responses to the input data points.
-  arma::mat responses;
-
-  //! The number of separable functions (the number of predictor points).
-  size_t numFunctions;
-
-  //! Storage used to merge the reward input.
-  arma::field<arma::mat> rewardInput;
-
-  //! The current input.
-  arma::cube input;
-
-  //! The current target.
-  arma::mat target;
-
-  //! Locally stored performance functions.
-  NegativeLogLikelihoodLayer<> negativeLogLikelihoodFunction;
-  VRClassRewardLayer<> vRClassRewardFunction;
-
-  //! Locally stored size of the locator network.
-  size_t locatorSize;
-
-  //! Locally stored size of the location sensor network.
-  size_t locationSensorSize;
-
-  //! Locally stored size of the glimpse sensor network.
-  size_t glimpseSensorSize;
-
-  //! Locally stored size of the glimpse network.
-  size_t glimpseSize;
-
-  //! Locally stored size of the start network.
-  size_t startSize;
-
-  //! Locally stored size of the feedback network.
-  size_t feedbackSize;
-
-  //! Locally stored size of the transfer network.
-  size_t transferSize;
-
-  //! Locally stored size of the classifier network.
-  size_t classifierSize;
-
-  //! Locally stored size of the reward predictor network.
-  size_t rewardPredictorSize;
-
-  //! Locally stored recurrent gradient.
-  arma::mat recurrentGradient;
-
-  //! Locally stored action error.
-  arma::mat actionError;
-
-  //! Locally stored current location.
-  arma::mat evaluationLocation;
-}; // class RecurrentNeuralAttention
-
-} // namespace ann
-} // namespace mlpack
-
-// Include implementation.
-#include "rmva_impl.hpp"
-
-#endif
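
Throughout the class above the layers live in std::tuple objects, and member
functions such as LinkParameter() and UpdateGradients() walk them with an
enable_if-terminated compile-time recursion. A compact, self-contained sketch
of that iteration idiom (ForEach is illustrative, not an mlpack API; the
generic lambda needs C++14):

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <type_traits>

    // Terminating case: past the last element, do nothing.
    template<std::size_t I = 0, typename F, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp), void>::type
    ForEach(std::tuple<Tp...>& /* t */, F /* f */) { }

    // Recursive case: apply f to element I, then recurse on I + 1.
    template<std::size_t I = 0, typename F, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp), void>::type
    ForEach(std::tuple<Tp...>& t, F f)
    {
      f(std::get<I>(t));
      ForEach<I + 1, F, Tp...>(t, f);
    }

    int main()
    {
      std::tuple<int, double, char> layers(1, 2.5, 'x');
      ForEach(layers, [](auto& layer) { std::cout << layer << '\n'; });
    }

Because the index is a template parameter, the "loop" is unrolled at compile
time and each step sees the concrete layer type, which is what allows the
SFINAE-based Update() dispatch shown earlier.
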
diff --git a/src/mlpack/methods/rmva/rmva_impl.hpp b/src/mlpack/methods/rmva/rmva_impl.hpp
deleted file mode 100644
index cfb310b..0000000
--- a/src/mlpack/methods/rmva/rmva_impl.hpp
+++ /dev/null
@@ -1,740 +0,0 @@
-/**
- * @file rmva_impl.hpp
- * @author Marcus Edel
- *
- * Implementation of the Recurrent Model for Visual Attention.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#ifndef __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP
-#define __MLPACK_METHODS_RMVA_RMVA_IMPL_HPP
-
-// In case it hasn't been included yet.
-#include "rmva.hpp"
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType,
-  typename MatType
->
-template<
-    typename TypeLocator,
-    typename TypeLocationSensor,
-    typename TypeGlimpseSensor,
-    typename TypeGlimpse,
-    typename TypeStart,
-    typename TypeFeedback,
-    typename TypeTransfer,
-    typename TypeClassifier,
-    typename TypeRewardPredictor
->
-RecurrentNeuralAttention<
-  LocatorType,
-  LocationSensorType,
-  GlimpseSensorType,
-  GlimpseType,
-  StartType,
-  FeedbackType,
-  TransferType,
-  ClassifierType,
-  RewardPredictorType,
-  InitializationRuleType,
-  MatType
->::RecurrentNeuralAttention(TypeLocator&& locator,
-                            TypeLocationSensor&& locationSensor,
-                            TypeGlimpseSensor&& glimpseSensor,
-                            TypeGlimpse&& glimpse,
-                            TypeStart&& start,
-                            TypeFeedback&& feedback,
-                            TypeTransfer&& transfer,
-                            TypeClassifier&& classifier,
-                            TypeRewardPredictor&& rewardPredictor,
-                            const size_t nStep,
-                            InitializationRuleType initializeRule) :
-    locator(std::forward<TypeLocator>(locator)),
-    locationSensor(std::forward<TypeLocationSensor>(locationSensor)),
-    glimpseSensor(std::forward<TypeGlimpseSensor>(glimpseSensor)),
-    glimpse(std::forward<TypeGlimpse>(glimpse)),
-    start(std::forward<TypeStart>(start)),
-    feedback(std::forward<TypeFeedback>(feedback)),
-    transfer(std::forward<TypeTransfer>(transfer)),
-    classifier(std::forward<TypeClassifier>(classifier)),
-    rewardPredictor(std::forward<TypeRewardPredictor>(rewardPredictor)),
-    nStep(nStep),
-    inputSize(0)
-{
-  // Set the network size.
-  locatorSize = NetworkSize(this->locator);
-  locationSensorSize = NetworkSize(this->locationSensor);
-  glimpseSensorSize = NetworkSize(this->glimpseSensor);
-  glimpseSize = NetworkSize(this->glimpse);
-  feedbackSize = NetworkSize(this->feedback);
-  transferSize = NetworkSize(this->transfer);
-  classifierSize = NetworkSize(this->classifier);
-  rewardPredictorSize = NetworkSize(this->rewardPredictor);
-  startSize = NetworkSize(this->start);
-
-  initializeRule.Initialize(parameter, locatorSize + locationSensorSize +
-      glimpseSensorSize + glimpseSize + feedbackSize + transferSize +
-      classifierSize + rewardPredictorSize + startSize, 1);
-
-  // Set the network weights.
-  NetworkWeights(initializeRule, parameter, this->locator);
-  NetworkWeights(initializeRule, parameter, this->locationSensor, locatorSize);
-  NetworkWeights(initializeRule, parameter, this->glimpseSensor, locatorSize +
-      locationSensorSize);
-  NetworkWeights(initializeRule, parameter, this->glimpse, locatorSize +
-      locationSensorSize + glimpseSensorSize);
-  NetworkWeights(initializeRule, parameter, this->feedback, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize);
-  NetworkWeights(initializeRule, parameter, this->transfer, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize);
-  NetworkWeights(initializeRule, parameter, this->classifier, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize +
-      transferSize);
-  NetworkWeights(initializeRule, parameter, this->rewardPredictor, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize +
-      transferSize + classifierSize);
-  NetworkWeights(initializeRule, parameter, this->start, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize +
-      transferSize + classifierSize + rewardPredictorSize);
-
-  rewardInput = arma::field<arma::mat>(2, 1);
-}
-
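
The constructor above sizes one flat `parameter` vector as the sum of all
module sizes and then points each module at its cumulative offset via
NetworkWeights(). A small Armadillo sketch of that layout, assuming the
non-owning views are built with the advanced arma::mat constructor (the module
sizes below are made up):

    #include <armadillo>

    int main()
    {
      // Made-up module sizes; the real code computes these per module.
      const arma::uword locatorSize = 6, sensorSize = 4;
      arma::mat parameter(locatorSize + sensorSize, 1, arma::fill::randu);

      // Non-owning views into the flat vector: writes go to `parameter`.
      arma::mat locatorWeights(parameter.memptr(), 3, 2, false, false);
      arma::mat sensorWeights(parameter.memptr() + locatorSize, 2, 2,
          false, false);

      locatorWeights.zeros(); // Zeroes the first locatorSize entries.
      parameter.print("parameter");
    }

Keeping every weight in one contiguous vector is what lets a single optimizer
update the whole model at once.
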
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType,
-  typename MatType
->
-template<template<typename> class OptimizerType>
-void RecurrentNeuralAttention<
-  LocatorType,
-  LocationSensorType,
-  GlimpseSensorType,
-  GlimpseType,
-  StartType,
-  FeedbackType,
-  TransferType,
-  ClassifierType,
-  RewardPredictorType,
-  InitializationRuleType,
-  MatType
->::Train(const arma::mat& predictors,
-         const arma::mat& responses,
-         OptimizerType<NetworkType>& optimizer)
-{
-  numFunctions = predictors.n_cols;
-  this->predictors = predictors;
-  this->responses = responses;
-
-  // Train the model.
-  Timer::Start("ffn_optimization");
-  const double out = optimizer.Optimize(parameter);
-  Timer::Stop("ffn_optimization");
-
-  Log::Info << "FFN::FFN(): final objective of trained model is " << out
-      << "." << std::endl;
-}
-
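
Train() passes the network itself to the optimizer, which then drives it
through the usual mlpack decomposable-function contract: NumFunctions() many
per-point objectives, each reachable through Evaluate()/Gradient() with an
index. A schematic of that interaction, where ToyFunction and SchematicSgd are
stand-ins rather than the real SGD implementation:

    #include <armadillo>

    // Toy decomposable objective: point i contributes (x - i)^2.
    struct ToyFunction
    {
      arma::uword NumFunctions() const { return 2; }

      void Gradient(const arma::mat& parameter, const arma::uword i,
                    arma::mat& gradient)
      {
        gradient = 2 * (parameter - double(i));
      }
    };

    // Schematic optimizer loop; the real SGD also calls Evaluate(),
    // shuffles the visitation order, and checks tolerances.
    template<typename FunctionType>
    void SchematicSgd(FunctionType& f, arma::mat& parameter,
                      const double stepSize, const arma::uword maxIterations)
    {
      arma::mat gradient;
      for (arma::uword i = 0; i < maxIterations; ++i)
      {
        f.Gradient(parameter, i % f.NumFunctions(), gradient);
        parameter -= stepSize * gradient;
      }
    }

    int main()
    {
      ToyFunction f;
      arma::mat parameter(1, 1, arma::fill::zeros);
      SchematicSgd(f, parameter, 0.1, 100);
      parameter.print("parameter (oscillates near 0.5)");
    }
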
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType,
-  typename MatType
->
-void RecurrentNeuralAttention<
-  LocatorType,
-  LocationSensorType,
-  GlimpseSensorType,
-  GlimpseType,
-  StartType,
-  FeedbackType,
-  TransferType,
-  ClassifierType,
-  RewardPredictorType,
-  InitializationRuleType,
-  MatType
->::Predict(arma::mat& predictors, arma::mat& responses)
-{
-  deterministic = true;
-
-  arma::mat responsesTemp;
-  SinglePredict(arma::cube(predictors.colptr(0), 28, 28, 1), responsesTemp);
-
-  responses = arma::mat(responsesTemp.n_elem, predictors.n_cols);
-  responses.col(0) = responsesTemp.col(0);
-
-  for (size_t i = 1; i < predictors.n_cols; i++)
-  {
-    SinglePredict(arma::cube(predictors.colptr(i), 28, 28, 1), responsesTemp);
-    responses.col(i) = responsesTemp.col(0);
-  }
-}
-
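
Predict() above views each flattened 784-element column as a 28x28x1 cube
before forwarding it through the network; the hard-coded 28x28 assumes
MNIST-sized inputs. A minimal sketch of that reshaping (this cube constructor
copies the memory by default):

    #include <armadillo>

    int main()
    {
      // Three flattened 28x28 images, one per column, as in Predict().
      arma::mat predictors(28 * 28, 3, arma::fill::randu);

      for (arma::uword i = 0; i < predictors.n_cols; ++i)
      {
        // Copies the i-th column into cube layout.
        arma::cube image(predictors.colptr(i), 28, 28, 1);
        // image.slice(0) is now the i-th image as a 28x28 matrix.
      }
    }
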
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType,
-  typename MatType
->
-double RecurrentNeuralAttention<
-  LocatorType,
-  LocationSensorType,
-  GlimpseSensorType,
-  GlimpseType,
-  StartType,
-  FeedbackType,
-  TransferType,
-  ClassifierType,
-  RewardPredictorType,
-  InitializationRuleType,
-  MatType
->::Evaluate(const arma::mat& /* unused */,
-            const size_t i,
-            const bool deterministic)
-{
-  this->deterministic = deterministic;
-
-  input = arma::cube(predictors.colptr(i), 28, 28, 1);
-  target = arma::mat(responses.colptr(i), responses.n_rows, 1, false, true);
-
-  // Get the locator input size.
-  if (!inputSize)
-  {
-    inputSize = NetworkInputSize(locator);
-  }
-
-  glimpseSensorMatCounter = 0;
-  glimpseSensorCubeCounter = 0;
-  glimpseActivationsCounter = 0;
-  locatorActivationsCounter = 0;
-  locationSensorActivationsCounter = 0;
-  glimpseSensorMatActivationsCounter = 0;
-  glimpseSensorCubeActivationsCounter = 0;
-  locationCounter = 0;
-  feedbackActivationsCounter = 0;
-  transferActivationsCounter = 0;
-
-  // Reset networks.
-  ResetParameter(locator);
-  ResetParameter(locationSensor);
-  ResetParameter(glimpseSensor);
-  ResetParameter(glimpse);
-  ResetParameter(feedback);
-  ResetParameter(transfer);
-  ResetParameter(classifier);
-  ResetParameter(rewardPredictor);
-  ResetParameter(start);
-
-  // Reset activation storage.
-  glimpseActivations.clear();
-  locatorActivations.clear();
-  locationSensorActivations.clear();
-  glimpseSensorMatActivations.clear();
-  glimpseSensorCubeActivations.clear();
-  feedbackActivations.clear();
-  transferActivations.clear();
-  locatorInput.clear();
-  location.clear();
-  feedbackActivationsInput.clear();
-
-  // Sample an initial starting action by forwarding zeros through the
-  // locator.
-  locatorInput.push_back(new arma::cube(arma::zeros<arma::cube>(inputSize, 1,
-      input.n_slices)));
-
-  // Forward pass through the recurrent network.
-  for (step = 0; step < nStep; step++)
-  {
-    // Locator forward pass.
-    Forward(locatorInput.back(), locator);
-    SaveActivations(locatorActivations, locator, locatorActivationsCounter);
-
-    // Location sensor forward pass.
-    Forward(std::get<std::tuple_size<LocatorType>::value - 1>(
-        locator).OutputParameter(), locationSensor);
-    SaveActivations(locationSensorActivations, locationSensor,
-        locationSensorActivationsCounter);
-
-    // Set the location parameter for all layers that implement a Location
-    // function, e.g., the GlimpseLayer.
-    ResetLocation(std::get<std::tuple_size<LocatorType>::value - 1>(
-        locator).OutputParameter(), glimpseSensor);
-
-    // Save the location for the backward path.
-    location.push_back(new arma::mat(std::get<std::tuple_size<
-        LocatorType>::value - 1>(locator).OutputParameter()));
-
-    // Glimpse sensor forward pass.
-    Forward(input, glimpseSensor);
-    SaveActivations(glimpseSensorMatActivations, glimpseSensorCubeActivations,
-        glimpseSensorMatCounter, glimpseSensorCubeCounter, glimpseSensor);
-
-    // Concatenate the output activations of the location sensor and the
-    // glimpse sensor.
-    arma::mat concatLayerOutput = arma::join_cols(
-        std::get<std::tuple_size<LocationSensorType>::value - 1>(
-        locationSensor).OutputParameter(),
-        std::get<std::tuple_size<GlimpseSensorType>::value - 1>(
-        glimpseSensor).OutputParameter());
-
-    // Glimpse forward pass.
-    Forward(concatLayerOutput, glimpse);
-    SaveActivations(glimpseActivations, glimpse, glimpseActivationsCounter);
-
-    if (step == 0)
-    {
-      // Start forward pass.
-      Forward(std::get<std::tuple_size<GlimpseType>::value - 1>(
-          glimpse).OutputParameter(), start);
-
-      // Transfer forward pass.
-      Forward(std::get<std::tuple_size<StartType>::value - 1>(
-          start).OutputParameter(), transfer);
-      SaveActivations(transferActivations, transfer,
-          transferActivationsCounter);
-    }
-    else
-    {
-      // Feedback forward pass.
-      Forward(std::get<std::tuple_size<TransferType>::value - 1>(
-          transfer).OutputParameter(), feedback);
-      SaveActivations(feedbackActivations, feedback,
-          feedbackActivationsCounter);
-
-      feedbackActivationsInput.push_back(new arma::mat(
-          std::get<std::tuple_size<TransferType>::value - 1>(
-          transfer).OutputParameter().memptr(),
-          std::get<std::tuple_size<TransferType>::value - 1>(
-          transfer).OutputParameter().n_rows,
-          std::get<std::tuple_size<TransferType>::value - 1>(
-          transfer).OutputParameter().n_cols));
-
-      arma::mat feedbackLayerOutput =
-        std::get<std::tuple_size<GlimpseType>::value - 1>(
-        glimpse).OutputParameter() +
-        std::get<std::tuple_size<FeedbackType>::value - 1>(
-        feedback).OutputParameter();
-
-      // Transfer forward pass.
-      Forward(feedbackLayerOutput, transfer);
-      SaveActivations(transferActivations, transfer,
-          transferActivationsCounter);
-    }
-
-    // Update the input for the next step.
-    locatorInput.push_back(new arma::cube(
-        std::get<std::tuple_size<TransferType>::value - 1>(
-        transfer).OutputParameter().memptr(), locatorInput.back().n_rows,
-        locatorInput.back().n_cols, locatorInput.back().n_slices));
-  }
-
-  // Classifier forward pass.
-  Forward(locatorInput.back().slice(0), classifier);
-
-  // Reward predictor forward pass.
-  Forward(std::get<std::tuple_size<ClassifierType>::value - 1>(
-      classifier).OutputParameter(), rewardPredictor);
-
-  double performanceError = negativeLogLikelihoodFunction.Forward(
-      std::get<std::tuple_size<ClassifierType>::value - 1>(
-      classifier).OutputParameter(), target);
-
-  // Create the input for vRClassRewardFunction, using the output of the
-  // classifier and of the reward predictor.
-  rewardInput(0, 0) = std::get<std::tuple_size<ClassifierType>::value - 1>(
-      classifier).OutputParameter();
-  rewardInput(1, 0) = std::get<std::tuple_size<RewardPredictorType>::value - 1>(
-      rewardPredictor).OutputParameter();
-
-  performanceError += vRClassRewardFunction.Forward(rewardInput, target);
-
-  return performanceError;
-}
-
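
The value returned by Evaluate() is the sum of two terms: a
negative-log-likelihood loss on the classifier output and the variance-reduced
REINFORCE reward term computed from the reward predictor. Assuming log-softmax
outputs, as produced by the classifier network above, the first term reduces
to the negated log-probability of the target class:

    #include <armadillo>
    #include <iostream>

    int main()
    {
      // Stand-in for the classifier's log-softmax output, three classes.
      arma::colvec logProbs("-2.3 -0.4 -1.6");
      const arma::uword targetClass = 1;

      // Negative log likelihood of the target class.
      const double nll = -logProbs(targetClass);
      std::cout << "NLL = " << nll << std::endl; // 0.4
    }
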
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType,
-  typename MatType
->
-void RecurrentNeuralAttention<
-  LocatorType,
-  LocationSensorType,
-  GlimpseSensorType,
-  GlimpseType,
-  StartType,
-  FeedbackType,
-  TransferType,
-  ClassifierType,
-  RewardPredictorType,
-  InitializationRuleType,
-  MatType
->::Gradient(const arma::mat& /* unused */,
-            const size_t i,
-            arma::mat& gradient)
-{
-  Evaluate(parameter, i, false);
-
-  // Reset the gradient.
-  if (gradient.is_empty())
-  {
-    gradient = arma::zeros<arma::mat>(parameter.n_rows, parameter.n_cols);
-  }
-  else
-  {
-    gradient.zeros();
-  }
-
-  // Reset the recurrent gradient.
-  if (recurrentGradient.is_empty())
-  {
-    recurrentGradient = arma::zeros<arma::mat>(parameter.n_rows,
-        parameter.n_cols);
-
-    actionError = arma::zeros<arma::mat>(
-        std::get<std::tuple_size<LocatorType>::value - 1>(
-        locator).OutputParameter().n_rows,
-        std::get<std::tuple_size<LocatorType>::value - 1>(
-        locator).OutputParameter().n_cols);
-  }
-  else
-  {
-    recurrentGradient.zeros();
-  }
-
-  // Set the recurrent gradient.
-  NetworkGradients(recurrentGradient, this->locator);
-  NetworkGradients(recurrentGradient, this->locationSensor, locatorSize);
-  NetworkGradients(recurrentGradient, this->glimpseSensor, locatorSize +
-      locationSensorSize);
-  NetworkGradients(recurrentGradient, this->glimpse, locatorSize +
-      locationSensorSize + glimpseSensorSize);
-  NetworkGradients(recurrentGradient, this->feedback, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize);
-  NetworkGradients(recurrentGradient, this->transfer, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize);
-
-  // Set the gradient.
-  NetworkGradients(gradient, this->classifier, locatorSize + locationSensorSize
-      + glimpseSensorSize + glimpseSize + feedbackSize + transferSize);
-  NetworkGradients(gradient, this->rewardPredictor, locatorSize +
-      locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize +
-      transferSize + classifierSize);
-  NetworkGradients(gradient, this->start, locatorSize + locationSensorSize +
-      glimpseSensorSize + glimpseSize + feedbackSize + transferSize +
-      classifierSize + rewardPredictorSize);
-
-  // Negative log likelihood backward pass.
-  negativeLogLikelihoodFunction.Backward(std::get<std::tuple_size<
-      ClassifierType>::value - 1>(classifier).OutputParameter(), target,
-      negativeLogLikelihoodFunction.OutputParameter());
-
-  const double reward = vRClassRewardFunction.Backward(rewardInput, target,
-      vRClassRewardFunction.OutputParameter());
-
-  // Propagate the reward through all modules.
-  ResetReward(reward, locator);
-  ResetReward(reward, locationSensor);
-  ResetReward(reward, glimpseSensor);
-  ResetReward(reward, glimpse);
-  ResetReward(reward, classifier);
-
-  // RewardPredictor backward pass.
-  Backward(vRClassRewardFunction.OutputParameter()(1, 0), rewardPredictor);
-
-  arma::mat classifierError =
-    negativeLogLikelihoodFunction.OutputParameter() +
-    vRClassRewardFunction.OutputParameter()(0, 0) +
-    std::get<0>(rewardPredictor).Delta();
-
-  // Classifier backward pass.
-  Backward(classifierError, classifier);
-
-  // Set the initial recurrent error for the first backward step.
-  arma::mat recurrentError = std::get<0>(classifier).Delta();
-
-  // Note: `step` is unsigned, so the loop is terminated by the break at
-  // step == 0 below rather than by the (intentionally empty) condition.
-  for (step = nStep - 1; ; step--)
-  {
-    // Load the locator activations.
-    LoadActivations(locatorInput[step], locatorActivations,
-        locatorActivationsCounter, locator);
-
-    // Load the location sensor activations.
-    LoadActivations(std::get<std::tuple_size<LocatorType>::value - 1>(
-        locator).OutputParameter(), locationSensorActivations,
-        locationSensorActivationsCounter, locationSensor);
-
-    // Load the glimpse sensor activations.
-    LoadActivations(input, glimpseSensorMatActivations,
-        glimpseSensorCubeActivations, glimpseSensorMatCounter,
-        glimpseSensorCubeCounter, glimpseSensor);
-
-    // Concatenate the output activations of the location and glimpse sensors.
-    arma::mat concatLayerOutput = arma::join_cols(
-        std::get<std::tuple_size<LocationSensorType>::value - 1>(
-        locationSensor).OutputParameter(),
-        std::get<std::tuple_size<GlimpseSensorType>::value - 1>(
-        glimpseSensor).OutputParameter());
-
-    // Load the glimpse activations.
-    LoadActivations(concatLayerOutput, glimpseActivations,
-        glimpseActivationsCounter, glimpse);
-
-    if (step == 0)
-    {
-      // Load the transfer activations.
-      LoadActivations(std::get<std::tuple_size<StartType>::value - 1>(
-          start).OutputParameter(), transferActivations,
-          transferActivationsCounter, transfer);
-    }
-    else
-    {
-      // Load the feedback activations.
-      LoadActivations(std::get<std::tuple_size<TransferType>::value - 1>(
-          transfer).OutputParameter(), feedbackActivations,
-          feedbackActivationsCounter, feedback);
-
-      arma::mat feedbackLayerOutput =
-        std::get<std::tuple_size<GlimpseType>::value - 1>(
-        glimpse).OutputParameter() +
-        std::get<std::tuple_size<FeedbackType>::value - 1>(
-        feedback).OutputParameter();
-
-      // Load the transfer activations.
-      LoadActivations(feedbackLayerOutput, transferActivations,
-          transferActivationsCounter, transfer);
-    }
-
-    // Set the location parameter for all layers that implement a Location
-    // function, e.g., the GlimpseLayer.
-    ResetLocation(location[step], glimpseSensor);
-
-    // Locator backward pass.
-    Backward(actionError, locator);
-
-    // Transfer backward pass.
-    Backward(recurrentError, transfer);
-
-    // Glimpse backward pass.
-    Backward(std::get<0>(transfer).Delta(), glimpse);
-
-    // Split up the error of the concat layer.
-    arma::mat locationSensorError = std::get<0>(glimpse).Delta().submat(
-        0, 0, std::get<0>(glimpse).Delta().n_elem / 2 - 1, 0);
-    arma::mat glimpseSensorError = std::get<0>(glimpse).Delta().submat(
-        std::get<0>(glimpse).Delta().n_elem / 2, 0,
-        std::get<0>(glimpse).Delta().n_elem - 1, 0);
-
-    // Location sensor backward pass.
-    Backward(locationSensorError, locationSensor);
-
-    // Glimpse sensor backward pass.
-    Backward(glimpseSensorError, glimpseSensor);
-
-    if (step != 0)
-    {
-      // Feedback backward pass.
-      Backward(std::get<0>(transfer).Delta(), feedback);
-    }
-
-    // Update the recurrent network gradients.
-    UpdateGradients(std::get<0>(locationSensor).Delta(), locator);
-    UpdateGradients(std::get<0>(transfer).Delta(), glimpse);
-    UpdateGradients(std::get<0>(transfer).Delta(), locationSensor);
-    UpdateGradients(std::get<0>(transfer).Delta(), glimpseSensor);
-
-    // Feedback module.
-    if (step != 0)
-    {
-      UpdateGradients(feedbackActivationsInput[step - 1],
-          std::get<0>(transfer).Delta(), feedback);
-    }
-    else
-    {
-      // Set the feedback gradient to zero.
-      recurrentGradient.submat(locatorSize + locationSensorSize +
-          glimpseSensorSize + glimpseSize, 0, locatorSize + locationSensorSize +
-          glimpseSensorSize + glimpseSize + feedbackSize - 1, 0).zeros();
-
-      UpdateGradients(std::get<0>(transfer).Delta(), start);
-    }
-
-    // Update the overall recurrent gradient.
-    gradient += recurrentGradient;
-
-    if (step != 0)
-    {
-      // Update the recurrent error for the next backward step.
-      recurrentError = std::get<0>(locator).Delta() +
-          std::get<0>(feedback).Delta();
-    }
-    else
-    {
-      break;
-    }
-  }
-
-  // Reward predictor gradient update.
-  UpdateGradients(vRClassRewardFunction.OutputParameter()(1, 0),
-      rewardPredictor);
-
-  // Classifier gradient update.
-  UpdateGradients(std::get<1>(classifier).Delta(), classifier);
-}
-
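
Gradient() above implements back-propagation through time: the stored forward
activations are replayed in reverse and one recurrentGradient contribution per
step is summed into the overall gradient. A minimal numeric sketch of that
accumulation; the `step-- > 0` idiom shown here is the standard way to run an
unsigned counter backwards without relying on an in-loop break:

    #include <armadillo>

    int main()
    {
      const arma::uword nStep = 7;
      arma::mat gradient(10, 1, arma::fill::zeros);
      arma::mat recurrentGradient(10, 1);

      // Reverse loop over [nStep - 1, 0]; cannot wrap around.
      for (arma::uword step = nStep; step-- > 0; )
      {
        recurrentGradient.randu(); // Stand-in for one backward step.
        gradient += recurrentGradient;
      }
    }
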
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType,
-  typename MatType
->
-const arma::mat& RecurrentNeuralAttention<
-  LocatorType,
-  LocationSensorType,
-  GlimpseSensorType,
-  GlimpseType,
-  StartType,
-  FeedbackType,
-  TransferType,
-  ClassifierType,
-  RewardPredictorType,
-  InitializationRuleType,
-  MatType
->::Location()
-{
-  if (!location.empty())
-  {
-    evaluationLocation = arma::mat(location[0].n_elem, location.size());
-
-    for (size_t i = 0; i < location.size(); i++)
-    {
-      evaluationLocation.col(i) = arma::vectorise(location[i]);
-    }
-  }
-
-  return evaluationLocation;
-}
-
-template<
-  typename LocatorType,
-  typename LocationSensorType,
-  typename GlimpseSensorType,
-  typename GlimpseType,
-  typename StartType,
-  typename FeedbackType,
-  typename TransferType,
-  typename ClassifierType,
-  typename RewardPredictorType,
-  typename InitializationRuleType,
-  typename MatType
->
-template<typename Archive>
-void RecurrentNeuralAttention<
-  LocatorType,
-  LocationSensorType,
-  GlimpseSensorType,
-  GlimpseType,
-  StartType,
-  FeedbackType,
-  TransferType,
-  ClassifierType,
-  RewardPredictorType,
-  InitializationRuleType,
-  MatType
->::Serialize(Archive& ar, const unsigned int /* version */)
-{
-  ar & data::CreateNVP(parameter, "parameter");
-  ar & data::CreateNVP(inputSize, "inputSize");
-  ar & data::CreateNVP(nStep, "nStep");
-
-  // If we are loading, we need to initialize the weights.
-  if (Archive::is_loading::value)
-  {
-    // Set the network size.
-    locatorSize = NetworkSize(this->locator);
-    locationSensorSize = NetworkSize(this->locationSensor);
-    glimpseSensorSize = NetworkSize(this->glimpseSensor);
-    glimpseSize = NetworkSize(this->glimpse);
-    feedbackSize = NetworkSize(this->feedback);
-    transferSize = NetworkSize(this->transfer);
-    classifierSize = NetworkSize(this->classifier);
-    rewardPredictorSize = NetworkSize(this->rewardPredictor);
-    startSize = NetworkSize(this->start);
-
-    // Set the network weights.
-    NetworkWeights(parameter, this->locator);
-    NetworkWeights(parameter, this->locationSensor, locatorSize);
-    NetworkWeights(parameter, this->glimpseSensor, locatorSize +
-        locationSensorSize);
-    NetworkWeights(parameter, this->glimpse, locatorSize + locationSensorSize +
-        glimpseSensorSize);
-    NetworkWeights(parameter, this->feedback, locatorSize + locationSensorSize +
-        glimpseSensorSize + glimpseSize);
-    NetworkWeights(parameter, this->transfer, locatorSize + locationSensorSize +
-        glimpseSensorSize + glimpseSize + feedbackSize);
-    NetworkWeights(parameter, this->classifier, locatorSize + locationSensorSize
-        + glimpseSensorSize + glimpseSize + feedbackSize + transferSize);
-    NetworkWeights(parameter, this->rewardPredictor, locatorSize +
-        locationSensorSize + glimpseSensorSize + glimpseSize + feedbackSize +
-        transferSize + classifierSize);
-    NetworkWeights(parameter, this->start, locatorSize + locationSensorSize +
-        glimpseSensorSize + glimpseSize + feedbackSize + transferSize +
-        classifierSize + rewardPredictorSize);
-  }
-}
-
-} // namespace ann
-} // namespace mlpack
-
-#endif
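
Serialize() above follows the usual boost::serialization single-function
pattern: one code path handles both saving and loading, and
Archive::is_loading::value selects the load-time fixups (here, re-aliasing the
module weights into `parameter`). A self-contained sketch of that pattern
(Model is illustrative; link with -lboost_serialization):

    #include <fstream>
    #include <iostream>
    #include <boost/archive/text_iarchive.hpp>
    #include <boost/archive/text_oarchive.hpp>

    struct Model
    {
      double weight;

      template<typename Archive>
      void serialize(Archive& ar, const unsigned int /* version */)
      {
        ar & weight;

        // Load-time fixups go in a branch like the one in Serialize().
        if (Archive::is_loading::value)
          std::cout << "loaded weight = " << weight << std::endl;
      }
    };

    int main()
    {
      {
        const Model m{3.14};
        std::ofstream ofs("model.txt");
        boost::archive::text_oarchive oa(ofs);
        oa << m;
      }
      {
        Model m;
        std::ifstream ifs("model.txt");
        boost::archive::text_iarchive ia(ifs);
        ia >> m; // Triggers the is_loading branch.
      }
    }
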
diff --git a/src/mlpack/methods/rmva/rmva_main.cpp b/src/mlpack/methods/rmva/rmva_main.cpp
deleted file mode 100644
index 8c95765..0000000
--- a/src/mlpack/methods/rmva/rmva_main.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-/**
- * @file rmva_main.cpp
- * @author Marcus Edel
- *
- * Main executable for the Recurrent Model for Visual Attention.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include "rmva.hpp"
-
-#include <mlpack/methods/ann/layer/glimpse_layer.hpp>
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/bias_layer.hpp>
-#include <mlpack/methods/ann/layer/base_layer.hpp>
-#include <mlpack/methods/ann/layer/reinforce_normal_layer.hpp>
-#include <mlpack/methods/ann/layer/multiply_constant_layer.hpp>
-#include <mlpack/methods/ann/layer/constant_layer.hpp>
-#include <mlpack/methods/ann/layer/log_softmax_layer.hpp>
-#include <mlpack/methods/ann/layer/hard_tanh_layer.hpp>
-
-#include <mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp>
-#include <mlpack/core/optimizers/sgd/sgd.hpp>
-
-using namespace mlpack;
-using namespace mlpack::ann;
-using namespace mlpack::optimization;
-using namespace std;
-
-PROGRAM_INFO("Recurrent Model for Visual Attention",
-    "This program trains the Recurrent Model for Visual Attention on the given "
-    "labeled training set, or loads a model from the given model file, and then"
-    " may use that trained model to classify the points in a given test set."
-    "\n\n"
-    "Labels are expected to be passed in separately as their own file "
-    "(--labels_file).  If training is not desired, a pre-existing model can be "
-    "loaded with the --input_model_file (-m) option."
-    "\n\n"
-    "If classifying a test set is desired, the test set should be in the file "
-    "specified with the --test_file (-T) option, and the classifications will "
-    "be saved to the file specified with the --output_file (-o) option.  If "
-    "saving a trained model is desired, the --output_model_file (-M) option "
-    "should be given.");
-
-// Model loading/saving.
-PARAM_STRING_IN("input_model_file", "File containing the Recurrent Model for "
-    "Visual Attention.", "m", "");
-PARAM_STRING_OUT("output_model_file", "File to save trained Recurrent Model for"
-    " Visual Attention to.", "M");
-
-// Training parameters.
-PARAM_STRING_IN("training_file", "A file containing the training set.", "t",
-    "");
-PARAM_STRING_IN("labels_file", "A file containing labels for the training set.",
-    "l", "");
-
-PARAM_STRING_IN("optimizer", "Optimizer to use; 'sgd', 'minibatch-sgd', or "
-    "'lbfgs'.", "O", "minibatch-sgd");
-
-PARAM_INT_IN("max_iterations", "Maximum number of iterations for SGD or RMSProp"
-    " (0 indicates no limit).", "n", 500000);
-PARAM_DOUBLE_IN("tolerance", "Maximum tolerance for termination of SGD or "
-    "RMSProp.", "e", 1e-7);
-
-PARAM_DOUBLE_IN("step_size", "Step size for stochastic gradient descent "
-    "(alpha),", "a", 0.01);
-PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are "
-    "visited for SGD or mini-batch SGD.", "L");
-PARAM_INT_IN("batch_size", "Batch size for mini-batch SGD.", "b", 20);
-
-PARAM_INT_IN("rho", "Number of steps for the back-propagate through time.", "r",
-    7);
-
-PARAM_INT_IN("classes", "The number of classes.", "c", 10);
-
-PARAM_INT_IN("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
-
-// Test parameters.
-PARAM_STRING_IN("test_file", "A file containing the test set.", "T", "");
-PARAM_STRING_OUT("output_file", "The file in which the predicted labels for the"
-    " test set will be written.", "o");
-
-int main(int argc, char** argv)
-{
-  CLI::ParseCommandLine(argc, argv);
-
-  // Check input parameters.
-  if (CLI::HasParam("training_file") && CLI::HasParam("input_model_file"))
-    Log::Fatal << "Cannot specify both --training_file (-t) and "
-        << "--input_model_file (-m)!" << endl;
-
-  if (!CLI::HasParam("training_file") && !CLI::HasParam("input_model_file"))
-    Log::Fatal << "Neither --training_file (-t) nor --input_model_file (-m) are"
-        << " specified!" << endl;
-
-  if (!CLI::HasParam("training_file") && CLI::HasParam("labels_file"))
-    Log::Warn << "--labels_file (-l) ignored because --training_file (-t) is "
-        << "not specified." << endl;
-
-  if (!CLI::HasParam("output_file") && !CLI::HasParam("output_model_file"))
-    Log::Warn << "Neither --output_file (-o) nor --output_model_file (-M) "
-        << "specified; no output will be saved!" << endl;
-
-  if (CLI::HasParam("output_file") && !CLI::HasParam("test_file"))
-    Log::Warn << "--output_file (-o) ignored because no test file specified "
-        << "with --test_file (-T)." << endl;
-
-  if (!CLI::HasParam("output_file") && CLI::HasParam("test_file"))
-    Log::Warn << "--test_file (-T) specified, but classification results will "
-        << "not be saved because --output_file (-o) is not specified." << endl;
-
-  const string optimizerType = CLI::GetParam<string>("optimizer");
-
-  if ((optimizerType != "sgd") && (optimizerType != "lbfgs") &&
-      (optimizerType != "minibatch-sgd"))
-  {
-    Log::Fatal << "Optimizer type '" << optimizerType << "' unknown; must be "
-        << "'sgd', 'minibatch-sgd', or 'lbfgs'!" << endl;
-  }
-
-  const double stepSize = CLI::GetParam<double>("step_size");
-  const size_t maxIterations = (size_t) CLI::GetParam<int>("max_iterations");
-  const double tolerance = CLI::GetParam<double>("tolerance");
-  const bool shuffle = !CLI::HasParam("linear_scan");
-  const size_t batchSize = (size_t) CLI::GetParam<int>("batch_size");
-  const size_t rho = (size_t) CLI::GetParam<int>("rho");
-  const size_t numClasses = (size_t) CLI::GetParam<int>("classes");
-
-  const size_t hiddenSize = 256;
-  const double unitPixels = 13;
-  const double locatorStd = 0.11;
-  const size_t imageSize = 28;
-  const size_t locatorHiddenSize = 128;
-  const size_t glimpsePatchSize = 8;
-  const size_t glimpseDepth = 1;
-  const size_t glimpseScale = 2;
-  const size_t glimpseHiddenSize = 128;
-  const size_t imageHiddenSize = 256;
-
-  // Locator network.
-  LinearMappingLayer<> linearLayer0(hiddenSize, 2);
-  BiasLayer<> biasLayer0(2, 1);
-  HardTanHLayer<> hardTanhLayer0;
-  ReinforceNormalLayer<> reinforceNormalLayer0(2 * locatorStd);
-  HardTanHLayer<> hardTanhLayer1;
-  MultiplyConstantLayer<> multiplyConstantLayer0(2 * unitPixels / imageSize);
-  auto locator = std::tie(linearLayer0, biasLayer0, hardTanhLayer0,
-      reinforceNormalLayer0, hardTanhLayer1, multiplyConstantLayer0);
-
-  // Location sensor network.
-  LinearLayer<> linearLayer1(2, locatorHiddenSize);
-  BiasLayer<> biasLayer1(locatorHiddenSize, 1);
-  ReLULayer<> rectifierLayer0;
-  auto locationSensor = std::tie(linearLayer1, biasLayer1, rectifierLayer0);
-
-  // Glimpse sensor network.
-  GlimpseLayer<> glimpseLayer0(1, glimpsePatchSize, glimpseDepth, glimpseScale);
-  LinearMappingLayer<> linearLayer2(64, glimpseHiddenSize);
-  BiasLayer<> biasLayer2(glimpseHiddenSize, 1);
-  ReLULayer<> rectifierLayer1;
-  auto glimpseSensor = std::tie(glimpseLayer0, linearLayer2, biasLayer2,
-      rectifierLayer1);
-
-  // Glimpse network.
-  LinearLayer<> linearLayer3(glimpseHiddenSize + locatorHiddenSize,
-      imageHiddenSize);
-  BiasLayer<> biasLayer3(imageHiddenSize, 1);
-  ReLULayer<> rectifierLayer2;
-  LinearLayer<> linearLayer4(imageHiddenSize, hiddenSize);
-  BiasLayer<> biasLayer4(hiddenSize, 1);
-  auto glimpse = std::tie(linearLayer3, biasLayer3, rectifierLayer2,
-      linearLayer4, biasLayer4);
-
-  // Feedback network.
-  LinearLayer<> recurrentLayer0(imageHiddenSize, hiddenSize);
-  BiasLayer<> recurrentLayerBias0(hiddenSize, 1);
-  auto feedback = std::tie(recurrentLayer0, recurrentLayerBias0);
-
-  // Start network.
-  AdditionLayer<> startLayer0(hiddenSize, 1);
-  auto start = std::tie(startLayer0);
-
-  // Transfer network.
-  ReLULayer<> rectifierLayer3;
-  auto transfer = std::tie(rectifierLayer3);
-
-  // Classifier network.
-  LinearLayer<> linearLayer5(hiddenSize, numClasses);
-  BiasLayer<> biasLayer6(numClasses, 1);
-  LogSoftmaxLayer<> logSoftmaxLayer0;
-  auto classifier = std::tie(linearLayer5, biasLayer6, logSoftmaxLayer0);
-
-  // Reward predictor network.
-  ConstantLayer<> constantLayer0(1, 1);
-  AdditionLayer<> additionLayer0(1, 1);
-  auto rewardPredictor = std::tie(constantLayer0, additionLayer0);
-
-  // Recurrent Model for Visual Attention.
-  RecurrentNeuralAttention<decltype(locator),
-                           decltype(locationSensor),
-                           decltype(glimpseSensor),
-                           decltype(glimpse),
-                           decltype(start),
-                           decltype(feedback),
-                           decltype(transfer),
-                           decltype(classifier),
-                           decltype(rewardPredictor),
-                           RandomInitialization>
-    net(locator, locationSensor, glimpseSensor, glimpse, start, feedback,
-        transfer, classifier, rewardPredictor, rho);
-
-  // Either we have to train a model, or load a model.
-  if (CLI::HasParam("training_file"))
-  {
-    const string trainingFile = CLI::GetParam<string>("training_file");
-    arma::mat trainingData;
-    data::Load(trainingFile, trainingData, true);
-
-    arma::mat labels;
-
-    // Did the user pass in labels?
-    const string labelsFilename = CLI::GetParam<string>("labels_file");
-    if (labelsFilename != "")
-    {
-      // Load labels.
-      data::Load(labelsFilename, labels, true, false);
-
-      // Do the labels need to be transposed?
-      if (labels.n_cols == 1)
-        labels = labels.t();
-    }
-
-    // Now run the optimization.
-    if (optimizerType == "sgd")
-    {
-      SGD<decltype(net)> opt(net);
-      opt.StepSize() = stepSize;
-      opt.MaxIterations() = maxIterations;
-      opt.Tolerance() = tolerance;
-      opt.Shuffle() = shuffle;
-
-      Timer::Start("rmva_training");
-      net.Train(trainingData, labels, opt);
-      Timer::Stop("rmva_training");
-    }
-    else if (optimizerType == "minibatch-sgd")
-    {
-      MiniBatchSGD<decltype(net)> opt(net);
-      opt.StepSize() = stepSize;
-      opt.MaxIterations() = maxIterations;
-      opt.Tolerance() = tolerance;
-      opt.Shuffle() = shuffle;
-      opt.BatchSize() = batchSize;
-
-      Timer::Start("rmva_training");
-      net.Train(trainingData, labels, opt);
-      Timer::Stop("rmva_training");
-    }
-  }
-  else
-  {
-    // Load the model from file.
-    data::Load(CLI::GetParam<string>("input_model_file"), "rmva_model", net);
-  }
-
-  // Do we need to do testing?
-  if (CLI::HasParam("test_file"))
-  {
-    const string testingDataFilename = CLI::GetParam<std::string>("test_file");
-    arma::mat testingData;
-    data::Load(testingDataFilename, testingData, true);
-
-    // Time the classification of the test set.
-    arma::mat results;
-    Timer::Start("rmva_testing");
-    net.Predict(testingData, results);
-    Timer::Stop("rmva_testing");
-
-    if (CLI::HasParam("output_file"))
-    {
-      // Output results.
-      const string outputFilename = CLI::GetParam<string>("output_file");
-      data::Save(outputFilename, results, true);
-    }
-  }
-
-  // Save the model, if requested.
-  if (CLI::HasParam("output_model_file"))
-    data::Save(CLI::GetParam<string>("output_model_file"), "rmva_model", net);
-}
diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index 3b3ab0d..a43c1b2 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -1,9 +1,6 @@
 # mlpack test executable.
 add_executable(mlpack_test
-  activation_functions_test.cpp
   adaboost_test.cpp
-  adam_test.cpp
-  ada_delta_test.cpp
   akfn_test.cpp
   aknn_test.cpp
   arma_extend_test.cpp
@@ -12,8 +9,6 @@ add_executable(mlpack_test
   binarize_test.cpp
   cf_test.cpp
   cli_test.cpp
-  convolution_test.cpp
-  convolutional_network_test.cpp
   cosine_tree_test.cpp
   decision_stump_test.cpp
   det_test.cpp
@@ -21,7 +16,6 @@ add_executable(mlpack_test
   drusilla_select_test.cpp
   emst_test.cpp
   fastmks_test.cpp
-  feedforward_network_test.cpp
   gmm_test.cpp
   gradient_descent_test.cpp
   hmm_test.cpp
@@ -29,7 +23,6 @@ add_executable(mlpack_test
   hyperplane_test.cpp
   imputation_test.cpp
   ind2sub_test.cpp
-  init_rules_test.cpp
   kernel_test.cpp
   kernel_pca_test.cpp
   kernel_traits_test.cpp
@@ -56,7 +49,6 @@ add_executable(mlpack_test
   mlpack_test.cpp
   nbc_test.cpp
   nca_test.cpp
-  network_util_test.cpp
   nmf_test.cpp
   nystroem_method_test.cpp
   octree_test.cpp
@@ -67,10 +59,8 @@ add_executable(mlpack_test
   radical_test.cpp
   randomized_svd_test.cpp
   range_search_test.cpp
-  recurrent_network_test.cpp
   rectangle_tree_test.cpp
   regularized_svd_test.cpp
-  rmsprop_test.cpp
   sa_test.cpp
   sdp_primal_dual_test.cpp
   sgd_test.cpp
diff --git a/src/mlpack/tests/activation_functions_test.cpp b/src/mlpack/tests/activation_functions_test.cpp
deleted file mode 100644
index bebca0d..0000000
--- a/src/mlpack/tests/activation_functions_test.cpp
+++ /dev/null
@@ -1,328 +0,0 @@
-/**
- * @file activation_functions_test.cpp
- * @author Marcus Edel
- * @author Dhawal Arora
- *
- * Tests for the various activation functions.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
-#include <mlpack/methods/ann/activation_functions/identity_function.hpp>
-#include <mlpack/methods/ann/activation_functions/softsign_function.hpp>
-#include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
-#include <mlpack/methods/ann/activation_functions/rectifier_function.hpp>
-
-#include <mlpack/methods/ann/ffn.hpp>
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
-
-#include <mlpack/methods/ann/layer/bias_layer.hpp>
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/base_layer.hpp>
-#include <mlpack/methods/ann/layer/binary_classification_layer.hpp>
-#include <mlpack/methods/ann/layer/leaky_relu_layer.hpp>
-#include <mlpack/methods/ann/layer/hard_tanh_layer.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-BOOST_AUTO_TEST_SUITE(ActivationFunctionsTest);
-
-// Generate dataset for activation function tests.
-const arma::colvec activationData("-2 3.2 4.5 -100.2 1 -1 2 0");
-
-/*
- * Implementation of the activation function test.
- *
- * @param input Input data used for evaluating the activation function.
- * @param target Target data used to evaluate the activation.
- *
- * @tparam ActivationFunction Activation function used for the check.
- */
-template<class ActivationFunction>
-void CheckActivationCorrect(const arma::colvec input, const arma::colvec target)
-{
-  // Test the activation function using a single value as input.
-  for (size_t i = 0; i < target.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(ActivationFunction::fn(input.at(i)),
-        target.at(i), 1e-3);
-  }
-
-  // Test the activation function using the entire vector as input.
-  arma::colvec activations;
-  ActivationFunction::fn(input, activations);
-  for (size_t i = 0; i < activations.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3);
-  }
-}
-
-/*
- * Implementation of the activation function derivative test.
- *
- * @param input Input data used for evaluating the activation function.
- * @param target Target data used to evaluate the activation.
- *
- * @tparam ActivationFunction Activation function used for the check.
- */
-template<class ActivationFunction>
-void CheckDerivativeCorrect(const arma::colvec input, const arma::colvec target)
-{
-  // Test the calculation of the derivatives using a single value as input.
-  for (size_t i = 0; i < target.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(ActivationFunction::deriv(input.at(i)),
-        target.at(i), 1e-3);
-  }
-
-  // Test the calculation of the derivatives using the entire vector as input.
-  arma::colvec derivatives;
-  ActivationFunction::deriv(input, derivatives);
-  for (size_t i = 0; i < derivatives.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3);
-  }
-}
-
-/*
- * Implementation of the activation function inverse test.
- *
- * @param input Input data used for evaluating the activation function.
- *
- * @tparam ActivationFunction Activation function used for the check.
- */
-template<class ActivationFunction>
-void CheckInverseCorrect(const arma::colvec input)
-{
-  // Test the calculation of the inverse using a single value as input.
-  for (size_t i = 0; i < input.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(ActivationFunction::inv(ActivationFunction::fn(
-        input.at(i))), input.at(i), 1e-3);
-  }
-
-  // Test the calculation of the inverse using the entire vector as input.
-  arma::colvec activations;
-  ActivationFunction::fn(input, activations);
-  ActivationFunction::inv(activations, activations);
-
-  for (size_t i = 0; i < input.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(activations.at(i), input.at(i), 1e-3);
-  }
-}
-
-/*
- * Implementation of the HardTanH activation function test. The function is
- * implemented as a HardTanH layer in hard_tanh_layer.hpp.
- *
- * @param input Input data used for evaluating the HardTanH activation function.
- * @param target Target data used to evaluate the HardTanH activation.
- */
-void CheckHardTanHActivationCorrect(const arma::colvec input,
-                                    const arma::colvec target)
-{
-  HardTanHLayer<> htf;
-
-  // Test the activation function using the entire vector as input.
-  arma::colvec activations;
-  htf.Forward(input, activations);
-  for (size_t i = 0; i < activations.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3);
-  }
-}
-
-/*
- * Implementation of the HardTanH activation function derivative test. The
- * derivative is implemented as a HardTanH layer in hard_tanh_layer.hpp.
- *
- * @param input Input data used for evaluating the HardTanH activation function.
- * @param target Target data used to evaluate the HardTanH activation.
- */
-void CheckHardTanHDerivativeCorrect(const arma::colvec input,
-                                    const arma::colvec target)
-{
-  HardTanHLayer<> htf;
-
-  // Test the calculation of the derivatives using the entire vector as input.
-  arma::colvec derivatives;
-
-  // This error vector will be set to 1 to get the derivatives.
-  arma::colvec error(input.n_elem);
-  htf.Backward(input, (arma::colvec)error.ones(), derivatives);
-  for (size_t i = 0; i < derivatives.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3);
-  }
-}
-
-/*
- * Implementation of the LeakyReLU activation function test. The function is
- * implemented as a LeakyReLU layer in leaky_relu_layer.hpp.
- *
- * @param input Input data used for evaluating the LeakyReLU activation function.
- * @param target Target data used to evaluate the LeakyReLU activation.
- */
-void CheckLeakyReLUActivationCorrect(const arma::colvec input,
-                                     const arma::colvec target)
-{
-  LeakyReLULayer<> lrf;
-
-  // Test the activation function using the entire vector as input.
-  arma::colvec activations;
-  lrf.Forward(input, activations);
-  for (size_t i = 0; i < activations.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(activations.at(i), target.at(i), 1e-3);
-  }
-}
-
-/*
- * Implementation of the LeakyReLU activation function derivative test. The
- * derivative is implemented as a LeakyReLU layer in leaky_relu_layer.hpp.
- *
- * @param input Input data used for evaluating the LeakyReLU activation function.
- * @param target Target data used to evaluate the LeakyReLU activation.
- */
-void CheckLeakyReLUDerivativeCorrect(const arma::colvec input,
-                                     const arma::colvec target)
-{
-  LeakyReLULayer<> lrf;
-
-  // Test the calculation of the derivatives using the entire vector as input.
-  arma::colvec derivatives;
-
-  // This error vector will be set to 1 to get the derivatives.
-  arma::colvec error(input.n_elem);
-  lrf.Backward(input, (arma::colvec)error.ones(), derivatives);
-  for (size_t i = 0; i < derivatives.n_elem; i++)
-  {
-    BOOST_REQUIRE_CLOSE(derivatives.at(i), target.at(i), 1e-3);
-  }
-}
-
-/**
- * Basic test of the tanh function.
- */
-BOOST_AUTO_TEST_CASE(TanhFunctionTest)
-{
-  const arma::colvec desiredActivations("-0.96402758 0.9966824 0.99975321 -1 \
-                                         0.76159416 -0.76159416 0.96402758 0");
-
-  const arma::colvec desiredDerivatives("0.07065082 0.00662419 0.00049352 0 \
-                                         0.41997434 0.41997434 0.07065082 1");
-
-  CheckActivationCorrect<TanhFunction>(activationData, desiredActivations);
-  CheckDerivativeCorrect<TanhFunction>(desiredActivations, desiredDerivatives);
-  CheckInverseCorrect<TanhFunction>(desiredActivations);
-}
-
-/**
- * Basic test of the logistic function.
- */
-BOOST_AUTO_TEST_CASE(LogisticFunctionTest)
-{
-  const arma::colvec desiredActivations("1.19202922e-01 9.60834277e-01 \
-                                         9.89013057e-01 3.04574e-44 \
-                                         7.31058579e-01 2.68941421e-01 \
-                                         8.80797078e-01 0.5");
-
-  const arma::colvec desiredDerivatives("0.10499359 0.03763177 0.01086623 \
-                                         3.04574e-44 0.19661193 0.19661193 \
-                                         0.10499359 0.25");
-
-  CheckActivationCorrect<LogisticFunction>(activationData, desiredActivations);
-  CheckDerivativeCorrect<LogisticFunction>(desiredActivations,
-      desiredDerivatives);
-  CheckInverseCorrect<LogisticFunction>(activationData);
-}
-
-/**
- * Basic test of the softsign function.
- */
-BOOST_AUTO_TEST_CASE(SoftsignFunctionTest)
-{
-  const arma::colvec desiredActivations("-0.66666667 0.76190476 0.81818182 \
-                                         -0.99011858 0.5 -0.5 0.66666667 0");
-
-  const arma::colvec desiredDerivatives("0.11111111 0.05668934 0.03305785 \
-                                         9.7642e-05 0.25 0.25 0.11111111 1");
-
-  CheckActivationCorrect<SoftsignFunction>(activationData, desiredActivations);
-  CheckDerivativeCorrect<SoftsignFunction>(desiredActivations,
-      desiredDerivatives);
-  CheckInverseCorrect<SoftsignFunction>(desiredActivations);
-}
-
-/**
- * Basic test of the identity function.
- */
-BOOST_AUTO_TEST_CASE(IdentityFunctionTest)
-{
-  const arma::colvec desiredDerivatives = arma::ones<arma::colvec>(
-      activationData.n_elem);
-
-  CheckActivationCorrect<IdentityFunction>(activationData, activationData);
-  CheckDerivativeCorrect<IdentityFunction>(activationData, desiredDerivatives);
-}
-
-/**
- * Basic test of the rectifier function.
- */
-BOOST_AUTO_TEST_CASE(RectifierFunctionTest)
-{
-  const arma::colvec desiredActivations("0 3.2 4.5 0 1 0 2 0");
-
-  const arma::colvec desiredDerivatives("0 1 1 0 1 0 1 0");
-
-  CheckActivationCorrect<RectifierFunction>(activationData, desiredActivations);
-  CheckDerivativeCorrect<RectifierFunction>(desiredActivations,
-      desiredDerivatives);
-}
-
-/**
- * Basic test of the LeakyReLU function.
- */
-BOOST_AUTO_TEST_CASE(LeakyReLUFunctionTest)
-{
-  const arma::colvec desiredActivations("-0.06 3.2 4.5 -3.006 \
-                                         1 -0.03 2 0");
-
-  const arma::colvec desiredDerivatives("0.03 1 1 0.03 \
-                                         1 0.03 1 1");
-
-  CheckLeakyReLUActivationCorrect(activationData, desiredActivations);
-  CheckLeakyReLUDerivativeCorrect(desiredActivations, desiredDerivatives);
-}
-
-/**
- * Basic test of the HardTanH function.
- */
-BOOST_AUTO_TEST_CASE(HardTanHFunctionTest)
-{
-  const arma::colvec desiredActivations("-1 1 1 -1 \
-                                         1 -1 1 0");
-
-  const arma::colvec desiredDerivatives("0 0 0 0 \
-                                         1 1 0 1");
-
-  CheckHardTanHActivationCorrect(activationData, desiredActivations);
-  CheckHardTanHDerivativeCorrect(activationData, desiredDerivatives);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
-
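
As a sanity check on the hard-coded targets above: the tanh derivative can be
expressed in terms of the activation itself, tanh'(x) = 1 - tanh(x)^2, which
is why CheckDerivativeCorrect() is fed desiredActivations rather than the raw
inputs. Evaluating at x = -2 reproduces the first entry of each vector:

    #include <cmath>
    #include <cstdio>

    int main()
    {
      const double x = -2.0;
      const double fx = std::tanh(x);   // -0.96402758, first activation.
      const double dfx = 1.0 - fx * fx; //  0.07065082, first derivative.
      std::printf("%.8f %.8f\n", fx, dfx);
    }
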
diff --git a/src/mlpack/tests/ada_delta_test.cpp b/src/mlpack/tests/ada_delta_test.cpp
deleted file mode 100644
index 481e117..0000000
--- a/src/mlpack/tests/ada_delta_test.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * @file ada_delta_test.cpp
- * @author Marcus Edel
- * @author Vasanth Kalingeri
- *
- * Tests the AdaDelta optimizer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/core/optimizers/adadelta/ada_delta.hpp>
-#include <mlpack/core/optimizers/sgd/test_function.hpp>
-#include <mlpack/methods/logistic_regression/logistic_regression.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace arma;
-using namespace mlpack::optimization;
-using namespace mlpack::optimization::test;
-
-using namespace mlpack::distribution;
-using namespace mlpack::regression;
-
-using namespace mlpack;
-
-BOOST_AUTO_TEST_SUITE(AdaDeltaTest);
-
-/**
- * Tests the AdaDelta optimizer using a simple test function.
- */
-BOOST_AUTO_TEST_CASE(SimpleAdaDeltaTestFunction)
-{
-  SGDTestFunction f;
-  AdaDelta<SGDTestFunction> optimizer(f, 0.99, 1e-8, 5000000, 1e-9, true);
-
-  arma::mat coordinates = f.GetInitialPoint();
-  optimizer.Optimize(coordinates);
-
-  BOOST_REQUIRE_SMALL(coordinates[0], 0.003);
-  BOOST_REQUIRE_SMALL(coordinates[1], 0.003);
-  BOOST_REQUIRE_SMALL(coordinates[2], 0.003);
-}
-
-/**
- * Run AdaDelta on logistic regression and make sure the results are acceptable.
- */
-BOOST_AUTO_TEST_CASE(LogisticRegressionTest)
-{
-  // Generate a two-Gaussian dataset.
-  GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
-  GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
-
-  arma::mat data(3, 1000);
-  arma::Row<size_t> responses(1000);
-  for (size_t i = 0; i < 500; ++i)
-  {
-    data.col(i) = g1.Random();
-    responses[i] = 0;
-  }
-  for (size_t i = 500; i < 1000; ++i)
-  {
-    data.col(i) = g2.Random();
-    responses[i] = 1;
-  }
-
-  // Shuffle the dataset.
-  arma::uvec indices = arma::shuffle(arma::linspace<arma::uvec>(0,
-      data.n_cols - 1, data.n_cols));
-  arma::mat shuffledData(3, 1000);
-  arma::Row<size_t> shuffledResponses(1000);
-  for (size_t i = 0; i < data.n_cols; ++i)
-  {
-    shuffledData.col(i) = data.col(indices[i]);
-    shuffledResponses[i] = responses[indices[i]];
-  }
-
-  // Create a test set.
-  arma::mat testData(3, 1000);
-  arma::Row<size_t> testResponses(1000);
-  for (size_t i = 0; i < 500; ++i)
-  {
-    testData.col(i) = g1.Random();
-    testResponses[i] = 0;
-  }
-  for (size_t i = 500; i < 1000; ++i)
-  {
-    testData.col(i) = g2.Random();
-    testResponses[i] = 1;
-  }
-
-  LogisticRegression<> lr(shuffledData.n_rows, 0.5);
-
-  LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5);
-  AdaDelta<LogisticRegressionFunction<> > adaDelta(lrf);
-  lr.Train(adaDelta);
-
-  // Ensure that the error is close to zero.
-  const double acc = lr.ComputeAccuracy(data, responses);
-  BOOST_REQUIRE_CLOSE(acc, 100.0, 0.3); // 0.3% error tolerance.
-
-  const double testAcc = lr.ComputeAccuracy(testData, testResponses);
-  BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance.
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/adam_test.cpp b/src/mlpack/tests/adam_test.cpp
deleted file mode 100644
index 2c52f64..0000000
--- a/src/mlpack/tests/adam_test.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * @file adam_test.cpp
- * @author Vasanth Kalingeri
- *
- * Tests the Adam optimizer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/core/optimizers/adam/adam.hpp>
-#include <mlpack/core/optimizers/sgd/test_function.hpp>
-#include <mlpack/methods/logistic_regression/logistic_regression.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace arma;
-using namespace mlpack::optimization;
-using namespace mlpack::optimization::test;
-
-using namespace mlpack::distribution;
-using namespace mlpack::regression;
-
-using namespace mlpack;
-
-BOOST_AUTO_TEST_SUITE(AdamTest);
-
-/**
- * Tests the Adam optimizer using a simple test function.
- */
-BOOST_AUTO_TEST_CASE(SimpleAdamTestFunction)
-{
-  SGDTestFunction f;
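-  // Constructor arguments, assumed from the Adam interface of this mlpack
-  // version: step size 1e-3, moment decay rates beta1 = 0.9 and
-  // beta2 = 0.999, eps = 1e-8, at most 5000000 iterations, tolerance 1e-9,
-  // and shuffled visitation. These match the defaults suggested in the
-  // original Adam paper.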
-  Adam<SGDTestFunction> optimizer(f, 1e-3, 0.9, 0.999, 1e-8, 5000000, 1e-9,
-      true);
-
-  arma::mat coordinates = f.GetInitialPoint();
-  optimizer.Optimize(coordinates);
-
-  BOOST_REQUIRE_SMALL(coordinates[0], 0.1);
-  BOOST_REQUIRE_SMALL(coordinates[1], 0.1);
-  BOOST_REQUIRE_SMALL(coordinates[2], 0.1);
-}
-
-/**
- * Run Adam on logistic regression and make sure the results are acceptable.
- */
-BOOST_AUTO_TEST_CASE(LogisticRegressionTest)
-{
-  // Generate a two-Gaussian dataset.
-  GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
-  GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
-
-  arma::mat data(3, 1000);
-  arma::Row<size_t> responses(1000);
-  for (size_t i = 0; i < 500; ++i)
-  {
-    data.col(i) = g1.Random();
-    responses[i] = 0;
-  }
-  for (size_t i = 500; i < 1000; ++i)
-  {
-    data.col(i) = g2.Random();
-    responses[i] = 1;
-  }
-
-  // Shuffle the dataset.
-  arma::uvec indices = arma::shuffle(arma::linspace<arma::uvec>(0,
-      data.n_cols - 1, data.n_cols));
-  arma::mat shuffledData(3, 1000);
-  arma::Row<size_t> shuffledResponses(1000);
-  for (size_t i = 0; i < data.n_cols; ++i)
-  {
-    shuffledData.col(i) = data.col(indices[i]);
-    shuffledResponses[i] = responses[indices[i]];
-  }
-
-  // Create a test set.
-  arma::mat testData(3, 1000);
-  arma::Row<size_t> testResponses(1000);
-  for (size_t i = 0; i < 500; ++i)
-  {
-    testData.col(i) = g1.Random();
-    testResponses[i] = 0;
-  }
-  for (size_t i = 500; i < 1000; ++i)
-  {
-    testData.col(i) = g2.Random();
-    testResponses[i] = 1;
-  }
-
-  LogisticRegression<> lr(shuffledData.n_rows, 0.5);
-
-  LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5);
-  Adam<LogisticRegressionFunction<> > adam(lrf);
-  lr.Train(adam);
-
-  // Ensure that the error is close to zero.
-  const double acc = lr.ComputeAccuracy(data, responses);
-  BOOST_REQUIRE_CLOSE(acc, 100.0, 0.3); // 0.3% error tolerance.
-
-  const double testAcc = lr.ComputeAccuracy(testData, testResponses);
-  BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance.
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/convolution_test.cpp b/src/mlpack/tests/convolution_test.cpp
deleted file mode 100644
index a277b9c..0000000
--- a/src/mlpack/tests/convolution_test.cpp
+++ /dev/null
@@ -1,373 +0,0 @@
-/**
- * @file convolution_test.cpp
- * @author Shangtong Zhang
- * @author Marcus Edel
- *
- * Tests for various convolution strategies.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/convolution_rules/border_modes.hpp>
-#include <mlpack/methods/ann/convolution_rules/naive_convolution.hpp>
-#include <mlpack/methods/ann/convolution_rules/fft_convolution.hpp>
-#include <mlpack/methods/ann/convolution_rules/svd_convolution.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-BOOST_AUTO_TEST_SUITE(ConvolutionTest);
-
-/*
- * Implementation of the convolution function test.
- *
- * @param input Input used to perform the convolution.
- * @param filter Filter used to perform the convolution.
- * @param output The reference output data that contains the results of the
- * convolution.
- *
- * @tparam ConvolutionFunction Convolution function used for the check.
- */
-template<class ConvolutionFunction>
-void Convolution2DMethodTest(const arma::mat& input,
-                             const arma::mat& filter,
-                             const arma::mat& output)
-{
-  arma::mat convOutput;
-  ConvolutionFunction::Convolution(input, filter, convOutput);
-
-  // Check the output dimensions.
-  bool b = (convOutput.n_rows == output.n_rows) &&
-      (convOutput.n_cols == output.n_cols);
-  BOOST_REQUIRE_EQUAL(b, 1);
-
-  const double* outputPtr = output.memptr();
-  const double* convOutputPtr = convOutput.memptr();
-
-  for (size_t i = 0; i < output.n_elem; i++, outputPtr++, convOutputPtr++)
-    BOOST_REQUIRE_CLOSE(*outputPtr, *convOutputPtr, 1e-3);
-}
-
-/*
- * Implementation of the convolution function test using 3rd order tensors.
- *
- * @param input Input used to perform the convolution.
- * @param filter Filter used to perform the convolution.
- * @param output The reference output data that contains the results of the
- * convolution.
- *
- * @tparam ConvolutionFunction Convolution function used for the check.
- */
-template<class ConvolutionFunction>
-void Convolution3DMethodTest(const arma::cube& input,
-                             const arma::cube& filter,
-                             const arma::cube& output)
-{
-  arma::cube convOutput;
-  ConvolutionFunction::Convolution(input, filter, convOutput);
-
-  // Check the output dimensions.
-  bool b = (convOutput.n_rows == output.n_rows) &&
-      (convOutput.n_cols == output.n_cols) &&
-      (convOutput.n_slices == output.n_slices);
-  BOOST_REQUIRE_EQUAL(b, 1);
-
-  const double* outputPtr = output.memptr();
-  const double* convOutputPtr = convOutput.memptr();
-
-  for (size_t i = 0; i < output.n_elem; i++, outputPtr++, convOutputPtr++)
-    BOOST_REQUIRE_CLOSE(*outputPtr, *convOutputPtr, 1e-3);
-}
-
-/*
- * Implementation of the convolution function test using a dense matrix as
- * input and 3rd order tensors as filter and output (batch mode).
- *
- * @param input Input used to perform the convolution.
- * @param filter Filter used to perform the convolution.
- * @param output The reference output data that contains the results of the
- * convolution.
- *
- * @tparam ConvolutionFunction Convolution function used for the check.
- */
-template<class ConvolutionFunction>
-void ConvolutionMethodBatchTest(const arma::mat& input,
-                                const arma::cube& filter,
-                                const arma::cube& output)
-{
-  arma::cube convOutput;
-  ConvolutionFunction::Convolution(input, filter, convOutput);
-
-  // Check the output dimensions.
-  bool b = (convOutput.n_rows == output.n_rows) &&
-      (convOutput.n_cols == output.n_cols) &&
-      (convOutput.n_slices == output.n_slices);
-  BOOST_REQUIRE_EQUAL(b, 1);
-
-  const double* outputPtr = output.memptr();
-  const double* convOutputPtr = convOutput.memptr();
-
-  for (size_t i = 0; i < output.n_elem; i++, outputPtr++, convOutputPtr++)
-    BOOST_REQUIRE_CLOSE(*outputPtr, *convOutputPtr, 1e-3);
-}
-
-/**
- * Test the convolution (valid) methods.
- */
-BOOST_AUTO_TEST_CASE(ValidConvolution2DTest)
-{
-  // Generate dataset for convolution function tests.
-  arma::mat input, filter, output;
-  input << 1 << 2 << 3 << 4 << arma::endr
-        << 4 << 1 << 2 << 3 << arma::endr
-        << 3 << 4 << 1 << 2 << arma::endr
-        << 2 << 3 << 4 << 1;
-
-  filter << 1 << 0 << -1 << arma::endr
-         << 0 << 1 << 0 << arma::endr
-         << -1 << 0 << 1;
-
-  output << -3 << -2 << arma::endr
-         << 8 << -3;
-
-  // Perform the naive convolution approach.
-  Convolution2DMethodTest<NaiveConvolution<ValidConvolution> >(input, filter,
-      output);
-
-  // Perform the convolution through the FFT.
-  Convolution2DMethodTest<FFTConvolution<ValidConvolution> >(input, filter,
-      output);
-
-  // Perform the convolution using the singular value decomposition to
-  // speed up the computation.
-  Convolution2DMethodTest<SVDConvolution<ValidConvolution> >(input, filter,
-      output);
-}
-
-/**
- * Test the convolution (full) methods.
- */
-BOOST_AUTO_TEST_CASE(FullConvolution2DTest)
-{
-  // Generate dataset for convolution function tests.
-  arma::mat input, filter, output;
-  input << 1 << 2 << 3 << 4 << arma::endr
-        << 4 << 1 << 2 << 3 << arma::endr
-        << 3 << 4 << 1 << 2 << arma::endr
-        << 2 << 3 << 4 << 1;
-
-  filter << 1 << 0 << -1 << arma::endr
-         << 1 << 1 << 1 << arma::endr
-         << -1 << 0 << 1;
-
-  output << 1 << 2 << 2 << 2 << -3 << -4 << arma::endr
-         << 5 << 4 << 4 << 11 << 5 << 1 << arma::endr
-         << 6 << 7 << 3 << 2 << 7 << 5 << arma::endr
-         << 1 << 9 << 12 << 3 << 1 << 4 << arma::endr
-         << -1 << 1 << 11 << 10 << 6 << 3 << arma::endr
-         << -2 << -3 << -2 << 2 << 4 << 1;
-
-  // Perform the naive convolution approach.
-  Convolution2DMethodTest<NaiveConvolution<FullConvolution> >(input, filter,
-      output);
-
-  // Perform the convolution through the FFT.
-  Convolution2DMethodTest<FFTConvolution<FullConvolution> >(input, filter,
-      output);
-
-  // Perform the convolution using the singular value decomposition to
-  // speed up the computation.
-  Convolution2DMethodTest<SVDConvolution<FullConvolution> >(input, filter,
-      output);
-}
-
-/**
- * Test the convolution (valid) methods using 3rd order tensors.
- */
-BOOST_AUTO_TEST_CASE(ValidConvolution3DTest)
-{
-  // Generate dataset for convolution function tests.
-  arma::mat input, filter, output;
-  input << 1 << 2 << 3 << 4 << arma::endr
-        << 4 << 1 << 2 << 3 << arma::endr
-        << 3 << 4 << 1 << 2 << arma::endr
-        << 2 << 3 << 4 << 1;
-
-  filter << 1 << 0 << -1 << arma::endr
-         << 0 << 1 << 0 << arma::endr
-         << -1 << 0 << 1;
-
-  output << -3 << -2 << arma::endr
-         << 8 << -3;
-
-  arma::cube inputCube(input.n_rows, input.n_cols, 2);
-  inputCube.slice(0) = input;
-  inputCube.slice(1) = input;
-
-  arma::cube filterCube(filter.n_rows, filter.n_cols, 2);
-  filterCube.slice(0) = filter;
-  filterCube.slice(1) = filter;
-
-  arma::cube outputCube(output.n_rows, output.n_cols, 2);
-  outputCube.slice(0) = output;
-  outputCube.slice(1) = output;
-
-  // Perform the naive convolution approach.
-  Convolution3DMethodTest<NaiveConvolution<ValidConvolution> >(inputCube,
-      filterCube, outputCube);
-
-  // Perform the convolution through the FFT.
-  Convolution3DMethodTest<FFTConvolution<ValidConvolution> >(inputCube,
-      filterCube, outputCube);
-
-  // Perform the convolution using the singular value decomposition to
-  // speed up the computation.
-  Convolution3DMethodTest<SVDConvolution<ValidConvolution> >(inputCube,
-      filterCube, outputCube);
-}
-
-/**
- * Test the convolution (full) methods using 3rd order tensors.
- */
-BOOST_AUTO_TEST_CASE(FullConvolution3DTest)
-{
-  // Generate dataset for convolution function tests.
-  arma::mat input, filter, output;
-  input << 1 << 2 << 3 << 4 << arma::endr
-        << 4 << 1 << 2 << 3 << arma::endr
-        << 3 << 4 << 1 << 2 << arma::endr
-        << 2 << 3 << 4 << 1;
-
-  filter << 1 << 0 << -1 << arma::endr
-         << 1 << 1 << 1 << arma::endr
-         << -1 << 0 << 1;
-
-  output << 1 << 2 << 2 << 2 << -3 << -4 << arma::endr
-         << 5 << 4 << 4 << 11 << 5 << 1 << arma::endr
-         << 6 << 7 << 3 << 2 << 7 << 5 << arma::endr
-         << 1 << 9 << 12 << 3 << 1 << 4 << arma::endr
-         << -1 << 1 << 11 << 10 << 6 << 3 << arma::endr
-         << -2 << -3 << -2 << 2 << 4 << 1;
-
-  arma::cube inputCube(input.n_rows, input.n_cols, 2);
-  inputCube.slice(0) = input;
-  inputCube.slice(1) = input;
-
-  arma::cube filterCube(filter.n_rows, filter.n_cols, 2);
-  filterCube.slice(0) = filter;
-  filterCube.slice(1) = filter;
-
-  arma::cube outputCube(output.n_rows, output.n_cols, 2);
-  outputCube.slice(0) = output;
-  outputCube.slice(1) = output;
-
-  // Perform the naive convolution approach.
-  Convolution3DMethodTest<NaiveConvolution<FullConvolution> >(inputCube,
-      filterCube, outputCube);
-
-  // Perform the convolution through the FFT.
-  Convolution3DMethodTest<FFTConvolution<FullConvolution> >(inputCube,
-      filterCube, outputCube);
-
-  // Perform the convolution using the singular value decomposition to
-  // speed up the computation.
-  Convolution3DMethodTest<SVDConvolution<FullConvolution> >(inputCube,
-      filterCube, outputCube);
-}
-
-/**
- * Test the convolution (valid) methods using a dense matrix as input and
- * 3rd order tensors as filter and output (batch mode).
- */
-BOOST_AUTO_TEST_CASE(ValidConvolutionBatchTest)
-{
-  // Generate dataset for convolution function tests.
-  arma::mat input, filter, output;
-  input << 1 << 2 << 3 << 4 << arma::endr
-        << 4 << 1 << 2 << 3 << arma::endr
-        << 3 << 4 << 1 << 2 << arma::endr
-        << 2 << 3 << 4 << 1;
-
-  filter << 1 << 0 << -1 << arma::endr
-         << 0 << 1 << 0 << arma::endr
-         << -1 << 0 << 1;
-
-  output << -3 << -2 << arma::endr
-         << 8 << -3;
-
-  arma::cube filterCube(filter.n_rows, filter.n_cols, 2);
-  filterCube.slice(0) = filter;
-  filterCube.slice(1) = filter;
-
-  arma::cube outputCube(output.n_rows, output.n_cols, 2);
-  outputCube.slice(0) = output;
-  outputCube.slice(1) = output;
-
-  // Perform the naive convolution approach.
-  ConvolutionMethodBatchTest<NaiveConvolution<ValidConvolution> >(input,
-      filterCube, outputCube);
-
-  // Perform the convolution through the FFT.
-  ConvolutionMethodBatchTest<FFTConvolution<ValidConvolution> >(input,
-      filterCube, outputCube);
-
-  // Perform the convolution using the singular value decomposition to
-  // speed up the computation.
-  ConvolutionMethodBatchTest<SVDConvolution<ValidConvolution> >(input,
-      filterCube, outputCube);
-}
-
-/**
- * Test the convolution (full) methods using a dense matrix as input and
- * 3rd order tensors as filter and output (batch mode).
- */
-BOOST_AUTO_TEST_CASE(FullConvolutionBatchTest)
-{
-  // Generate dataset for convolution function tests.
-  arma::mat input, filter, output;
-  input << 1 << 2 << 3 << 4 << arma::endr
-        << 4 << 1 << 2 << 3 << arma::endr
-        << 3 << 4 << 1 << 2 << arma::endr
-        << 2 << 3 << 4 << 1;
-
-  filter << 1 << 0 << -1 << arma::endr
-         << 1 << 1 << 1 << arma::endr
-         << -1 << 0 << 1;
-
-  output << 1 << 2 << 2 << 2 << -3 << -4 << arma::endr
-         << 5 << 4 << 4 << 11 << 5 << 1 << arma::endr
-         << 6 << 7 << 3 << 2 << 7 << 5 << arma::endr
-         << 1 << 9 << 12 << 3 << 1 << 4 << arma::endr
-         << -1 << 1 << 11 << 10 << 6 << 3 << arma::endr
-         << -2 << -3 << -2 << 2 << 4 << 1;
-
-  arma::cube filterCube(filter.n_rows, filter.n_cols, 2);
-  filterCube.slice(0) = filter;
-  filterCube.slice(1) = filter;
-
-  arma::cube outputCube(output.n_rows, output.n_cols, 2);
-  outputCube.slice(0) = output;
-  outputCube.slice(1) = output;
-
-  // Perform the naive convolution approach.
-  ConvolutionMethodBatchTest<NaiveConvolution<FullConvolution> >(input,
-      filterCube, outputCube);
-
-  // Perform the convolution through the FFT.
-  ConvolutionMethodBatchTest<FFTConvolution<FullConvolution> >(input,
-      filterCube, outputCube);
-
-  // Perform the convolution using the singular value decomposition to
-  // speed up the computation.
-  ConvolutionMethodBatchTest<SVDConvolution<FullConvolution> >(input,
-      filterCube, outputCube);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/convolutional_network_test.cpp b/src/mlpack/tests/convolutional_network_test.cpp
deleted file mode 100644
index 52e1a6c..0000000
--- a/src/mlpack/tests/convolutional_network_test.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * @file convolutional_network_test.cpp
- * @author Marcus Edel
- *
- * Tests the convolutional neural network.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
-
-#include <mlpack/methods/ann/layer/one_hot_layer.hpp>
-#include <mlpack/methods/ann/layer/conv_layer.hpp>
-#include <mlpack/methods/ann/layer/pooling_layer.hpp>
-#include <mlpack/methods/ann/layer/softmax_layer.hpp>
-#include <mlpack/methods/ann/layer/bias_layer.hpp>
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/base_layer.hpp>
-
-#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
-#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
-
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <mlpack/methods/ann/cnn.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-using namespace mlpack::optimization;
-
-
-BOOST_AUTO_TEST_SUITE(ConvolutionalNetworkTest);
-
-/**
- * Train and evaluate a vanilla network with the specified structure.
- */
-template<
-    typename PerformanceFunction
->
-void BuildVanillaNetwork()
-{
-  arma::mat X;
-  X.load("mnist_first250_training_4s_and_9s.arm");
-
-  // Normalize each point since these are images.
-  arma::uword nPoints = X.n_cols;
-  for (arma::uword i = 0; i < nPoints; i++)
-  {
-    X.col(i) /= norm(X.col(i), 2);
-  }
-
-  // Build the target matrix.
-  arma::mat Y = arma::zeros<arma::mat>(10, nPoints);
-  for (size_t i = 0; i < nPoints; i++)
-  {
-    if (i < nPoints / 2)
-    {
-      Y.col(i)(5) = 1;
-    }
-    else
-    {
-      Y.col(i)(8) = 1;
-    }
-  }
-
-  arma::cube input = arma::cube(28, 28, nPoints);
-  for (size_t i = 0; i < nPoints; i++)
-    input.slice(i) = arma::mat(X.colptr(i), 28, 28);
-
-  /*
-   * Construct a convolutional neural network with a 28x28x1 input layer,
-   * 24x24x8 convolution layer, 12x12x8 pooling layer, 8x8x12 convolution layer
-   * and a 4x4x12 pooling layer which is fully connected with the output layer.
-   * The network structure looks like:
-   *
-   * Input    Convolution  Pooling      Convolution  Pooling      Output
-   * Layer    Layer        Layer        Layer        Layer        Layer
-   *
-   *          +---+        +---+        +---+        +---+
-   *          | +---+      | +---+      | +---+      | +---+
-   * +---+    | | +---+    | | +---+    | | +---+    | | +---+    +---+
-   * |   |    | | |   |    | | |   |    | | |   |    | | |   |    |   |
-   * |   +--> +-+ |   +--> +-+ |   +--> +-+ |   +--> +-+ |   +--> |   |
-   * |   |      +-+   |      +-+   |      +-+   |      +-+   |    |   |
-   * +---+        +---+        +---+        +---+        +---+    +---+
-   */
-
-  ConvLayer<> convLayer0(1, 8, 5, 5);
-  BiasLayer2D<> biasLayer0(8);
-  BaseLayer2D<> baseLayer0;
-  PoolingLayer<> poolingLayer0(2);
-
-  ConvLayer<> convLayer1(8, 12, 5, 5);
-  BiasLayer2D<> biasLayer1(12);
-  BaseLayer2D<> baseLayer1;
-  PoolingLayer<> poolingLayer1(2);
-
-  LinearMappingLayer<> linearLayer0(4608, 10);
-  BiasLayer<> biasLayer2(10);
-  SoftmaxLayer<> softmaxLayer0;
-
-  OneHotLayer outputLayer;
-
-  auto modules = std::tie(convLayer0, baseLayer0, linearLayer0, softmaxLayer0);
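-  // Note that only the first convolution block is wired into the network
-  // here; the pooling layers and the second convolution block constructed
-  // above are left out, which is why the linear mapping expects
-  // 24 * 24 * 8 = 4608 inputs.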
-
-  CNN<decltype(modules), decltype(outputLayer),
-      RandomInitialization, MeanSquaredErrorFunction> net(modules, outputLayer);
-  biasLayer0.Weights().zeros();
-  biasLayer1.Weights().zeros();
-
-  RMSprop<decltype(net)> opt(net, 0.01, 0.88, 1e-8, 10 * input.n_slices, 0);
-
-  net.Train(input, Y, opt);
-
-  arma::mat prediction;
-  net.Predict(input, prediction);
-
-  // Count the correctly classified points; a column matches exactly when the
-  // absolute difference to the target sums to zero.
-  size_t correct = 0;
-  for (size_t i = 0; i < nPoints; i++)
-  {
-    if (arma::sum(arma::sum(
-        arma::abs(prediction.col(i) - Y.col(i)))) == 0)
-    {
-      correct++;
-    }
-  }
-
-  double classificationError = 1 - double(correct) / nPoints;
-  BOOST_REQUIRE_LE(classificationError, 0.6);
-}
-
-/**
- * Train the vanilla convolutional network on a subset of MNIST.
- */
-BOOST_AUTO_TEST_CASE(VanillaNetworkTest)
-{
-  BuildVanillaNetwork<LogisticFunction>();
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp
deleted file mode 100644
index 4477bf2..0000000
--- a/src/mlpack/tests/feedforward_network_test.cpp
+++ /dev/null
@@ -1,509 +0,0 @@
-/**
- * @file feedforward_network_test.cpp
- * @author Marcus Edel
- * @author Palash Ahuja
- *
- * Tests the feed forward network.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
-#include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
-
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-
-#include <mlpack/methods/ann/layer/bias_layer.hpp>
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/base_layer.hpp>
-#include <mlpack/methods/ann/layer/dropout_layer.hpp>
-#include <mlpack/methods/ann/layer/binary_classification_layer.hpp>
-#include <mlpack/methods/ann/layer/dropconnect_layer.hpp>
-
-#include <mlpack/methods/ann/ffn.hpp>
-#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
-#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-using namespace mlpack::optimization;
-
-BOOST_AUTO_TEST_SUITE(FeedForwardNetworkTest);
-
-/**
- * Train and evaluate a vanilla network with the specified structure.
- */
-template<
-    typename PerformanceFunction,
-    typename OutputLayerType,
-    typename PerformanceFunctionType,
-    typename MatType = arma::mat
->
-void BuildVanillaNetwork(MatType& trainData,
-                         MatType& trainLabels,
-                         MatType& testData,
-                         MatType& testLabels,
-                         const size_t hiddenLayerSize,
-                         const size_t maxEpochs,
-                         const double classificationErrorThreshold)
-{
-  /*
-   * Construct a feed forward network with trainData.n_rows input nodes,
-   * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The
-   * network structure looks like:
-   *
-   *  Input         Hidden        Output
-   *  Layer         Layer         Layer
-   * +-----+       +-----+       +-----+
-   * |     |       |     |       |     |
-   * |     +------>|     +------>|     |
-   * |     |     +>|     |     +>|     |
-   * +-----+     | +--+--+     | +-----+
-   *             |             |
-   *  Bias       |  Bias       |
-   *  Layer      |  Layer      |
-   * +-----+     | +-----+     |
-   * |     |     | |     |     |
-   * |     +-----+ |     +-----+
-   * |     |       |     |
-   * +-----+       +-----+
-   */
-
-  LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize);
-  BiasLayer<> inputBiasLayer(hiddenLayerSize);
-  BaseLayer<PerformanceFunction> inputBaseLayer;
-
-  LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows);
-  BiasLayer<> hiddenBiasLayer1(trainLabels.n_rows);
-  BaseLayer<PerformanceFunction> outputLayer;
-
-  OutputLayerType classOutputLayer;
-
-  auto modules = std::tie(inputLayer, inputBiasLayer, inputBaseLayer,
-                          hiddenLayer1, hiddenBiasLayer1, outputLayer);
-
-  FFN<decltype(modules), decltype(classOutputLayer), RandomInitialization,
-      PerformanceFunctionType> net(modules, classOutputLayer);
-
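-  // RMSprop arguments, assumed from the constructor of this mlpack version:
-  // step size 0.01, smoothing factor 0.88, eps 1e-8, one iteration per
-  // training point for maxEpochs passes, and tolerance 1e-18.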
-  RMSprop<decltype(net)> opt(net, 0.01, 0.88, 1e-8,
-      maxEpochs * trainData.n_cols, 1e-18);
-
-  net.Train(trainData, trainLabels, opt);
-
-  MatType prediction;
-  net.Predict(testData, prediction);
-
-  // Count the correctly classified points; a column matches exactly when the
-  // absolute difference to the target sums to zero.
-  size_t correct = 0;
-  for (size_t i = 0; i < testData.n_cols; i++)
-  {
-    if (arma::sum(arma::sum(
-        arma::abs(prediction.col(i) - testLabels.col(i)))) == 0)
-    {
-      correct++;
-    }
-  }
-
-  double classificationError = 1 - double(correct) / testData.n_cols;
-  BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold);
-}
-
-/**
- * Train the vanilla network on a larger dataset.
- */
-BOOST_AUTO_TEST_CASE(VanillaNetworkTest)
-{
-  // Load the dataset.
-  arma::mat dataset;
-  data::Load("thyroid_train.csv", dataset, true);
-
-  arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4,
-      dataset.n_cols - 1);
-  arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0,
-      dataset.n_rows - 1, dataset.n_cols - 1);
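-  // The last three rows of the CSV are used as the target matrix; everything
-  // above them is treated as input features.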
-
-  data::Load("thyroid_test.csv", dataset, true);
-
-  arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4,
-      dataset.n_cols - 1);
-  arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0,
-      dataset.n_rows - 1, dataset.n_cols - 1);
-
-  // Vanilla neural net with logistic activation function.
-  // Because 92 percent of the patients are not hyperthyroid, the neural
-  // network must be significantly better than 92%.
-  BuildVanillaNetwork<LogisticFunction,
-                      BinaryClassificationLayer,
-                      MeanSquaredErrorFunction>
-      (trainData, trainLabels, testData, testLabels, 8, 200, 0.1);
-
-  dataset.load("mnist_first250_training_4s_and_9s.arm");
-
-  // Normalize each point since these are images.
-  for (size_t i = 0; i < dataset.n_cols; ++i)
-    dataset.col(i) /= norm(dataset.col(i), 2);
-
-  arma::mat labels = arma::zeros(1, dataset.n_cols);
-  labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1);
-
-  // Vanilla neural net with logistic activation function.
-  BuildVanillaNetwork<LogisticFunction,
-                      BinaryClassificationLayer,
-                      MeanSquaredErrorFunction>
-      (dataset, labels, dataset, labels, 30, 30, 0.4);
-
-  // Vanilla neural net with tanh activation function.
-  BuildVanillaNetwork<TanhFunction,
-                      BinaryClassificationLayer,
-                      MeanSquaredErrorFunction>
-    (dataset, labels, dataset, labels, 10, 30, 0.4);
-}
-
-/**
- * Train and evaluate a Dropout network with the specified structure.
- */
-template<
-    typename PerformanceFunction,
-    typename OutputLayerType,
-    typename PerformanceFunctionType,
-    typename MatType = arma::mat
->
-void BuildDropoutNetwork(MatType& trainData,
-                         MatType& trainLabels,
-                         MatType& testData,
-                         MatType& testLabels,
-                         const size_t hiddenLayerSize,
-                         const size_t maxEpochs,
-                         const double classificationErrorThreshold)
-{
-  /*
-   * Construct a feed forward network with trainData.n_rows input nodes,
-   * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The
-   * network structure looks like:
-   *
-   *  Input         Hidden        Dropout      Output
-   *  Layer         Layer         Layer        Layer
-   * +-----+       +-----+       +-----+       +-----+
-   * |     |       |     |       |     |       |     |
-   * |     +------>|     +------>|     +------>|     |
-   * |     |     +>|     |       |     |       |     |
-   * +-----+     | +--+--+       +-----+       +-----+
-   *             |
-   *  Bias       |
-   *  Layer      |
-   * +-----+     |
-   * |     |     |
-   * |     +-----+
-   * |     |
-   * +-----+
-   */
-
-  LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize);
-  BiasLayer<> biasLayer(hiddenLayerSize);
-  BaseLayer<PerformanceFunction> hiddenLayer0;
-  DropoutLayer<> dropoutLayer0;
-
-  LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows);
-  BaseLayer<PerformanceFunction> outputLayer;
-
-  OutputLayerType classOutputLayer;
-
-  auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, dropoutLayer0,
-                          hiddenLayer1, outputLayer);
-
-  FFN<decltype(modules), decltype(classOutputLayer), RandomInitialization,
-      PerformanceFunctionType> net(modules, classOutputLayer);
-
-  RMSprop<decltype(net)> opt(net, 0.01, 0.88, 1e-8,
-      maxEpochs * trainData.n_cols, 1e-18);
-
-  net.Train(trainData, trainLabels, opt);
-
-  MatType prediction;
-  net.Predict(testData, prediction);
-
-  // Count the correctly classified points; a column matches exactly when the
-  // absolute difference to the target sums to zero.
-  size_t correct = 0;
-  for (size_t i = 0; i < testData.n_cols; i++)
-  {
-    if (arma::sum(arma::sum(
-        arma::abs(prediction.col(i) - testLabels.col(i)))) == 0)
-    {
-      correct++;
-    }
-  }
-
-  double classificationError = 1 - double(correct) / testData.n_cols;
-  BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold);
-}
-
-/**
- * Train the dropout network on a larger dataset.
- */
-BOOST_AUTO_TEST_CASE(DropoutNetworkTest)
-{
-  // Load the dataset.
-  arma::mat dataset;
-  data::Load("thyroid_train.csv", dataset, true);
-
-  arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4,
-      dataset.n_cols - 1);
-  arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0,
-      dataset.n_rows - 1, dataset.n_cols - 1);
-
-  data::Load("thyroid_test.csv", dataset, true);
-
-  arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4,
-      dataset.n_cols - 1);
-  arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0,
-      dataset.n_rows - 1, dataset.n_cols - 1);
-
-  // Vanilla neural net with logistic activation function.
-  // Because 92 percent of the patients are not hyperthyroid, the neural
-  // network must be significantly better than 92%.
-  BuildDropoutNetwork<LogisticFunction,
-                      BinaryClassificationLayer,
-                      MeanSquaredErrorFunction>
-      (trainData, trainLabels, testData, testLabels, 4, 100, 0.1);
-
-  dataset.load("mnist_first250_training_4s_and_9s.arm");
-
-  // Normalize each point since these are images.
-  for (size_t i = 0; i < dataset.n_cols; ++i)
-    dataset.col(i) /= norm(dataset.col(i), 2);
-
-  arma::mat labels = arma::zeros(1, dataset.n_cols);
-  labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1);
-
-  // Vanilla neural net with logistic activation function.
-  BuildDropoutNetwork<LogisticFunction,
-                      BinaryClassificationLayer,
-                      MeanSquaredErrorFunction>
-      (dataset, labels, dataset, labels, 8, 30, 0.4);
-
-  // Vanilla neural net with tanh activation function.
-  BuildDropoutNetwork<TanhFunction,
-                      BinaryClassificationLayer,
-                      MeanSquaredErrorFunction>
-    (dataset, labels, dataset, labels, 8, 30, 0.4);
-}
-
-/**
- * Train and evaluate a DropConnect network (with a base layer) with the
- * specified structure.
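- * Unlike Dropout, which randomly zeros layer activations, DropConnect
- * randomly zeros individual connection weights during training.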
- */
-template<
-    typename PerformanceFunction,
-    typename OutputLayerType,
-    typename PerformanceFunctionType,
-    typename MatType = arma::mat
->
-void BuildDropConnectNetwork(MatType& trainData,
-                             MatType& trainLabels,
-                             MatType& testData,
-                             MatType& testLabels,
-                             const size_t hiddenLayerSize,
-                             const size_t maxEpochs,
-                             const double classificationErrorThreshold)
-{
- /*
-  *  Construct a feed forward network with trainData.n_rows input nodes,
-  *  hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The
-  *  network structure looks like:
-  *
-  *  Input         Hidden     DropConnect     Output
-  *  Layer         Layer         Layer        Layer
-  * +-----+       +-----+       +-----+       +-----+
-  * |     |       |     |       |     |       |     |
-  * |     +------>|     +------>|     +------>|     |
-  * |     |     +>|     |       |     |       |     |
-  * +-----+     | +--+--+       +-----+       +-----+
-  *             |
-  *  Bias       |
-  *  Layer      |
-  * +-----+     |
-  * |     |     |
-  * |     +-----+
-  * |     |
-  * +-----+
-  */
-  LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize);
-  BiasLayer<> biasLayer(hiddenLayerSize);
-  BaseLayer<PerformanceFunction> hiddenLayer0;
-
-  LinearLayer<> hiddenLayer1(hiddenLayerSize, trainLabels.n_rows);
-  DropConnectLayer<decltype(hiddenLayer1)> dropConnectLayer0(hiddenLayer1);
-
-  BaseLayer<PerformanceFunction> outputLayer;
-
-  OutputLayerType classOutputLayer;
-
-  auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0,
-                          dropConnectLayer0, outputLayer);
-
-  FFN<decltype(modules), decltype(classOutputLayer), RandomInitialization,
-              PerformanceFunctionType> net(modules, classOutputLayer);
-
-  RMSprop<decltype(net)> opt(net, 0.01, 0.88, 1e-8,
-      maxEpochs * trainData.n_cols, 1e-18);
-
-  net.Train(trainData, trainLabels, opt);
-
-  MatType prediction;
-  net.Predict(testData, prediction);
-
-  // Count the correctly classified points; a column matches exactly when the
-  // absolute difference to the target sums to zero.
-  size_t correct = 0;
-  for (size_t i = 0; i < testData.n_cols; i++)
-  {
-    if (arma::sum(arma::sum(
-        arma::abs(prediction.col(i) - testLabels.col(i)))) == 0)
-    {
-      correct++;
-    }
-  }
-
-  double classificationError = 1 - double(correct) / testData.n_cols;
-  BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold);
-}
-
-/**
- * Train and evaluate a DropConnect network (with a linear layer) with the
- * specified structure.
- */
-template<
-    typename PerformanceFunction,
-    typename OutputLayerType,
-    typename PerformanceFunctionType,
-    typename MatType = arma::mat
->
-void BuildDropConnectNetworkLinear(MatType& trainData,
-                                   MatType& trainLabels,
-                                   MatType& testData,
-                                   MatType& testLabels,
-                                   const size_t hiddenLayerSize,
-                                   const size_t maxEpochs,
-                                   const double classificationErrorThreshold)
-{
- /*
-  * Construct a feed forward network with trainData.n_rows input nodes,
-  * hiddenLayerSize hidden nodes and trainLabels.n_rows output nodes. The
-  * network structure looks like:
-  *
-  * Input         Hidden       DropConnect     Output
-  * Layer         Layer          Layer         Layer
-  * +-----+       +-----+       +-----+       +-----+
-  * |     |       |     |       |     |       |     |
-  * |     +------>|     +------>|     +------>|     |
-  * |     |     +>|     |       |     |       |     |
-  * +-----+     | +--+--+       +-----+       +-----+
-  *             |
-  *  Bias       |
-  *  Layer      |
-  * +-----+     |
-  * |     |     |
-  * |     +-----+
-  * |     |
-  * +-----+
-  */
-  LinearLayer<> inputLayer(trainData.n_rows, hiddenLayerSize);
-  BiasLayer<> biasLayer(hiddenLayerSize);
-  BaseLayer<PerformanceFunction> hiddenLayer0;
-
-  DropConnectLayer<> dropConnectLayer0(hiddenLayerSize, trainLabels.n_rows);
-
-  BaseLayer<PerformanceFunction> outputLayer;
-
-  OutputLayerType classOutputLayer;
-  auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0,
-                          dropConnectLayer0, outputLayer);
-
-  FFN<decltype(modules), decltype(classOutputLayer), RandomInitialization,
-              PerformanceFunctionType> net(modules, classOutputLayer);
-
-  RMSprop<decltype(net)> opt(net, 0.01, 0.88, 1e-8,
-      maxEpochs * trainData.n_cols, 1e-18);
-
-  net.Train(trainData, trainLabels, opt);
-
-  MatType prediction;
-  net.Predict(testData, prediction);
-
-  // Count the correctly classified points; a column matches exactly when the
-  // absolute difference to the target sums to zero.
-  size_t correct = 0;
-  for (size_t i = 0; i < testData.n_cols; i++)
-  {
-    if (arma::sum(arma::sum(
-        arma::abs(prediction.col(i) - testLabels.col(i)))) == 0)
-    {
-      correct++;
-    }
-  }
-
-  double classificationError = 1 - double(correct) / testData.n_cols;
-  BOOST_REQUIRE_LE(classificationError, classificationErrorThreshold);
-}
-
-/**
- * Train the dropconnect network on a larger dataset.
- */
-BOOST_AUTO_TEST_CASE(DropConnectNetworkTest)
-{
-  // Load the dataset.
-  arma::mat dataset;
-  data::Load("thyroid_train.csv", dataset, true);
-
-  arma::mat trainData = dataset.submat(0, 0, dataset.n_rows - 4,
-      dataset.n_cols - 1);
-  arma::mat trainLabels = dataset.submat(dataset.n_rows - 3, 0,
-      dataset.n_rows - 1, dataset.n_cols - 1);
-
-  data::Load("thyroid_test.csv", dataset, true);
-
-  arma::mat testData = dataset.submat(0, 0, dataset.n_rows - 4,
-      dataset.n_cols - 1);
-  arma::mat testLabels = dataset.submat(dataset.n_rows - 3, 0,
-      dataset.n_rows - 1, dataset.n_cols - 1);
-
-  // Vanilla neural net with logistic activation function.
-  // Because 92 percent of the patients are not hyperthyroid, the neural
-  // network must be significantly better than 92%.
-  BuildDropConnectNetwork<LogisticFunction,
-                          BinaryClassificationLayer,
-                          MeanSquaredErrorFunction>
-      (trainData, trainLabels, testData, testLabels, 4, 100, 0.1);
-
-  BuildDropConnectNetworkLinear<LogisticFunction,
-                                BinaryClassificationLayer,
-                                MeanSquaredErrorFunction>
-      (trainData, trainLabels, testData, testLabels, 4, 100, 0.1);
-
-  dataset.load("mnist_first250_training_4s_and_9s.arm");
-
-  // Normalize each point since these are images.
-  for (size_t i = 0; i < dataset.n_cols; ++i)
-    dataset.col(i) /= norm(dataset.col(i), 2);
-
-  arma::mat labels = arma::zeros(1, dataset.n_cols);
-  labels.submat(0, labels.n_cols / 2, 0, labels.n_cols - 1).fill(1);
-
-  // Vanilla neural net with logistic activation function.
-  BuildDropConnectNetwork<LogisticFunction,
-                          BinaryClassificationLayer,
-                          MeanSquaredErrorFunction>
-      (dataset, labels, dataset, labels, 8, 30, 0.4);
-
-
-  BuildDropConnectNetworkLinear<LogisticFunction,
-                                BinaryClassificationLayer,
-                                MeanSquaredErrorFunction>
-      (dataset, labels, dataset, labels, 8, 30, 0.4);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/init_rules_test.cpp b/src/mlpack/tests/init_rules_test.cpp
deleted file mode 100644
index 3ea0f8a..0000000
--- a/src/mlpack/tests/init_rules_test.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * @file init_rules_test.cpp
- * @author Marcus Edel
- *
- * Tests for the various weight initialize methods.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp>
-#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
-#include <mlpack/methods/ann/init_rules/oivs_init.hpp>
-#include <mlpack/methods/ann/init_rules/orthogonal_init.hpp>
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <mlpack/methods/ann/init_rules/zero_init.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-BOOST_AUTO_TEST_SUITE(InitRulesTest);
-
-// Test the RandomInitialization class with a constant value.
-BOOST_AUTO_TEST_CASE(ConstantInitTest)
-{
-  arma::mat weights;
-  RandomInitialization constantInit(1, 1);
-  constantInit.Initialize(weights, 100, 100);
-
-  bool b = arma::all(arma::vectorise(weights) == 1);
-  BOOST_REQUIRE_EQUAL(b, 1);
-}
-
-// Test the OrthogonalInitialization class.
-BOOST_AUTO_TEST_CASE(OrthogonalInitTest)
-{
-  arma::mat weights;
-  OrthogonalInitialization orthogonalInit;
-  orthogonalInit.Initialize(weights, 100, 200);
-
-  arma::mat orthogonalWeights = arma::eye<arma::mat>(100, 100);
-  weights *= weights.t();
-
-  for (size_t i = 0; i < weights.n_rows; i++)
-    for (size_t j = 0; j < weights.n_cols; j++)
-      BOOST_REQUIRE_SMALL(weights.at(i, j) - orthogonalWeights.at(i, j), 1e-3);
-
-  orthogonalInit.Initialize(weights, 200, 100);
-  weights = weights.t() * weights;
-
-  for (size_t i = 0; i < weights.n_rows; i++)
-    for (size_t j = 0; j < weights.n_cols; j++)
-      BOOST_REQUIRE_SMALL(weights.at(i, j) - orthogonalWeights.at(i, j), 1e-3);
-}
-
-// Test the OrthogonalInitialization class with a non default gain.
-BOOST_AUTO_TEST_CASE(OrthogonalInitGainTest)
-{
-  arma::mat weights;
-
-  const double gain = 2;
-  OrthogonalInitialization orthogonalInit(gain);
-  orthogonalInit.Initialize(weights, 100, 200);
-
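-  // A gain-scaled orthogonal matrix satisfies W * W^t = gain^2 * I, so the
-  // expected product here is 4 * I.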
-  arma::mat orthogonalWeights = arma::eye<arma::mat>(100, 100);
-  orthogonalWeights *= (gain * gain);
-  weights *= weights.t();
-
-  for (size_t i = 0; i < weights.n_rows; i++)
-    for (size_t j = 0; j < weights.n_cols; j++)
-      BOOST_REQUIRE_SMALL(weights.at(i, j) - orthogonalWeights.at(i, j), 1e-3);
-}
-
-// Test the ZeroInitialization class. If you think about it, it's kind of
-// ridiculous to test the zero init rule. But at least we make sure it
-// builds without any problems.
-BOOST_AUTO_TEST_CASE(ZeroInitTest)
-{
-  arma::mat weights;
-  ZeroInitialization zeroInit;
-  zeroInit.Initialize(weights, 100, 100);
-
-  bool b = arma::all(arma::vectorise(weights) == 0);
-  BOOST_REQUIRE_EQUAL(b, 1);
-}
-
-// Test the KathirvalavakumarSubavathiInitialization class.
-BOOST_AUTO_TEST_CASE(KathirvalavakumarSubavathiInitTest)
-{
-  arma::mat data = arma::randu<arma::mat>(100, 1);
-
-  arma::mat weights;
-  KathirvalavakumarSubavathiInitialization kathirvalavakumarSubavathiInit(
-      data, 1.5);
-  kathirvalavakumarSubavathiInit.Initialize(weights, 100, 100);
-
-  BOOST_REQUIRE_EQUAL(1, 1);
-}
-
-// Test the NguyenWidrowInitialization class.
-BOOST_AUTO_TEST_CASE(NguyenWidrowInitTest)
-{
-  arma::mat weights;
-  NguyenWidrowInitialization nguyenWidrowInit;
-  nguyenWidrowInit.Initialize(weights, 100, 100);
-
-  BOOST_REQUIRE_EQUAL(1, 1);
-}
-
-// Test the OivsInitialization class.
-BOOST_AUTO_TEST_CASE(OivsInitTest)
-{
-  arma::mat weights;
-  OivsInitialization<> oivsInit;
-  oivsInit.Initialize(weights, 100, 100);
-
-  BOOST_REQUIRE_EQUAL(1, 1);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/layer_traits_test.cpp b/src/mlpack/tests/layer_traits_test.cpp
deleted file mode 100644
index d7c0f10..0000000
--- a/src/mlpack/tests/layer_traits_test.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * @file layer_traits_test.cpp
- * @author Marcus Edel
- *
- * Test the LayerTraits class. Because all of the values are known at compile
- * time, this test is meant to ensure that uses of LayerTraits still compile
- * okay and react as expected.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-#include <mlpack/methods/ann/layer/bias_layer.hpp>
-#include <mlpack/methods/ann/layer/multiclass_classification_layer.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-BOOST_AUTO_TEST_SUITE(LayerTraitsTest);
-
-// Test the defaults.
-BOOST_AUTO_TEST_CASE(DefaultsTraitsTest)
-{
-  // An irrelevant non-layer type class is used here so that the default
-  // implementation of LayerTraits is chosen.
-  bool b = LayerTraits<int>::IsBinary;
-  BOOST_REQUIRE_EQUAL(b, false);
-
-  b = LayerTraits<int>::IsOutputLayer;
-  BOOST_REQUIRE_EQUAL(b, false);
-
-  b = LayerTraits<int>::IsBiasLayer;
-  BOOST_REQUIRE_EQUAL(b, false);
-}
-
-// Test the BiasLayer traits.
-BOOST_AUTO_TEST_CASE(BiasLayerTraitsTest)
-{
-  bool b = LayerTraits<BiasLayer<> >::IsBinary;
-  BOOST_REQUIRE_EQUAL(b, false);
-
-  b = LayerTraits<BiasLayer<> >::IsOutputLayer;
-  BOOST_REQUIRE_EQUAL(b, false);
-
-  b = LayerTraits<BiasLayer<> >::IsBiasLayer;
-  BOOST_REQUIRE_EQUAL(b, true);
-}
-
-// Test the MulticlassClassificationLayer traits.
-BOOST_AUTO_TEST_CASE(MulticlassClassificationLayerTraitsTest)
-{
-  bool b = LayerTraits<MulticlassClassificationLayer<> >::IsBinary;
-  BOOST_REQUIRE_EQUAL(b, false);
-
-  b = LayerTraits<MulticlassClassificationLayer<> >::IsOutputLayer;
-  BOOST_REQUIRE_EQUAL(b, true);
-
-  b = LayerTraits<MulticlassClassificationLayer<> >::IsBiasLayer;
-  BOOST_REQUIRE_EQUAL(b, false);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/lstm_peephole_test.cpp b/src/mlpack/tests/lstm_peephole_test.cpp
deleted file mode 100644
index 1624706..0000000
--- a/src/mlpack/tests/lstm_peephole_test.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * @file lstm_peephole_test.cpp
- * @author Marcus Edel
- *
- * Tests the LSTM peepholes.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/layer/lstm_layer.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-
-BOOST_AUTO_TEST_SUITE(LSTMPeepholeTest);
-
-/*
- * Test the peephole connections in the forward pass. The test is a modification
- * of the peephole test originally written by Tom Schaul.
- */
-BOOST_AUTO_TEST_CASE(LSTMPeepholeForwardTest)
-{
-  double state1 = 0.2;
-  double state2 = 0.345;
-  double state3 = -0.135;
-  double state4 = 10000;
-
-  arma::colvec input, output;
-
-  LSTMLayer<> hiddenLayer0(1, 6, true);
-
-  hiddenLayer0.InGatePeepholeWeights() = arma::mat("3");
-  hiddenLayer0.ForgetGatePeepholeWeights() = arma::mat("4");
-  hiddenLayer0.OutGatePeepholeWeights() = arma::mat("5");
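-  // The four input components are assumed to drive, in order, the input
-  // gate, the forget gate, the cell input, and the output gate; the test
-  // saturates each gate with +/- state4 = +/- 10000.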
-
-  // Set the LSTM state to state1 (state = inGateActivation * cellActivation
-  // = 1 / (1 + e^(-10000)) * tanh(atanh(0.2)) = 1 * 0.2 = 0.2).
-  // outputActivation = outGateActivation * stateActivation
-  // = (1 / (1 + e^10000)) * tanh(0.2) = 0.
-  input << state4 << state4 << std::atanh(state1) << -state4;
-  hiddenLayer0.FeedForward(input, output);
-  BOOST_REQUIRE_CLOSE(output(0), 0, 1e-3);
-
-  // Verify that the LSTM state is correctly stored.
-  input.clear();
-  input << -state4 << state4 << state4 << state4;
-  hiddenLayer0.FeedForward(input, output);
-  BOOST_REQUIRE_CLOSE(output(0), std::tanh(state1), 1e-3);
-
-  // Add state2 to the LSTM state.
-  // state = inGateActivation * cellActivation + forgetGateActivation *
-  // state(t - 1) = 0.345 + 1 * 0.2 = 0.545.
-  input.clear();
-  input << state4 << state4 << std::atanh(state2) << state4;
-  hiddenLayer0.FeedForward(input, output);
-  BOOST_REQUIRE_CLOSE(output(0), std::tanh(state1 + state2), 1e-3);
-
-  // Verify the peephole connection to the forget gate (weight = 4) by
-  // neutralizing its contribution and therefore dividing the LSTM state value
-  // by 2.
-  input.clear();
-  input << -state4 << -(state1 + state2) * 4 << state4 << state4;
-  hiddenLayer0.FeedForward(input, output);
-  BOOST_REQUIRE_CLOSE(output(0), std::tanh((state1 + state2) / 2), 1e-3);
-
-  // Verify the peephole connection to the input gate (weight = 3) by
-  // neutralizing its contribution and therefore dividing the provided input
-  // by 2.
-  input.clear();
-  input << -(state1 + state2) / 2 * 3 << -state4 << std::atanh(state3)
-        << state4;
-  hiddenLayer0.FeedForward(input, output);
-  BOOST_REQUIRE_CLOSE(output(0), std::tanh(state3 / 2), 1e-3);
-
-  // Verify the peephole connection to the output gate (weight = 5) by
-  // neutralizing its contribution and therefore dividing the provided output
-  // by 2.
-  input.clear();
-  input << -state4 << state4 << state4 << -state3 / 2 * 5;
-  hiddenLayer0.FeedForward(input, output);
-  BOOST_REQUIRE_CLOSE(output(0), std::tanh(state3 / 2) / 2, 1e-3);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/network_util_test.cpp b/src/mlpack/tests/network_util_test.cpp
deleted file mode 100644
index 4f0fcf1..0000000
--- a/src/mlpack/tests/network_util_test.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/**
- * @file network_util_test.cpp
- * @author Marcus Edel
- *
- * Simple tests for things in the network_util file.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/network_util.hpp>
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/base_layer.hpp>
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-BOOST_AUTO_TEST_SUITE(NetworkUtilTest);
-
-/**
- * Test the network size auxiliary function.
- */
-BOOST_AUTO_TEST_CASE(NetworkSizeTest)
-{
-  // Create a two layer network without weights.
-  BaseLayer<> baseLayer1;
-  BaseLayer<> baseLayer2;
-  auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2);
-
-  BOOST_REQUIRE_EQUAL(NetworkSize(noneWeightNetwork), 0);
-
-  // Create a two layer network.
-  LinearLayer<> linearLayer1(10, 10);
-  LinearLayer<> linearLayer2(10, 100);
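-  // Together these layers hold 10 * 10 + 10 * 100 = 1100 weights, the value
-  // NetworkSize() is expected to report below; the base layers are
-  // weight-free.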
-
-  // Reuse the layers from the first network.
-  auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2,
-      baseLayer2);
-
-  BOOST_REQUIRE_EQUAL(NetworkSize(weightNetwork), 1100);
-}
-
-/**
- * Test the layer size auxiliary function.
- */
-BOOST_AUTO_TEST_CASE(LayerSizeTest)
-{
-  // Create layer without weights.
-  BaseLayer<> baseLayer;
-  BOOST_REQUIRE_EQUAL(LayerSize(baseLayer, baseLayer.OutputParameter()), 0);
-
-  // Create layer with weights.
-  LinearLayer<> linearLayer(10, 10);
-  BOOST_REQUIRE_EQUAL(LayerSize(linearLayer,
-      linearLayer.OutputParameter()), 100);
-}
-
-/**
- * Test the network input size auxiliary function.
- */
-BOOST_AUTO_TEST_CASE(NetworkInputSizeTest)
-{
-  // Create a two layer network without weights.
-  BaseLayer<> baseLayer1;
-  BaseLayer<> baseLayer2;
-  auto noneWeightNetwork = std::tie(baseLayer1, baseLayer2);
-
-  BOOST_REQUIRE_EQUAL(NetworkInputSize(noneWeightNetwork), 0);
-
-  // Create a two layer network.
-  LinearLayer<> linearLayer1(5, 10);
-  LinearLayer<> linearLayer2(10, 100);
-
-  // Reuse the layers from the first network.
-  auto weightNetwork = std::tie(linearLayer1, baseLayer1, linearLayer2,
-      baseLayer2);
-
-  BOOST_REQUIRE_EQUAL(NetworkInputSize(weightNetwork), 5);
-}
-
-/**
- * Test the layer input size auxiliary function.
- */
-BOOST_AUTO_TEST_CASE(LayerInputSizeTest)
-{
-  // Create layer without weights.
-  BaseLayer<> baseLayer;
-  BOOST_REQUIRE_EQUAL(LayerInputSize(baseLayer,
-    baseLayer.OutputParameter()), 0);
-
-  // Create layer with weights.
-  LinearLayer<> linearLayer(5, 10);
-  BOOST_REQUIRE_EQUAL(LayerInputSize(linearLayer,
-      linearLayer.OutputParameter()), 5);
-}
-
-/**
- * Test the network weight auxiliary function using the given initialization
- * rule.
- */
-BOOST_AUTO_TEST_CASE(NetworkWeightsInitTest)
-{
-  // Create a two layer network.
-  LinearLayer<> linearLayer1(10, 10);
-  LinearLayer<> linearLayer2(10, 100);
-
-  arma::mat parameter = arma::zeros<arma::mat>(1100, 1);
-
-  // Create the network.
-  auto network = std::tie(linearLayer1, linearLayer2);
-
-  BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0);
-
-  RandomInitialization constantInit(1, 1);
-  NetworkWeights(constantInit, parameter, network);
-
-  BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100);
-  BOOST_REQUIRE_EQUAL(arma::accu(linearLayer2.Weights()), 1000);
-  BOOST_REQUIRE_EQUAL(arma::accu(parameter), 1100);
-}
-
-/**
- * Test the layer weight auxiliary function using the given initialization rule.
- */
-BOOST_AUTO_TEST_CASE(LayerWeightsInitTest)
-{
-  // Create a two layer network.
-  LinearLayer<> linearLayer1(10, 10);
-
-  arma::mat parameter = arma::zeros<arma::mat>(100, 1);
-
-  BOOST_REQUIRE_EQUAL(arma::accu(parameter), 0);
-
-  RandomInitialization constantInit(1, 1);
-  arma::mat output;
-  LayerWeights(constantInit, linearLayer1, parameter, 0, output);
-
-  BOOST_REQUIRE_EQUAL(arma::accu(linearLayer1.Weights()), 100);
-  BOOST_REQUIRE_EQUAL(arma::accu(parameter), 100);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/pooling_rules_test.cpp b/src/mlpack/tests/pooling_rules_test.cpp
deleted file mode 100644
index 0dd2c9d..0000000
--- a/src/mlpack/tests/pooling_rules_test.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * @file pooling_rules_test.cpp
- * @author Marcus Edel
- *
- * Tests for the max and mean pooling strategies.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/pooling_rules/max_pooling.hpp>
-#include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-
-BOOST_AUTO_TEST_SUITE(PoolingTest);
-
-/**
- * Test the max pooling rule.
- */
-BOOST_AUTO_TEST_CASE(MaxPoolingTest)
-{
-  // The data was generated by magic(6) in MATLAB.
-  arma::mat input, output;
-  input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr
-        << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr
-        << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr
-        << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr
-        << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr
-        << 4 << 36 << 29 << 13 << 18 << 11;
-
-  // Expected output of the generated 6 x 6 matrix.
-  const double poolingOutput = 36;
-
-  MaxPooling poolingRule;
-
-  // Test the pooling function.
-  BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput);
-
-  // Test the unpooling function.
-  poolingRule.Unpooling(input, input.max(), output);
-  BOOST_REQUIRE_EQUAL(arma::accu(output), input.max());
-}
-
-/**
- * Test the mean pooling rule.
- */
-BOOST_AUTO_TEST_CASE(MeanPoolingTest)
-{
-  // The data was generated by magic(6) in MATLAB.
-  arma::mat input, output;
-  input << 35 << 1 << 6 << 26 << 19 << 24 << arma::endr
-        << 3 << 32 << 7 << 21 << 23 << 25 << arma::endr
-        << 31 << 9 << 2 << 22 << 27 << 20 << arma::endr
-        << 8 << 28 << 33 << 17 << 10 << 15 << arma::endr
-        << 30 << 5 << 34 << 12 << 14 << 16 << arma::endr
-        << 4 << 36 << 29 << 13 << 18 << 11;
-
-  // Expected mean pooling output: the 36 entries of magic(6) sum to 666, so
-  // the mean is 666 / 36 = 18.5.
-  const double poolingOutput = 18.5;
-
-  MeanPooling poolingRule;
-
-  // Test the pooling function.
-  BOOST_REQUIRE_EQUAL(poolingRule.Pooling(input), poolingOutput);
-
-  // Test the unpooling function.
-  poolingRule.Unpooling(input, input.max(), output);
-  bool b = arma::all(arma::vectorise(output) == (input.max() / input.n_elem));
-  BOOST_REQUIRE_EQUAL(b, true);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/recurrent_network_test.cpp b/src/mlpack/tests/recurrent_network_test.cpp
deleted file mode 100644
index c49ae42..0000000
--- a/src/mlpack/tests/recurrent_network_test.cpp
+++ /dev/null
@@ -1,604 +0,0 @@
-/**
- * @file recurrent_network_test.cpp
- * @author Marcus Edel
- *
- * Tests the recurrent network.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/recurrent_layer.hpp>
-#include <mlpack/methods/ann/layer/base_layer.hpp>
-#include <mlpack/methods/ann/layer/lstm_layer.hpp>
-#include <mlpack/methods/ann/layer/binary_classification_layer.hpp>
-
-#include <mlpack/methods/ann/rnn.hpp>
-#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
-#include <mlpack/core/optimizers/sgd/sgd.hpp>
-#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace mlpack;
-using namespace mlpack::ann;
-using namespace mlpack::optimization;
-
-BOOST_AUTO_TEST_SUITE(RecurrentNetworkTest);
-
-/**
- * Construct a 2-class dataset out of noisy sines.
- *
- * @param data Input data used to store the noisy sines.
- * @param labels Labels used to store the target class of the noisy sines.
- * @param points Number of points/features in a single sequence.
- * @param sequences Number of sequences for each class.
- * @param noise The noise factor that influences the sines.
- */
-void GenerateNoisySines(arma::mat& data,
-                        arma::mat& labels,
-                        const size_t points,
-                        const size_t sequences,
-                        const double noise = 0.3)
-{
-  arma::colvec x = arma::linspace<arma::Col<double> >(0,
-      points - 1, points) / points * 20.0;
-  arma::colvec y1 = arma::sin(x + arma::as_scalar(arma::randu(1)) * 3.0);
-  arma::colvec y2 = arma::sin(x / 2.0 + arma::as_scalar(arma::randu(1)) * 3.0);
-
-  data = arma::zeros(points, sequences * 2);
-  labels = arma::zeros(2, sequences * 2);
-
-  for (size_t seq = 0; seq < sequences; seq++)
-  {
-    data.col(seq) = arma::randu(points) * noise + y1 +
-        arma::as_scalar(arma::randu(1) - 0.5) * noise;
-    labels(0, seq) = 1;
-
-    data.col(sequences + seq) = arma::randu(points) * noise + y2 +
-        arma::as_scalar(arma::randu(1) - 0.5) * noise;
-    labels(1, sequences + seq) = 1;
-  }
-}
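-
-/*
- * For illustration only (not used by the tests), a minimal call and the
- * resulting shapes:
- *
- * @code
- * arma::mat data, labels;
- * GenerateNoisySines(data, labels, 10, 6);
- * // data is 10 x 12: columns 0..5 hold class-0 sines and columns 6..11
- * // hold class-1 sines; labels is 2 x 12, one one-hot column per sequence.
- * @endcode
- */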
-
-/**
- * Train the vanilla network on a larger dataset.
- */
-BOOST_AUTO_TEST_CASE(SequenceClassificationTest)
-{
-  // It isn't guaranteed that the recurrent network will converge in the
-  // specified number of iterations using random weights. If this works 1 of 5
-  // times, I'm fine with that. All I want to know is that the network is able
-  // to escape from local minima and to solve the task.
-  size_t successes = 0;
-
-  for (size_t trial = 0; trial < 5; ++trial)
-  {
-    // Generate 12 (2 * 6) noisy sines. A single sine contains 10 points/features.
-    arma::mat input, labels;
-    GenerateNoisySines(input, labels, 10, 6);
-
-    /*
-     * Construct a network with 1 input unit, 4 hidden units and 2 output units.
-     * The hidden layer is connected to itself. The network structure looks like:
-     *
-     *  Input         Hidden        Output
-     * Layer(1)      Layer(4)      Layer(2)
-     * +-----+       +-----+       +-----+
-     * |     |       |     |       |     |
-     * |     +------>|     +------>|     |
-     * |     |    ..>|     |       |     |
-     * +-----+    .  +--+--+       +-----+
-     *            .     .
-     *            .     .
-     *            .......
-     */
-    LinearLayer<> linearLayer0(1, 4);
-    RecurrentLayer<> recurrentLayer0(4);
-    BaseLayer<LogisticFunction> inputBaseLayer;
-
-    LinearLayer<> hiddenLayer(4, 2);
-    BaseLayer<LogisticFunction> hiddenBaseLayer;
-
-    BinaryClassificationLayer classOutputLayer;
-
-    auto modules = std::tie(linearLayer0, recurrentLayer0, inputBaseLayer,
-                            hiddenLayer, hiddenBaseLayer);
-
-    RNN<decltype(modules), BinaryClassificationLayer, RandomInitialization,
-        MeanSquaredErrorFunction> net(modules, classOutputLayer);
-
-    // A negative tolerance keeps SGD from terminating early; it always runs
-    // the full number of iterations.
-    SGD<decltype(net)> opt(net, 0.5, 500 * input.n_cols, -100);
-
-    net.Train(input, labels, opt);
-
-    arma::mat prediction;
-    net.Predict(input, prediction);
-
-    // Count the sequences that are predicted exactly right.
-    size_t correct = 0;
-    for (size_t i = 0; i < labels.n_cols; i++)
-    {
-      if (arma::sum(arma::sum(arma::abs(prediction.col(i) -
-          labels.col(i)))) == 0)
-      {
-        correct++;
-      }
-    }
-
-    double classificationError = 1 - double(correct) / labels.n_cols;
-    if (classificationError <= 0.2)
-    {
-      ++successes;
-      break;
-    }
-  }
-
-  BOOST_REQUIRE_GE(successes, 1);
-}
-
-/**
- * Generate a random Reber grammar.
- *
- * For more information, see the following thesis.
- *
- * @code
- * @misc{Gers2001,
- *   author = {Felix Gers},
- *   title = {Long Short-Term Memory in Recurrent Neural Networks},
- *   year = {2001}
- * }
- * @endcode
- *
- * @param transitions Reber grammar transition matrix.
- * @param reber The generated Reber grammar string.
- */
-void GenerateReber(const arma::Mat<char>& transitions, std::string& reber)
-{
-  size_t idx = 0;
-  reber = "B";
-
-  do
-  {
-    const int grammarIdx = rand() % 2;
-    reber += arma::as_scalar(transitions.submat(idx, grammarIdx, idx,
-        grammarIdx));
-
-    idx = arma::as_scalar(transitions.submat(idx, grammarIdx + 2, idx,
-        grammarIdx + 2)) - '0';
-  } while (idx != 0);
-}
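-
-/*
- * A worked example of one pass through the transition matrix defined in
- * ReberGrammarTestNetwork() below (illustrative; the actual output is
- * random): starting at state 0, choosing column 0 appends 'T' and moves to
- * state 1; choosing 'X' moves to state 3, 'S' to state 5, and 'E' back to
- * state 0, which terminates the loop:
- *
- * @code
- * arma::Mat<char> transitions; // Filled as in ReberGrammarTestNetwork().
- * std::string reber;
- * GenerateReber(transitions, reber); // e.g. reber == "BTXSE".
- * @endcode
- */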
-
-/**
- * Generate a random embedded Reber grammar: a random Reber string is wrapped
- * as "B" + c + reber + c + "E", where c is 'P' or 'T' at random. For
- * example, an inner string "BTXSE" with c = 'P' yields "BPBTXSEPE".
- *
- * @param transitions Embedded Reber grammar transition matrix.
- * @param reber The generated embedded Reber grammar string.
- */
-void GenerateEmbeddedReber(const arma::Mat<char>& transitions,
-                           std::string& reber)
-{
-  GenerateReber(transitions, reber);
-  const char c = (rand() % 2) == 1 ? 'P' : 'T';
-  reber = c + reber + c;
-  reber = "B" + reber + "E";
-}
-
-/**
- * Convert a Reber symbol to a unit vector.
- *
- * @param symbol Reber symbol to be converted.
- * @param translation The converted symbol stored as unit vector.
- */
-void ReberTranslation(const char symbol, arma::colvec& translation)
-{
-  arma::Col<char> symbols;
-  symbols << 'B' << 'T' << 'S' << 'X' << 'P' << 'V' << 'E' << arma::endr;
-  const int idx = arma::as_scalar(arma::find(symbols == symbol, 1, "first"));
-
-  translation = arma::zeros<arma::colvec>(7);
-  translation(idx) = 1;
-}
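-
-// For example (illustrative): ReberTranslation('T', translation) yields the
-// length-7 unit vector (0 1 0 0 0 0 0)', since 'T' is the second symbol of
-// {B, T, S, X, P, V, E}; ReberReverseTranslation() below inverts the mapping.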
-
-/**
- * Convert a unit vector to a Reber symbol.
- *
- * @param translation The unit vector to be converted.
- * @param symbol The converted unit vector stored as Reber symbol.
- */
-void ReberReverseTranslation(const arma::colvec& translation, char& symbol)
-{
-  arma::Col<char> symbols;
-  symbols << 'B' << 'T' << 'S' << 'X' << 'P' << 'V' << 'E' << arma::endr;
-  const int idx = arma::as_scalar(arma::find(translation == 1, 1, "first"));
-
-  symbol = symbols(idx);
-}
-
-/**
- * Given a Reber string, return a Reber string with all reachable next symbols.
- *
- * @param transitions The Reber transition matrix.
- * @param reber The Reber string used to generate all reachable next symbols.
- * @param nextReber All reachable next symbols.
- */
-void GenerateNextReber(const arma::Mat<char>& transitions,
-                       const std::string& reber, std::string& nextReber)
-{
-  size_t idx = 0;
-
-  for (size_t grammar = 1; grammar < reber.length(); grammar++)
-  {
-    const int grammarIdx = arma::as_scalar(arma::find(
-        transitions.row(idx) == reber[grammar], 1, "first"));
-
-    idx = arma::as_scalar(transitions.submat(idx, grammarIdx + 2, idx,
-        grammarIdx + 2)) - '0';
-  }
-
-  nextReber = arma::as_scalar(transitions.submat(idx, 0, idx, 0));
-  nextReber += arma::as_scalar(transitions.submat(idx, 1, idx, 1));
-}
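-
-// For example (illustrative): after the prefix "BT" the walk is in state 1,
-// so GenerateNextReber() returns "XS", the two symbols reachable next.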
-
-/**
- * Given an embedded Reber string, return all reachable next symbols for the
- * given prefix.
- *
- * @param transitions The Reber transition matrix.
- * @param reber The Reber string used to generate all reachable next symbols.
- * @param nextReber All reachable next symbols.
- */
-void GenerateNextEmbeddedReber(const arma::Mat<char>& transitions,
-                               const std::string& reber, std::string& nextReber)
-{
-  if (reber.length() <= 2)
-  {
-    nextReber = reber.length() == 1 ? "TP" : "B";
-  }
-  else
-  {
-    size_t pos = reber.find('E');
-    if (pos != std::string::npos)
-    {
-      nextReber = pos == reber.length() - 1 ? std::string(1, reber[1]) : "E";
-    }
-    else
-    {
-      GenerateNextReber(transitions, reber.substr(2), nextReber);
-    }
-  }
-}
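-
-// For example (illustrative): for the prefix "B" the reachable symbols are
-// "TP"; for "BT" or "BP" the inner Reber string begins, so the only choice
-// is "B"; and once the inner string's final 'E' has been seen, the only
-// choices are the matching wrapper symbol and then the closing 'E'.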
-
-/**
- * Construct a Reber grammar dataset and train the specified network on it.
- */
-template<typename HiddenLayerType>
-void ReberGrammarTestNetwork(HiddenLayerType& hiddenLayer0,
-                             bool embedded = false)
-{
-  // Reber state transition matrix: the first two columns are the candidate
-  // output symbols, and the last two columns are the indices of the
-  // corresponding next states.
-  arma::Mat<char> transitions;
-  transitions << 'T' << 'P' << '1' << '2' << arma::endr
-              << 'X' << 'S' << '3' << '1' << arma::endr
-              << 'V' << 'T' << '4' << '2' << arma::endr
-              << 'X' << 'S' << '2' << '5' << arma::endr
-              << 'P' << 'V' << '3' << '5' << arma::endr
-              << 'E' << 'E' << '0' << '0' << arma::endr;
-
-  const size_t trainReberGrammarCount = 1000;
-  const size_t testReberGrammarCount = 1000;
-
-  std::string trainReber, testReber;
-  arma::field<arma::mat> trainInput(1, trainReberGrammarCount);
-  arma::field<arma::mat> trainLabels(1, trainReberGrammarCount);
-  arma::field<arma::mat> testInput(1, testReberGrammarCount);
-  arma::colvec translation;
-
-  // Generate the training data.
-  for (size_t i = 0; i < trainReberGrammarCount; i++)
-  {
-    if (embedded)
-      GenerateEmbeddedReber(transitions, trainReber);
-    else
-      GenerateReber(transitions, trainReber);
-
-    for (size_t j = 0; j < trainReber.length() - 1; j++)
-    {
-      ReberTranslation(trainReber[j], translation);
-      trainInput(0, i) = arma::join_cols(trainInput(0, i), translation);
-
-      ReberTranslation(trainReber[j + 1], translation);
-      trainLabels(0, i) = arma::join_cols(trainLabels(0, i), translation);
-    }
-  }
-
-  // Generate the test data.
-  for (size_t i = 0; i < testReberGrammarCount; i++)
-  {
-    if (embedded)
-      GenerateEmbeddedReber(transitions, testReber);
-    else
-      GenerateReber(transitions, testReber);
-
-    for (size_t j = 0; j < testReber.length() - 1; j++)
-    {
-      ReberTranslation(testReber[j], translation);
-      testInput(0, i) = arma::join_cols(testInput(0, i), translation);
-    }
-  }
-
-  /*
-   * Construct a network with 7 input units, 10 hidden units and 7 output
-   * units. The hidden layer is connected to itself. The network structure
-   * looks like:
-   *
-   *  Input         Hidden        Output
-   * Layer(7)      Layer(10)      Layer(7)
-   * +-----+       +-----+       +-----+
-   * |     |       |     |       |     |
-   * |     +------>|     +------>|     |
-   * |     |    ..>|     |       |     |
-   * +-----+    .  +--+--+       +-----+
-   *            .     .
-   *            .     .
-   *            .......
-   */
-  // The LSTM hidden layer uses 10 memory cells, each with four gated units;
-  // hence the factor of 4.
-  const size_t lstmSize = 4 * 10;
-  LinearLayer<> linearLayer0(7, lstmSize);
-  RecurrentLayer<> recurrentLayer0(10, lstmSize);
-
-  LinearLayer<> hiddenLayer(10, 7);
-  BaseLayer<LogisticFunction> hiddenBaseLayer;
-
-  BinaryClassificationLayer classOutputLayer;
-
-  auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0,
-                          hiddenLayer, hiddenBaseLayer);
-
-  RNN<decltype(modules), BinaryClassificationLayer, RandomInitialization,
-      MeanSquaredErrorFunction> net(modules, classOutputLayer);
-
-  SGD<decltype(net)> opt(net, 0.5, 2, -200);
-
-  arma::mat inputTemp, labelsTemp;
-  for (size_t i = 0; i < 15; i++)
-  {
-    for (size_t j = 0; j < trainReberGrammarCount; j++)
-    {
-      inputTemp = trainInput.at(0, j);
-      labelsTemp = trainLabels.at(0, j);
-      net.Train(inputTemp, labelsTemp, opt);
-    }
-  }
-
-  double error = 0;
-
-  // Ask the network to predict the next Reber symbol in the given sequence.
-  for (size_t i = 0; i < testReberGrammarCount; i++)
-  {
-    arma::mat output;
-    arma::mat input = testInput.at(0, i);
-
-    net.Predict(input, output);
-
-    const size_t reberGrammarSize = 7;
-    std::string inputReber = "";
-
-    // Count the predictions that are valid, i.e. that appear in the set of
-    // symbols reachable from the current prefix.
-    size_t validPredictions = 0;
-    for (size_t j = 0; j < (output.n_elem / reberGrammarSize); j++)
-    {
-      if (arma::sum(arma::sum(output.submat(j * reberGrammarSize, 0, (j + 1) *
-          reberGrammarSize - 1, 0))) != 1) break;
-
-      char predictedSymbol, inputSymbol;
-      std::string reberChoices;
-
-      ReberReverseTranslation(output.submat(j * reberGrammarSize, 0, (j + 1) *
-          reberGrammarSize - 1, 0), predictedSymbol);
-      ReberReverseTranslation(input.submat(j * reberGrammarSize, 0, (j + 1) *
-          reberGrammarSize - 1, 0), inputSymbol);
-      inputReber += inputSymbol;
-
-      if (embedded)
-        GenerateNextEmbeddedReber(transitions, inputReber, reberChoices);
-      else
-        GenerateNextReber(transitions, inputReber, reberChoices);
-
-      if (reberChoices.find(predictedSymbol) != std::string::npos)
-        validPredictions++;
-    }
-
-    // The sequence counts as an error unless every prediction was valid.
-    if (validPredictions != (output.n_elem / reberGrammarSize))
-      error += 1;
-  }
-
-  error /= testReberGrammarCount;
-  BOOST_REQUIRE_LE(error, 0.2);
-}
-
-/**
- * Train the specified networks on a Reber grammar dataset.
- */
-BOOST_AUTO_TEST_CASE(ReberGrammarTest)
-{
-  LSTMLayer<> hiddenLayerLSTM(10);
-  ReberGrammarTestNetwork(hiddenLayerLSTM);
-}
-
-/**
- * Train the specified networks on an embedded Reber grammar dataset.
- */
-BOOST_AUTO_TEST_CASE(EmbeddedReberGrammarTest)
-{
-  LSTMLayer<> hiddenLayerLSTM(10);
-  ReberGrammarTestNetwork(hiddenLayerLSTM, true);
-}
-
-/*
- * This sample is a simplified version of Derek D. Monner's Distracted Sequence
- * Recall task, which involves 10 symbols:
- *
- * Targets: must be recognized and remembered by the network.
- * Distractors: never need to be remembered.
- * Prompts: direct the network to give an answer.
- *
- * A single trial consists of a temporal sequence of 10 input symbols. The first
- * 8 consist of 2 randomly chosen target symbols and 6 randomly chosen
- * distractor symbols in a random order. The remaining two symbols are two
- * prompts, which direct the network to produce the first and second target in
- * the sequence, in order.
- *
- * For more information, see the following paper.
- *
- * @code
- * @misc{Monner2012,
- *   author = {Monner, Derek and Reggia, James A},
- *   title = {A generalized LSTM-like training algorithm for second-order
- *   recurrent neural networks},
- *   year = {2012}
- * }
- * @endcode
- *
- * @param input The generated input sequence.
- * @param output The generated output sequence.
- */
-void GenerateDistractedSequence(arma::mat& input, arma::mat& output)
-{
-  input = arma::zeros<arma::mat>(10, 10);
-  output = arma::zeros<arma::mat>(3, 10);
-
-  arma::Col<size_t> index = arma::shuffle(arma::linspace<arma::Col<size_t> >(
-      0, 7, 8));
-
-  // Set the two targets in the input sequence, and place each target's
-  // answer in the output sequence: the target occurring later in the input
-  // must be recalled at the second prompt (column 9), the earlier one at the
-  // first prompt (column 8).
-  for (size_t i = 0; i < 2; i++)
-  {
-    size_t idx = rand() % 2;
-    input(idx, index(i)) = 1;
-    output(idx, index(i) > index(1 - i) ? 9 : 8) = 1;
-  }
-
-  for (size_t i = 2; i < 8; i++)
-    input(2 + rand() % 6, index(i)) = 1;
-
-  // Set the prompts which direct the network to give an answer.
-  input(8, 8) = 1;
-  input(9, 9) = 1;
-
-  input.reshape(input.n_elem, 1);
-  output.reshape(output.n_elem, 1);
-}
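-
-/*
- * For illustration only (each trial is random): before reshaping, the input
- * is a 10 x 10 one-hot matrix whose rows 0-1 are targets, rows 2-7 are
- * distractors, and rows 8-9 are prompts, with one column per time step.
- *
- * @code
- * arma::mat input, output;
- * GenerateDistractedSequence(input, output);
- * // After the call, input is 100 x 1 and output is 30 x 1 (column-major
- * // reshapes of the 10 x 10 input and 3 x 10 output matrices).
- * @endcode
- */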
-
-/**
- * Construct the distracted sequence recall dataset and train the specified
- * network on it.
- */
-template<typename HiddenLayerType>
-void DistractedSequenceRecallTestNetwork(HiddenLayerType& hiddenLayer0)
-{
-  const size_t trainDistractedSequenceCount = 1000;
-  const size_t testDistractedSequenceCount = 1000;
-
-  arma::field<arma::mat> trainInput(1, trainDistractedSequenceCount);
-  arma::field<arma::mat> trainLabels(1, trainDistractedSequenceCount);
-  arma::field<arma::mat> testInput(1, testDistractedSequenceCount);
-  arma::field<arma::mat> testLabels(1, testDistractedSequenceCount);
-
-  // Generate the training data.
-  for (size_t i = 0; i < trainDistractedSequenceCount; i++)
-    GenerateDistractedSequence(trainInput(0, i), trainLabels(0, i));
-
-  // Generate the test data.
-  for (size_t i = 0; i < testDistractedSequenceCount; i++)
-    GenerateDistractedSequence(testInput(0, i), testLabels(0, i));
-
-  /*
-   * Construct a network with 10 input units, 10 hidden units and 3
-   * output units. The hidden layer is connected to itself. The network
-   * structure looks like:
-   *
-   *  Input         Hidden        Output
-   * Layer(10)     Layer(10)      Layer(3)
-   * +-----+       +-----+       +-----+
-   * |     |       |     |       |     |
-   * |     +------>|     +------>|     |
-   * |     |    ..>|     |       |     |
-   * +-----+    .  +--+--+       +-----+
-   *            .     .
-   *            .     .
-   *            .......
-   */
-  const size_t lstmSize = 4 * 10;
-  LinearLayer<> linearLayer0(10, lstmSize);
-  RecurrentLayer<> recurrentLayer0(10, lstmSize);
-
-  LinearLayer<> hiddenLayer(10, 3);
-  TanHLayer<> hiddenBaseLayer;
-
-  BinaryClassificationLayer classOutputLayer;
-
-  auto modules = std::tie(linearLayer0, recurrentLayer0, hiddenLayer0,
-                          hiddenLayer, hiddenBaseLayer);
-
-  RNN<decltype(modules), BinaryClassificationLayer, NguyenWidrowInitialization,
-      MeanSquaredErrorFunction> net(modules, classOutputLayer);
-
-  SGD<decltype(net)> opt(net, 0.04, 2, -200);
-
-  arma::mat inputTemp, labelsTemp;
-  for (size_t i = 0; i < 40; i++)
-  {
-    for (size_t j = 0; j < trainDistractedSequenceCount; j++)
-    {
-      inputTemp = trainInput.at(0, j);
-      labelsTemp = trainLabels.at(0, j);
-
-      net.Train(inputTemp, labelsTemp, opt);
-    }
-  }
-
-  double error = 0;
-
-  // Ask the network to predict the targets in the given sequence at the
-  // prompts.
-  for (size_t i = 0; i < testDistractedSequenceCount; i++)
-  {
-    arma::mat output;
-    arma::mat input = testInput.at(0, i);
-
-    net.Predict(input, output);
-
-    if (arma::accu(arma::abs(testLabels.at(0, i) - output)) != 0)
-      error += 1;
-  }
-
-  error /= testDistractedSequenceCount;
-
-  // The paper reports 95% accuracy on a test set of 1000 randomly selected
-  // sequences; require an error of at most 30% here to leave headroom for
-  // noise across runs.
-  BOOST_REQUIRE_LE(error, 0.3);
-}
-
-/**
- * Train the specified network on Derek D. Monner's distracted sequence
- * recall task.
- */
-BOOST_AUTO_TEST_CASE(DistractedSequenceRecallTest)
-{
-  LSTMLayer<> hiddenLayerLSTMPeephole(10, true);
-  DistractedSequenceRecallTestNetwork(hiddenLayerLSTMPeephole);
-}
-
-BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/mlpack/tests/rmsprop_test.cpp b/src/mlpack/tests/rmsprop_test.cpp
deleted file mode 100644
index 481741a..0000000
--- a/src/mlpack/tests/rmsprop_test.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-/**
- * @file rmsprop_test.cpp
- * @author Marcus Edel
- *
- * Tests the RMSProp optimizer.
- *
- * mlpack is free software; you may redistribute it and/or modify it under the
- * terms of the 3-clause BSD license.  You should have received a copy of the
- * 3-clause BSD license along with mlpack.  If not, see
- * http://www.opensource.org/licenses/BSD-3-Clause for more information.
- */
-#include <mlpack/core.hpp>
-
-#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
-#include <mlpack/core/optimizers/sgd/test_function.hpp>
-
-#include <mlpack/methods/logistic_regression/logistic_regression.hpp>
-
-#include <mlpack/methods/ann/ffn.hpp>
-#include <mlpack/methods/ann/init_rules/random_init.hpp>
-#include <mlpack/methods/ann/performance_functions/mse_function.hpp>
-#include <mlpack/methods/ann/layer/binary_classification_layer.hpp>
-#include <mlpack/methods/ann/layer/bias_layer.hpp>
-#include <mlpack/methods/ann/layer/linear_layer.hpp>
-#include <mlpack/methods/ann/layer/base_layer.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include "test_tools.hpp"
-
-using namespace arma;
-using namespace mlpack;
-using namespace mlpack::optimization;
-using namespace mlpack::optimization::test;
-
-using namespace mlpack::distribution;
-using namespace mlpack::regression;
-
-using namespace mlpack::ann;
-
-BOOST_AUTO_TEST_SUITE(RMSpropTest);
-
-/**
- * Tests the RMSprop optimizer using a simple test function.
- */
-BOOST_AUTO_TEST_CASE(SimpleRMSpropTestFunction)
-{
-  SGDTestFunction f;
-  RMSprop<SGDTestFunction> optimizer(f, 1e-3, 0.99, 1e-8, 5000000, 1e-9, true);
-
-  arma::mat coordinates = f.GetInitialPoint();
-  optimizer.Optimize(coordinates);
-
-  BOOST_REQUIRE_SMALL(coordinates[0], 0.1);
-  BOOST_REQUIRE_SMALL(coordinates[1], 0.1);
-  BOOST_REQUIRE_SMALL(coordinates[2], 0.1);
-}
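-
-/*
- * For reference, the update exercised above is the standard RMSprop rule (a
- * sketch, not mlpack's exact implementation; the names are illustrative):
- *
- * @code
- * // Exponential moving average of squared gradients, then a scaled step.
- * meanSquaredGradient = alpha * meanSquaredGradient
- *     + (1 - alpha) * (gradient % gradient);
- * iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps);
- * @endcode
- *
- * In the test above, stepSize = 1e-3, alpha = 0.99, and eps = 1e-8.
- */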
-
-/**
- * Run RMSprop on logistic regression and make sure the results are acceptable.
- */
-BOOST_AUTO_TEST_CASE(LogisticRegressionTest)
-{
-  // Generate a two-Gaussian dataset.
-  GaussianDistribution g1(arma::vec("1.0 1.0 1.0"), arma::eye<arma::mat>(3, 3));
-  GaussianDistribution g2(arma::vec("9.0 9.0 9.0"), arma::eye<arma::mat>(3, 3));
-
-  arma::mat data(3, 1000);
-  arma::Row<size_t> responses(1000);
-  for (size_t i = 0; i < 500; ++i)
-  {
-    data.col(i) = g1.Random();
-    responses[i] = 0;
-  }
-  for (size_t i = 500; i < 1000; ++i)
-  {
-    data.col(i) = g2.Random();
-    responses[i] = 1;
-  }
-
-  // Shuffle the dataset.
-  arma::uvec indices = arma::shuffle(arma::linspace<arma::uvec>(0,
-      data.n_cols - 1, data.n_cols));
-  arma::mat shuffledData(3, 1000);
-  arma::Row<size_t> shuffledResponses(1000);
-  for (size_t i = 0; i < data.n_cols; ++i)
-  {
-    shuffledData.col(i) = data.col(indices[i]);
-    shuffledResponses[i] = responses[indices[i]];
-  }
-
-  // Create a test set.
-  arma::mat testData(3, 1000);
-  arma::Row<size_t> testResponses(1000);
-  for (size_t i = 0; i < 500; ++i)
-  {
-    testData.col(i) = g1.Random();
-    testResponses[i] = 0;
-  }
-  for (size_t i = 500; i < 1000; ++i)
-  {
-    testData.col(i) = g2.Random();
-    testResponses[i] = 1;
-  }
-
-  LogisticRegression<> lr(shuffledData.n_rows, 0.5);
-
-  LogisticRegressionFunction<> lrf(shuffledData, shuffledResponses, 0.5);
-  RMSprop<LogisticRegressionFunction<> > rmsprop(lrf);
-  lr.Train(rmsprop);
-
-  // Ensure that the training accuracy is close to 100%.
-  const double acc = lr.ComputeAccuracy(data, responses);
-  BOOST_REQUIRE_CLOSE(acc, 100.0, 0.3); // 0.3% error tolerance.
-
-  const double testAcc = lr.ComputeAccuracy(testData, testResponses);
-  BOOST_REQUIRE_CLOSE(testAcc, 100.0, 0.6); // 0.6% error tolerance.
-}
-
-/**
- * Run RMSprop on a feedforward neural network and make sure the results are
- * acceptable.
- */
-BOOST_AUTO_TEST_CASE(FeedforwardTest)
-{
-  // Test on a non-linearly separable dataset (XOR): the columns below are
-  // the input pairs (0,1), (1,0), (1,1), (0,0) with XOR labels 1, 1, 0, 0.
-  arma::mat input, labels;
-  input << 0 << 1 << 1 << 0 << arma::endr
-        << 1 << 0 << 1 << 0 << arma::endr;
-  labels << 1 << 1 << 0 << 0;
-
-  // Instantiate the first layer.
-  LinearLayer<> inputLayer(input.n_rows, 8);
-  BiasLayer<> biasLayer(8);
-  TanHLayer<> hiddenLayer0;
-
-  // Instantiate the second layer.
-  LinearLayer<> hiddenLayer1(8, labels.n_rows);
-  TanHLayer<> outputLayer;
-
-  // Instantiate the output layer.
-  BinaryClassificationLayer classOutputLayer;
-
-  // Instantiate the feedforward network.
-  auto modules = std::tie(inputLayer, biasLayer, hiddenLayer0, hiddenLayer1,
-      outputLayer);
-  FFN<decltype(modules), decltype(classOutputLayer), RandomInitialization,
-      MeanSquaredErrorFunction> net(modules, classOutputLayer);
-
-  RMSprop<decltype(net)> opt(net, 0.03, 0.99, 1e-8, 300 * input.n_cols, -10);
-
-  net.Train(input, labels, opt);
-
-  arma::mat prediction;
-  net.Predict(input, prediction);
-
-  BOOST_REQUIRE_EQUAL(prediction(0), 1);
-  BOOST_REQUIRE_EQUAL(prediction(1), 1);
-  BOOST_REQUIRE_EQUAL(prediction(2), 0);
-  BOOST_REQUIRE_EQUAL(prediction(3), 0);
-}
-
-BOOST_AUTO_TEST_SUITE_END();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git


