[mlpack] 04/207: Replaced all instances of `#include <mlpack/core.hpp>` with `#include <mlpack/prereqs.hpp>`. This doesn't affect build times when building the whole project, but it greatly reduces build times after modifying only a few files.

Barak A. Pearlmutter barak+git at pearlmutter.net
Thu Mar 23 17:53:35 UTC 2017


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit 77a029ba0e27a63601d39fec7ad7c461cb65ab98
Author: Mike Izbicki <mike at izbicki.me>
Date:   Fri Dec 23 02:00:18 2016 -0800

    Replaced all instances of `#include <mlpack/core.hpp>` with `#include <mlpack/prereqs.hpp>`.  This doesn't affect build times when building the whole project, but it greatly reduces build times after modifying only a few files.
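    
    The change follows one mechanical pattern across the tree: a header stops
    pulling in the umbrella <mlpack/core.hpp> and instead includes the much
    lighter <mlpack/prereqs.hpp>, plus only the specific core headers it
    actually uses. Editing any one core header then invalidates far fewer
    translation units, which is why incremental rebuilds get much cheaper
    even though full builds are unchanged. A minimal sketch of the idiom
    (illustrative only; which extra headers a given file needs varies, and
    the two shown here are just examples that appear elsewhere in this diff):

        // Before: a single include dragged in the whole mlpack core.
        #include <mlpack/core.hpp>

        // After: lightweight prerequisites plus only what this file uses.
        #include <mlpack/prereqs.hpp>
        #include <mlpack/core/math/random.hpp>  // only if random numbers are needed
        #include <mlpack/core/util/log.hpp>     // only if logging is needed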
---
 src/mlpack/core/boost_backport/policy.hpp          |   3 +
 src/mlpack/core/data/binarize.hpp                  |   2 +-
 src/mlpack/core/data/dataset_mapper.hpp            |   2 +-
 .../data/imputation_methods/custom_imputation.hpp  |   2 +-
 .../data/imputation_methods/listwise_deletion.hpp  |   2 +-
 .../data/imputation_methods/mean_imputation.hpp    |   2 +-
 .../data/imputation_methods/median_imputation.hpp  |   2 +-
 src/mlpack/core/data/imputer.hpp                   |   2 +-
 src/mlpack/core/data/map_policies/datatype.hpp     |   2 +-
 .../core/data/map_policies/increment_policy.hpp    |   2 +-
 .../core/data/map_policies/missing_policy.hpp      |   2 +-
 src/mlpack/core/data/split_data.hpp                |   2 +-
 src/mlpack/core/dists/discrete_distribution.hpp    |   4 +-
 src/mlpack/core/dists/gamma_distribution.hpp       |   4 +-
 src/mlpack/core/dists/gaussian_distribution.hpp    |   2 +-
 src/mlpack/core/dists/laplace_distribution.cpp     |   2 +-
 src/mlpack/core/dists/regression_distribution.hpp  |   2 +-
 src/mlpack/core/kernels/cosine_distance.hpp        |   3 +-
 src/mlpack/core/kernels/epanechnikov_kernel.hpp    |   3 +-
 .../core/kernels/epanechnikov_kernel_impl.hpp      |   1 +
 src/mlpack/core/kernels/example_kernel.hpp         |   2 +-
 src/mlpack/core/kernels/gaussian_kernel.hpp        |   3 +-
 .../core/kernels/hyperbolic_tangent_kernel.hpp     |   2 +-
 src/mlpack/core/kernels/laplacian_kernel.hpp       |   2 +-
 src/mlpack/core/kernels/linear_kernel.hpp          |   2 +-
 src/mlpack/core/kernels/polynomial_kernel.hpp      |   2 +-
 .../core/kernels/pspectrum_string_kernel.hpp       |   3 +-
 src/mlpack/core/kernels/spherical_kernel.hpp       |   2 +-
 src/mlpack/core/kernels/triangular_kernel.hpp      |   2 +-
 src/mlpack/core/math/columns_to_blocks.hpp         |   2 +-
 src/mlpack/core/math/lin_alg.cpp                   |   3 +-
 src/mlpack/core/metrics/lmetric.hpp                |   2 +-
 src/mlpack/core/metrics/mahalanobis_distance.hpp   |   2 +-
 src/mlpack/core/optimizers/adadelta/ada_delta.hpp  |   2 +-
 src/mlpack/core/optimizers/adam/adam.hpp           |   2 +-
 .../optimizers/aug_lagrangian/aug_lagrangian.hpp   |   2 +-
 .../aug_lagrangian/aug_lagrangian_function.hpp     |   2 +-
 .../aug_lagrangian_test_functions.hpp              |   2 +-
 .../gradient_descent/gradient_descent.hpp          |   2 +-
 .../optimizers/gradient_descent/test_function.hpp  |   2 +-
 src/mlpack/core/optimizers/lbfgs/lbfgs.hpp         |   2 +-
 .../core/optimizers/lbfgs/test_functions.hpp       |   2 +-
 .../optimizers/minibatch_sgd/minibatch_sgd.hpp     |   2 +-
 src/mlpack/core/optimizers/rmsprop/rmsprop.hpp     |   2 +-
 src/mlpack/core/optimizers/sdp/lrsdp.hpp           |   2 +-
 src/mlpack/core/optimizers/sdp/lrsdp_function.hpp  |   2 +-
 src/mlpack/core/optimizers/sdp/primal_dual.hpp     |   2 +-
 src/mlpack/core/optimizers/sdp/sdp.hpp             |   3 +-
 src/mlpack/core/optimizers/sgd/sgd.hpp             |   2 +-
 src/mlpack/core/optimizers/sgd/test_function.hpp   |   2 +-
 src/mlpack/core/tree/ballbound.hpp                 |   2 +-
 src/mlpack/core/tree/ballbound_impl.hpp            |   1 +
 src/mlpack/core/tree/binary_space_tree.hpp         |   2 +-
 .../tree/binary_space_tree/binary_space_tree.hpp   |   2 +-
 .../breadth_first_dual_tree_traverser.hpp          |   2 +-
 .../tree/binary_space_tree/dual_tree_traverser.hpp |   2 +-
 .../core/tree/binary_space_tree/mean_split.hpp     |   2 +-
 .../core/tree/binary_space_tree/midpoint_split.hpp |   2 +-
 .../tree/binary_space_tree/rp_tree_max_split.hpp   |   2 +-
 .../tree/binary_space_tree/rp_tree_mean_split.hpp  |   3 +-
 .../binary_space_tree/single_tree_traverser.hpp    |   2 +-
 .../core/tree/binary_space_tree/ub_tree_split.hpp  |   2 +-
 .../tree/binary_space_tree/vantage_point_split.hpp |   3 +-
 src/mlpack/core/tree/cellbound.hpp                 |   2 +-
 src/mlpack/core/tree/cosine_tree/cosine_tree.cpp   |   1 +
 src/mlpack/core/tree/cosine_tree/cosine_tree.hpp   |   2 +-
 src/mlpack/core/tree/cover_tree.hpp                |   2 +-
 src/mlpack/core/tree/cover_tree/cover_tree.hpp     |   3 +-
 .../core/tree/cover_tree/dual_tree_traverser.hpp   |   2 +-
 .../tree/cover_tree/dual_tree_traverser_impl.hpp   |   2 +-
 .../core/tree/cover_tree/first_point_is_root.hpp   |   2 +-
 .../core/tree/cover_tree/single_tree_traverser.hpp |   2 +-
 .../core/tree/greedy_single_tree_traverser.hpp     |   2 +-
 src/mlpack/core/tree/hollow_ball_bound.hpp         |   2 +-
 src/mlpack/core/tree/hrectbound.hpp                |   2 +-
 src/mlpack/core/tree/octree.hpp                    |   2 +-
 .../core/tree/octree/dual_tree_traverser.hpp       |   2 +-
 src/mlpack/core/tree/octree/octree.hpp             |   2 +-
 .../core/tree/octree/single_tree_traverser.hpp     |   2 +-
 .../tree/rectangle_tree/discrete_hilbert_value.hpp |   2 +-
 .../tree/rectangle_tree/dual_tree_traverser.hpp    |   2 +-
 .../hilbert_r_tree_descent_heuristic.hpp           |   2 +-
 .../tree/rectangle_tree/hilbert_r_tree_split.hpp   |   2 +-
 .../r_plus_plus_tree_auxiliary_information.hpp     |   2 +-
 .../r_plus_plus_tree_descent_heuristic.hpp         |   2 +-
 .../r_plus_tree_descent_heuristic.hpp              |   2 +-
 .../core/tree/rectangle_tree/r_plus_tree_split.hpp |   2 +-
 .../r_star_tree_descent_heuristic.hpp              |   2 +-
 .../core/tree/rectangle_tree/r_star_tree_split.hpp |   2 +-
 .../rectangle_tree/r_tree_descent_heuristic.hpp    |   2 +-
 .../core/tree/rectangle_tree/r_tree_split.hpp      |   2 +-
 .../core/tree/rectangle_tree/rectangle_tree.hpp    |   2 +-
 .../tree/rectangle_tree/single_tree_traverser.hpp  |   2 +-
 .../core/tree/rectangle_tree/x_tree_split.hpp      |   2 +-
 src/mlpack/core/tree/space_split/hyperplane.hpp    |   2 +-
 .../core/tree/space_split/mean_space_split.hpp     |   2 +-
 .../core/tree/space_split/midpoint_space_split.hpp |   2 +-
 .../core/tree/space_split/projection_vector.hpp    |   2 +-
 src/mlpack/core/tree/space_split/space_split.hpp   |   2 +-
 src/mlpack/core/tree/spill_tree.hpp                |   2 +-
 .../tree/spill_tree/spill_dual_tree_traverser.hpp  |   2 +-
 .../spill_tree/spill_single_tree_traverser.hpp     |   2 +-
 src/mlpack/core/tree/spill_tree/spill_tree.hpp     |   2 +-
 src/mlpack/core/util/param.hpp                     |   2 +
 src/mlpack/methods/adaboost/adaboost.hpp           |   2 +-
 src/mlpack/methods/adaboost/adaboost_main.cpp      |   4 +-
 src/mlpack/methods/amf/amf.hpp                     |   2 +-
 src/mlpack/methods/amf/init_rules/average_init.hpp |   2 +-
 src/mlpack/methods/amf/init_rules/given_init.hpp   |   2 +-
 .../methods/amf/init_rules/random_acol_init.hpp    |   3 +-
 src/mlpack/methods/amf/init_rules/random_init.hpp  |   2 +-
 .../incomplete_incremental_termination.hpp         |   2 +-
 .../simple_residue_termination.hpp                 |   2 +-
 .../simple_tolerance_termination.hpp               |   2 +-
 .../validation_RMSE_termination.hpp                |   2 +-
 src/mlpack/methods/amf/update_rules/nmf_als.hpp    |   2 +-
 .../methods/amf/update_rules/nmf_mult_dist.hpp     |   2 +-
 .../methods/amf/update_rules/nmf_mult_div.hpp      |   2 +-
 .../amf/update_rules/svd_batch_learning.hpp        |   2 +-
 .../svd_complete_incremental_learning.hpp          |   2 +-
 .../ann/activation_functions/identity_function.hpp |  96 ++
 .../ann/activation_functions/logistic_function.hpp | 114 +++
 .../activation_functions/rectifier_function.hpp    | 115 +++
 .../ann/activation_functions/softsign_function.hpp | 134 +++
 .../ann/activation_functions/tanh_function.hpp     | 105 +++
 src/mlpack/methods/ann/cnn.hpp                     | 448 ++++++++++
 .../ann/convolution_rules/fft_convolution.hpp      | 221 +++++
 .../ann/convolution_rules/naive_convolution.hpp    | 190 ++++
 .../ann/convolution_rules/svd_convolution.hpp      | 199 +++++
 src/mlpack/methods/ann/ffn.hpp                     | 447 ++++++++++
 .../kathirvalavakumar_subavathi_init.hpp           | 121 +++
 .../methods/ann/init_rules/nguyen_widrow_init.hpp  | 117 +++
 src/mlpack/methods/ann/init_rules/oivs_init.hpp    | 130 +++
 .../methods/ann/init_rules/orthogonal_init.hpp     |  82 ++
 src/mlpack/methods/ann/init_rules/random_init.hpp  |   2 +-
 src/mlpack/methods/ann/init_rules/zero_init.hpp    |  65 ++
 src/mlpack/methods/ann/layer/base_layer.hpp        | 223 +++++
 src/mlpack/methods/ann/layer/bias_layer.hpp        | 208 +++++
 .../ann/layer/binary_classification_layer.hpp      | 106 +++
 src/mlpack/methods/ann/layer/constant_layer.hpp    | 121 +++
 src/mlpack/methods/ann/layer/conv_layer.hpp        | 324 +++++++
 src/mlpack/methods/ann/layer/dropconnect_layer.hpp | 361 ++++++++
 src/mlpack/methods/ann/layer/dropout_layer.hpp     | 252 ++++++
 src/mlpack/methods/ann/layer/glimpse_layer.hpp     | 484 +++++++++++
 src/mlpack/methods/ann/layer/hard_tanh_layer.hpp   | 259 ++++++
 src/mlpack/methods/ann/layer/leaky_relu_layer.hpp  | 240 +++++
 src/mlpack/methods/ann/layer/linear_layer.hpp      | 289 +++++++
 src/mlpack/methods/ann/layer/log_softmax_layer.hpp | 131 +++
 src/mlpack/methods/ann/layer/lstm_layer.hpp        | 418 +++++++++
 .../ann/layer/multiclass_classification_layer.hpp  |  98 +++
 .../methods/ann/layer/multiply_constant_layer.hpp  | 113 +++
 .../ann/layer/negative_log_likelihood_layer.hpp    | 127 +++
 src/mlpack/methods/ann/layer/one_hot_layer.hpp     |  96 ++
 src/mlpack/methods/ann/layer/pooling_layer.hpp     | 267 ++++++
 src/mlpack/methods/ann/layer/recurrent_layer.hpp   | 192 ++++
 .../methods/ann/layer/reinforce_normal_layer.hpp   | 139 +++
 src/mlpack/methods/ann/layer/softmax_layer.hpp     | 114 +++
 src/mlpack/methods/ann/layer/sparse_bias_layer.hpp | 177 ++++
 .../methods/ann/layer/sparse_input_layer.hpp       | 180 ++++
 .../methods/ann/layer/sparse_output_layer.hpp      | 227 +++++
 .../methods/ann/layer/vr_class_reward_layer.hpp    | 171 ++++
 src/mlpack/methods/ann/network_util.hpp            | 247 ++++++
 .../ann/performance_functions/cee_function.hpp     |  74 ++
 .../ann/performance_functions/mse_function.hpp     |  61 ++
 .../ann/performance_functions/sparse_function.hpp  | 141 +++
 .../ann/performance_functions/sse_function.hpp     |  64 ++
 .../methods/ann/pooling_rules/max_pooling.hpp      |  56 ++
 .../methods/ann/pooling_rules/mean_pooling.hpp     |  56 ++
 src/mlpack/methods/ann/rnn.hpp                     | 799 +++++++++++++++++
 src/mlpack/methods/approx_kfn/approx_kfn_main.cpp  |   2 +-
 src/mlpack/methods/approx_kfn/drusilla_select.hpp  |   2 +-
 src/mlpack/methods/approx_kfn/qdafn.hpp            |   3 +-
 src/mlpack/methods/cf/cf.hpp                       |   2 +-
 src/mlpack/methods/cf/cf_main.cpp                  |   4 +-
 src/mlpack/methods/cf/svd_wrapper.hpp              |   2 +-
 .../methods/decision_stump/decision_stump.hpp      |   2 +-
 .../methods/decision_stump/decision_stump_main.cpp |   4 +-
 src/mlpack/methods/det/det_main.cpp                |   3 +-
 src/mlpack/methods/det/dt_utils.hpp                |   2 +-
 src/mlpack/methods/det/dtree.hpp                   |   2 +-
 src/mlpack/methods/emst/dtb.hpp                    |   2 +-
 src/mlpack/methods/emst/dtb_rules.hpp              |   2 +-
 src/mlpack/methods/emst/dtb_stat.hpp               |   2 +-
 src/mlpack/methods/emst/edge_pair.hpp              |   2 +-
 src/mlpack/methods/emst/emst_main.cpp              |   2 +-
 src/mlpack/methods/emst/union_find.hpp             |   2 +-
 src/mlpack/methods/fastmks/fastmks.hpp             |   2 +-
 src/mlpack/methods/fastmks/fastmks_main.cpp        |   3 +-
 src/mlpack/methods/fastmks/fastmks_model.hpp       |  13 +-
 src/mlpack/methods/fastmks/fastmks_rules.hpp       |   3 +-
 src/mlpack/methods/fastmks/fastmks_stat.hpp        |   2 +-
 src/mlpack/methods/gmm/diagonal_constraint.hpp     |   2 +-
 .../methods/gmm/eigenvalue_ratio_constraint.hpp    |   2 +-
 src/mlpack/methods/gmm/em_fit.hpp                  |   3 +-
 src/mlpack/methods/gmm/gmm.hpp                     |   2 +-
 src/mlpack/methods/gmm/gmm_generate_main.cpp       |   2 +-
 src/mlpack/methods/gmm/gmm_probability_main.cpp    |   2 +-
 src/mlpack/methods/gmm/gmm_train_main.cpp          |   2 +-
 src/mlpack/methods/gmm/no_constraint.hpp           |   2 +-
 .../methods/gmm/positive_definite_constraint.hpp   |   2 +-
 src/mlpack/methods/hmm/hmm.hpp                     |   3 +-
 src/mlpack/methods/hmm/hmm_generate_main.cpp       |   2 +-
 src/mlpack/methods/hmm/hmm_loglik_main.cpp         |   2 +-
 src/mlpack/methods/hmm/hmm_regression.hpp          |   2 +-
 src/mlpack/methods/hmm/hmm_train_main.cpp          |   2 +-
 src/mlpack/methods/hmm/hmm_util.hpp                |   2 +-
 src/mlpack/methods/hmm/hmm_util_impl.hpp           |   2 +-
 src/mlpack/methods/hmm/hmm_viterbi_main.cpp        |   2 +-
 .../hoeffding_trees/binary_numeric_split_info.hpp  |   2 +-
 .../hoeffding_trees/categorical_split_info.hpp     |   2 +-
 .../methods/hoeffding_trees/gini_impurity.hpp      |   2 +-
 .../hoeffding_categorical_split.hpp                |   2 +-
 .../hoeffding_trees/hoeffding_numeric_split.hpp    |   2 +-
 .../methods/hoeffding_trees/hoeffding_tree.hpp     |   3 +-
 .../hoeffding_trees/hoeffding_tree_main.cpp        |   3 +-
 .../methods/hoeffding_trees/numeric_split_info.hpp |   2 +-
 src/mlpack/methods/kernel_pca/kernel_pca.hpp       |   2 +-
 src/mlpack/methods/kernel_pca/kernel_pca_main.cpp  |  16 +-
 .../kernel_pca/kernel_rules/naive_method.hpp       |   2 +-
 .../kernel_pca/kernel_rules/nystroem_method.hpp    |   2 +-
 src/mlpack/methods/kmeans/allow_empty_clusters.hpp |   2 +-
 src/mlpack/methods/kmeans/kill_empty_clusters.hpp  |   2 +-
 src/mlpack/methods/kmeans/kmeans.hpp               |   2 +-
 src/mlpack/methods/kmeans/kmeans_main.cpp          |   2 +-
 .../methods/kmeans/max_variance_new_cluster.hpp    |   2 +-
 src/mlpack/methods/kmeans/random_partition.hpp     |   2 +-
 src/mlpack/methods/kmeans/refined_start.hpp        |   2 +-
 .../methods/kmeans/sample_initialization.hpp       |   3 +-
 src/mlpack/methods/lars/lars.cpp                   |   2 +
 src/mlpack/methods/lars/lars.hpp                   |   2 +-
 src/mlpack/methods/lars/lars_main.cpp              |   3 +-
 .../linear_regression/linear_regression.cpp        |   1 +
 .../linear_regression/linear_regression.hpp        |   2 +-
 .../linear_regression/linear_regression_main.cpp   |   3 +-
 src/mlpack/methods/local_coordinate_coding/lcc.cpp |   1 +
 src/mlpack/methods/local_coordinate_coding/lcc.hpp |   2 +-
 .../local_coordinate_coding_main.cpp               |   3 +-
 .../logistic_regression/logistic_regression.hpp    |   2 +-
 .../logistic_regression_function.hpp               |   2 +-
 .../logistic_regression_main.cpp                   |   3 +-
 src/mlpack/methods/lsh/lsh_main.cpp                |   3 +-
 src/mlpack/methods/lsh/lsh_search.hpp              |   2 +-
 src/mlpack/methods/lsh/lsh_search_impl.hpp         |   3 +-
 src/mlpack/methods/mean_shift/mean_shift.hpp       |   2 +-
 src/mlpack/methods/mean_shift/mean_shift_main.cpp  |   3 +-
 src/mlpack/methods/mvu/mvu.hpp                     |  48 +
 src/mlpack/methods/mvu/mvu_main.cpp                |  79 ++
 .../methods/naive_bayes/naive_bayes_classifier.hpp |   2 +-
 .../naive_bayes/naive_bayes_classifier_impl.hpp    |   2 +-
 src/mlpack/methods/naive_bayes/nbc_main.cpp        |   4 +-
 src/mlpack/methods/nca/nca.hpp                     |   2 +-
 src/mlpack/methods/nca/nca_main.cpp                |   5 +-
 .../methods/nca/nca_softmax_error_function.hpp     |   2 +-
 src/mlpack/methods/neighbor_search/kfn_main.cpp    |   2 +-
 src/mlpack/methods/neighbor_search/knn_main.cpp    |   4 +-
 .../methods/neighbor_search/neighbor_search.hpp    |   3 +-
 .../neighbor_search/neighbor_search_impl.hpp       |   2 +-
 .../neighbor_search/neighbor_search_stat.hpp       |   2 +-
 .../sort_policies/furthest_neighbor_sort.hpp       |   2 +-
 .../sort_policies/nearest_neighbor_sort.hpp        |   2 +-
 src/mlpack/methods/neighbor_search/unmap.hpp       |   2 +-
 src/mlpack/methods/nmf/nmf_main.cpp                |   3 +-
 .../methods/nystroem_method/kmeans_selection.hpp   |   2 +-
 .../methods/nystroem_method/nystroem_method.hpp    |   2 +-
 .../methods/nystroem_method/ordered_selection.hpp  |   2 +-
 .../methods/nystroem_method/random_selection.hpp   |   2 +-
 .../decomposition_policies/exact_svd_method.hpp    |   2 +-
 .../pca/decomposition_policies/quic_svd_method.hpp |   2 +-
 .../randomized_svd_method.hpp                      |   2 +-
 src/mlpack/methods/pca/pca.hpp                     |   2 +-
 src/mlpack/methods/pca/pca_impl.hpp                |   3 +-
 src/mlpack/methods/pca/pca_main.cpp                |   3 +-
 .../initialization_methods/random_init.hpp         |   2 +-
 .../initialization_methods/zero_init.hpp           |   2 +-
 .../learning_policies/simple_weight_update.hpp     |   2 +-
 src/mlpack/methods/perceptron/perceptron.hpp       |   2 +-
 src/mlpack/methods/perceptron/perceptron_main.cpp  |   4 +-
 .../preprocess/preprocess_binarize_main.cpp        |   3 +-
 .../preprocess/preprocess_describe_main.cpp        |   3 +-
 .../methods/preprocess/preprocess_imputer_main.cpp |   3 +-
 .../methods/preprocess/preprocess_split_main.cpp   |   3 +-
 src/mlpack/methods/quic_svd/quic_svd.hpp           |   2 +-
 src/mlpack/methods/radical/radical.cpp             |   2 +
 src/mlpack/methods/radical/radical.hpp             |   2 +-
 src/mlpack/methods/radical/radical_main.cpp        |   4 +-
 .../methods/randomized_svd/randomized_svd.hpp      |   2 +-
 src/mlpack/methods/range_search/range_search.hpp   |   2 +-
 .../methods/range_search/range_search_main.cpp     |   2 +-
 .../methods/range_search/range_search_stat.hpp     |   2 +-
 src/mlpack/methods/range_search/rs_model.cpp       |   1 +
 src/mlpack/methods/rann/krann_main.cpp             |   2 +-
 src/mlpack/methods/rann/ra_model_impl.hpp          |   1 +
 src/mlpack/methods/rann/ra_query_stat.hpp          |   2 +-
 src/mlpack/methods/rann/ra_search.hpp              |   2 +-
 src/mlpack/methods/rann/ra_search_impl.hpp         |   2 +-
 src/mlpack/methods/rann/ra_util.hpp                |   2 +-
 .../methods/regularized_svd/regularized_svd.hpp    |   2 +-
 .../regularized_svd/regularized_svd_function.hpp   |   2 +-
 src/mlpack/methods/rmva/rmva.hpp                   | 963 +++++++++++++++++++++
 src/mlpack/methods/rmva/rmva_main.cpp              | 286 ++++++
 .../softmax_regression/softmax_regression.hpp      |   2 +-
 .../softmax_regression_function.hpp                |   2 +-
 .../softmax_regression/softmax_regression_main.cpp |   3 +-
 .../methods/sparse_autoencoder/maximal_inputs.hpp  |   2 +-
 .../sparse_autoencoder/sparse_autoencoder.hpp      |   2 +-
 .../sparse_autoencoder_function.hpp                |   2 +-
 .../data_dependent_random_initializer.hpp          |   3 +-
 .../methods/sparse_coding/nothing_initializer.hpp  |   2 +-
 .../methods/sparse_coding/random_initializer.hpp   |   2 +-
 src/mlpack/methods/sparse_coding/sparse_coding.cpp |   2 +
 src/mlpack/methods/sparse_coding/sparse_coding.hpp |   2 +-
 .../methods/sparse_coding/sparse_coding_main.cpp   |   3 +-
 src/mlpack/prereqs.hpp                             |   5 +
 src/mlpack/tests/lars_test.cpp                     |   1 +
 314 files changed, 11127 insertions(+), 251 deletions(-)

diff --git a/src/mlpack/core/boost_backport/policy.hpp b/src/mlpack/core/boost_backport/policy.hpp
index 83848c5..391c9d1 100644
--- a/src/mlpack/core/boost_backport/policy.hpp
+++ b/src/mlpack/core/boost_backport/policy.hpp
@@ -6,6 +6,7 @@
 #ifndef BOOST_MATH_POLICY_HPP
 #define BOOST_MATH_POLICY_HPP
 
+#include <mlpack/core.hpp>
 #include <boost/mpl/list.hpp>
 #include <boost/mpl/contains.hpp>
 #include <boost/mpl/if.hpp>
@@ -20,6 +21,8 @@
 #include <boost/static_assert.hpp>
 #include <boost/assert.hpp>
 #include <boost/math/tools/config.hpp>
+//#include <boost/math/policies/policy.hpp>
+//#include <boost/math/tools/precision.hpp>
 #include <limits>
 // Sadly we do need the .h versions of these to be sure of getting
 // FLT_MANT_DIG etc.
diff --git a/src/mlpack/core/data/binarize.hpp b/src/mlpack/core/data/binarize.hpp
index fdc3226..92bd80a 100644
--- a/src/mlpack/core/data/binarize.hpp
+++ b/src/mlpack/core/data/binarize.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_DATA_BINARIZE_HPP
 #define MLPACK_CORE_DATA_BINARIZE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace data {
diff --git a/src/mlpack/core/data/dataset_mapper.hpp b/src/mlpack/core/data/dataset_mapper.hpp
index bb243f0..75d60be 100644
--- a/src/mlpack/core/data/dataset_mapper.hpp
+++ b/src/mlpack/core/data/dataset_mapper.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_CORE_DATA_DATASET_INFO_HPP
 #define MLPACK_CORE_DATA_DATASET_INFO_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <unordered_map>
 #include <boost/bimap.hpp>
 
diff --git a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
index ea9e18d..db2044d 100644
--- a/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/custom_imputation.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_DATA_IMPUTE_STRATEGIES_CUSTOM_IMPUTATION_HPP
 #define MLPACK_CORE_DATA_IMPUTE_STRATEGIES_CUSTOM_IMPUTATION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace data {
diff --git a/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp b/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
index 03fece4..fe2a4b3 100644
--- a/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
+++ b/src/mlpack/core/data/imputation_methods/listwise_deletion.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_DATA_IMPUTE_STRATEGIES_LISTWISE_DELETION_HPP
 #define MLPACK_CORE_DATA_IMPUTE_STRATEGIES_LISTWISE_DELETION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace data {
diff --git a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
index 641e6f6..dbc6000 100644
--- a/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/mean_imputation.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_DATA_IMPUTE_STRATEGIES_MEAN_IMPUTATION_HPP
 #define MLPACK_CORE_DATA_IMPUTE_STRATEGIES_MEAN_IMPUTATION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace data {
diff --git a/src/mlpack/core/data/imputation_methods/median_imputation.hpp b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
index 08b9e89..d696de3 100644
--- a/src/mlpack/core/data/imputation_methods/median_imputation.hpp
+++ b/src/mlpack/core/data/imputation_methods/median_imputation.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_DATA_IMPUTE_STRATEGIES_MEDIAN_IMPUTATION_HPP
 #define MLPACK_CORE_DATA_IMPUTE_STRATEGIES_MEDIAN_IMPUTATION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace data {
diff --git a/src/mlpack/core/data/imputer.hpp b/src/mlpack/core/data/imputer.hpp
index 6991b7b..a5dddfe 100644
--- a/src/mlpack/core/data/imputer.hpp
+++ b/src/mlpack/core/data/imputer.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_DATA_IMPUTER_HPP
 #define MLPACK_CORE_DATA_IMPUTER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "dataset_mapper.hpp"
 #include "map_policies/missing_policy.hpp"
 #include "map_policies/increment_policy.hpp"
diff --git a/src/mlpack/core/data/map_policies/datatype.hpp b/src/mlpack/core/data/map_policies/datatype.hpp
index f6cffc7..73abad4 100644
--- a/src/mlpack/core/data/map_policies/datatype.hpp
+++ b/src/mlpack/core/data/map_policies/datatype.hpp
@@ -11,7 +11,7 @@
 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_DATATYPE_HPP
 #define MLPACK_CORE_DATA_MAP_POLICIES_DATATYPE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace data {
diff --git a/src/mlpack/core/data/map_policies/increment_policy.hpp b/src/mlpack/core/data/map_policies/increment_policy.hpp
index e0cbaf2..3c6c010 100644
--- a/src/mlpack/core/data/map_policies/increment_policy.hpp
+++ b/src/mlpack/core/data/map_policies/increment_policy.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
 #define MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <unordered_map>
 #include <boost/bimap.hpp>
 #include <mlpack/core/data/map_policies/datatype.hpp>
diff --git a/src/mlpack/core/data/map_policies/missing_policy.hpp b/src/mlpack/core/data/map_policies/missing_policy.hpp
index 1900af0..c34e6ba 100644
--- a/src/mlpack/core/data/map_policies/missing_policy.hpp
+++ b/src/mlpack/core/data/map_policies/missing_policy.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
 #define MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <unordered_map>
 #include <boost/bimap.hpp>
 #include <mlpack/core/data/map_policies/datatype.hpp>
diff --git a/src/mlpack/core/data/split_data.hpp b/src/mlpack/core/data/split_data.hpp
index fdac9cb..37d37ba 100644
--- a/src/mlpack/core/data/split_data.hpp
+++ b/src/mlpack/core/data/split_data.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_DATA_SPLIT_DATA_HPP
 #define MLPACK_CORE_DATA_SPLIT_DATA_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace data {
diff --git a/src/mlpack/core/dists/discrete_distribution.hpp b/src/mlpack/core/dists/discrete_distribution.hpp
index cfa4bc7..a31ca0c 100644
--- a/src/mlpack/core/dists/discrete_distribution.hpp
+++ b/src/mlpack/core/dists/discrete_distribution.hpp
@@ -13,7 +13,9 @@
 #ifndef MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
 #define MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/log.hpp>
+#include <mlpack/core/math/random.hpp>
 
 namespace mlpack {
 namespace distribution /** Probability distributions. */ {
diff --git a/src/mlpack/core/dists/gamma_distribution.hpp b/src/mlpack/core/dists/gamma_distribution.hpp
index 766f666..95b2de8 100644
--- a/src/mlpack/core/dists/gamma_distribution.hpp
+++ b/src/mlpack/core/dists/gamma_distribution.hpp
@@ -18,7 +18,9 @@
 #ifndef _MLPACK_CORE_DISTRIBUTIONS_GAMMA_DISTRIBUTION_HPP
 #define _MLPACK_CORE_DISTRIBUTIONS_GAMMA_DISTRIBUTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/random.hpp>
+#include <boost/program_options.hpp>
 
 namespace mlpack {
 namespace distribution {
diff --git a/src/mlpack/core/dists/gaussian_distribution.hpp b/src/mlpack/core/dists/gaussian_distribution.hpp
index 8768b19..d817f40 100644
--- a/src/mlpack/core/dists/gaussian_distribution.hpp
+++ b/src/mlpack/core/dists/gaussian_distribution.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_DISTRIBUTIONS_GAUSSIAN_DISTRIBUTION_HPP
 #define MLPACK_CORE_DISTRIBUTIONS_GAUSSIAN_DISTRIBUTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace distribution {
diff --git a/src/mlpack/core/dists/laplace_distribution.cpp b/src/mlpack/core/dists/laplace_distribution.cpp
index b01654c..2299ed4 100644
--- a/src/mlpack/core/dists/laplace_distribution.cpp
+++ b/src/mlpack/core/dists/laplace_distribution.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "laplace_distribution.hpp"
 
diff --git a/src/mlpack/core/dists/regression_distribution.hpp b/src/mlpack/core/dists/regression_distribution.hpp
index 4519e17..d38a2a2 100644
--- a/src/mlpack/core/dists/regression_distribution.hpp
+++ b/src/mlpack/core/dists/regression_distribution.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_DISTRIBUTIONS_REGRESSION_DISTRIBUTION_HPP
 #define MLPACK_CORE_DISTRIBUTIONS_REGRESSION_DISTRIBUTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/dists/gaussian_distribution.hpp>
 #include <mlpack/methods/linear_regression/linear_regression.hpp>
 
diff --git a/src/mlpack/core/kernels/cosine_distance.hpp b/src/mlpack/core/kernels/cosine_distance.hpp
index 40a468a..5374d04 100644
--- a/src/mlpack/core/kernels/cosine_distance.hpp
+++ b/src/mlpack/core/kernels/cosine_distance.hpp
@@ -13,7 +13,8 @@
 #ifndef MLPACK_CORE_KERNELS_COSINE_DISTANCE_HPP
 #define MLPACK_CORE_KERNELS_COSINE_DISTANCE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/kernels/kernel_traits.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/epanechnikov_kernel.hpp b/src/mlpack/core/kernels/epanechnikov_kernel.hpp
index 7f4c404..07a5eae 100644
--- a/src/mlpack/core/kernels/epanechnikov_kernel.hpp
+++ b/src/mlpack/core/kernels/epanechnikov_kernel.hpp
@@ -12,7 +12,8 @@
 #ifndef MLPACK_CORE_KERNELS_EPANECHNIKOV_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_EPANECHNIKOV_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/kernels/kernel_traits.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/epanechnikov_kernel_impl.hpp b/src/mlpack/core/kernels/epanechnikov_kernel_impl.hpp
index d4226bb..15308cb 100644
--- a/src/mlpack/core/kernels/epanechnikov_kernel_impl.hpp
+++ b/src/mlpack/core/kernels/epanechnikov_kernel_impl.hpp
@@ -14,6 +14,7 @@
 
 // In case it hasn't already been included.
 #include "epanechnikov_kernel.hpp"
+#include <mlpack/core/util/log.hpp>
 
 #include <mlpack/core/metrics/lmetric.hpp>
 
diff --git a/src/mlpack/core/kernels/example_kernel.hpp b/src/mlpack/core/kernels/example_kernel.hpp
index f715da0..4272535 100644
--- a/src/mlpack/core/kernels/example_kernel.hpp
+++ b/src/mlpack/core/kernels/example_kernel.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_KERNELS_EXAMPLE_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_EXAMPLE_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 
diff --git a/src/mlpack/core/kernels/gaussian_kernel.hpp b/src/mlpack/core/kernels/gaussian_kernel.hpp
index 9b29539..791cea1 100644
--- a/src/mlpack/core/kernels/gaussian_kernel.hpp
+++ b/src/mlpack/core/kernels/gaussian_kernel.hpp
@@ -14,8 +14,9 @@
 #ifndef MLPACK_CORE_KERNELS_GAUSSIAN_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_GAUSSIAN_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
+#include <mlpack/core/kernels/kernel_traits.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/hyperbolic_tangent_kernel.hpp b/src/mlpack/core/kernels/hyperbolic_tangent_kernel.hpp
index fdc1f3b..b4637f8 100644
--- a/src/mlpack/core/kernels/hyperbolic_tangent_kernel.hpp
+++ b/src/mlpack/core/kernels/hyperbolic_tangent_kernel.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_KERNELS_HYPERBOLIC_TANGENT_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_HYPERBOLIC_TANGENT_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/laplacian_kernel.hpp b/src/mlpack/core/kernels/laplacian_kernel.hpp
index 1f85f8b..3574da3 100644
--- a/src/mlpack/core/kernels/laplacian_kernel.hpp
+++ b/src/mlpack/core/kernels/laplacian_kernel.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_KERNELS_LAPLACIAN_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_LAPLACIAN_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/linear_kernel.hpp b/src/mlpack/core/kernels/linear_kernel.hpp
index 7f586bb..c5b28f4 100644
--- a/src/mlpack/core/kernels/linear_kernel.hpp
+++ b/src/mlpack/core/kernels/linear_kernel.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_KERNELS_LINEAR_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_LINEAR_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/polynomial_kernel.hpp b/src/mlpack/core/kernels/polynomial_kernel.hpp
index 5d7902b..ea4b9bc 100644
--- a/src/mlpack/core/kernels/polynomial_kernel.hpp
+++ b/src/mlpack/core/kernels/polynomial_kernel.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_KERNELS_POLYNOMIAL_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_POLYNOMIAL_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/pspectrum_string_kernel.hpp b/src/mlpack/core/kernels/pspectrum_string_kernel.hpp
index 14dc3c9..a5158e9 100644
--- a/src/mlpack/core/kernels/pspectrum_string_kernel.hpp
+++ b/src/mlpack/core/kernels/pspectrum_string_kernel.hpp
@@ -20,7 +20,8 @@
 #include <string>
 #include <vector>
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/log.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/spherical_kernel.hpp b/src/mlpack/core/kernels/spherical_kernel.hpp
index c6baf2e..a5b3df1 100644
--- a/src/mlpack/core/kernels/spherical_kernel.hpp
+++ b/src/mlpack/core/kernels/spherical_kernel.hpp
@@ -11,7 +11,7 @@
 #define MLPACK_CORE_KERNELS_SPHERICAL_KERNEL_HPP
 
 #include <boost/math/special_functions/gamma.hpp>
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/core/kernels/triangular_kernel.hpp b/src/mlpack/core/kernels/triangular_kernel.hpp
index 022ae4f..344d8db 100644
--- a/src/mlpack/core/kernels/triangular_kernel.hpp
+++ b/src/mlpack/core/kernels/triangular_kernel.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_KERNELS_TRIANGULAR_KERNEL_HPP
 #define MLPACK_CORE_KERNELS_TRIANGULAR_KERNEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/core/math/columns_to_blocks.hpp b/src/mlpack/core/math/columns_to_blocks.hpp
index 5ba6d95..e9f12de 100644
--- a/src/mlpack/core/math/columns_to_blocks.hpp
+++ b/src/mlpack/core/math/columns_to_blocks.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NN_COLUMNS_TO_BLOCKS_HPP
 #define MLPACK_METHODS_NN_COLUMNS_TO_BLOCKS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace math {
diff --git a/src/mlpack/core/math/lin_alg.cpp b/src/mlpack/core/math/lin_alg.cpp
index 0d8d6de..af7fa91 100644
--- a/src/mlpack/core/math/lin_alg.cpp
+++ b/src/mlpack/core/math/lin_alg.cpp
@@ -10,7 +10,8 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 #include "lin_alg.hpp"
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/random.hpp>
 
 using namespace mlpack;
 using namespace math;
diff --git a/src/mlpack/core/metrics/lmetric.hpp b/src/mlpack/core/metrics/lmetric.hpp
index a84b2c6..71ef07a 100644
--- a/src/mlpack/core/metrics/lmetric.hpp
+++ b/src/mlpack/core/metrics/lmetric.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_CORE_METRICS_LMETRIC_HPP
 #define MLPACK_CORE_METRICS_LMETRIC_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace metric {
diff --git a/src/mlpack/core/metrics/mahalanobis_distance.hpp b/src/mlpack/core/metrics/mahalanobis_distance.hpp
index ef1c48b..4ffe453 100644
--- a/src/mlpack/core/metrics/mahalanobis_distance.hpp
+++ b/src/mlpack/core/metrics/mahalanobis_distance.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_METRICS_MAHALANOBIS_DISTANCE_HPP
 #define MLPACK_CORE_METRICS_MAHALANOBIS_DISTANCE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace metric {
diff --git a/src/mlpack/core/optimizers/adadelta/ada_delta.hpp b/src/mlpack/core/optimizers/adadelta/ada_delta.hpp
index ba0a1be..dec379f 100644
--- a/src/mlpack/core/optimizers/adadelta/ada_delta.hpp
+++ b/src/mlpack/core/optimizers/adadelta/ada_delta.hpp
@@ -15,7 +15,7 @@
 #ifndef __MLPACK_CORE_OPTIMIZERS_ADADELTA_ADA_DELTA_HPP
 #define __MLPACK_CORE_OPTIMIZERS_ADADELTA_ADA_DELTA_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/adam/adam.hpp b/src/mlpack/core/optimizers/adam/adam.hpp
index dc430bc..352606c 100644
--- a/src/mlpack/core/optimizers/adam/adam.hpp
+++ b/src/mlpack/core/optimizers/adam/adam.hpp
@@ -16,7 +16,7 @@
 #ifndef __MLPACK_CORE_OPTIMIZERS_ADAM_ADAM_HPP
 #define __MLPACK_CORE_OPTIMIZERS_ADAM_ADAM_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp b/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp
index 10f8d4e..96686c5 100644
--- a/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp
+++ b/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_AUG_LAGRANGIAN_AUG_LAGRANGIAN_HPP
 #define MLPACK_CORE_OPTIMIZERS_AUG_LAGRANGIAN_AUG_LAGRANGIAN_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>
 
 #include "aug_lagrangian_function.hpp"
diff --git a/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_function.hpp b/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_function.hpp
index 7e6aaff..7f01d48 100644
--- a/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_function.hpp
+++ b/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_function.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_AUG_LAGRANGIAN_AUG_LAGRANGIAN_FUNCTION_HPP
 #define MLPACK_CORE_OPTIMIZERS_AUG_LAGRANGIAN_AUG_LAGRANGIAN_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_test_functions.hpp b/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_test_functions.hpp
index 1febb43..430ebb3 100644
--- a/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_test_functions.hpp
+++ b/src/mlpack/core/optimizers/aug_lagrangian/aug_lagrangian_test_functions.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_AUG_LAGRANGIAN_TEST_FUNCTIONS_HPP
 #define MLPACK_CORE_OPTIMIZERS_AUG_LAGRANGIAN_TEST_FUNCTIONS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp
index 737a859..f7b0005 100644
--- a/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp
+++ b/src/mlpack/core/optimizers/gradient_descent/gradient_descent.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_GRADIENT_DESCENT_GRADIENT_DESCENT_HPP
 #define MLPACK_CORE_OPTIMIZERS_GRADIENT_DESCENT_GRADIENT_DESCENT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/gradient_descent/test_function.hpp b/src/mlpack/core/optimizers/gradient_descent/test_function.hpp
index 56b9192..63d4a62 100644
--- a/src/mlpack/core/optimizers/gradient_descent/test_function.hpp
+++ b/src/mlpack/core/optimizers/gradient_descent/test_function.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_GD_TEST_FUNCTION_HPP
 #define MLPACK_CORE_OPTIMIZERS_GD_TEST_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
index 2edc108..422e6c7 100644
--- a/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/lbfgs.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_LBFGS_LBFGS_HPP
 #define MLPACK_CORE_OPTIMIZERS_LBFGS_LBFGS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/lbfgs/test_functions.hpp b/src/mlpack/core/optimizers/lbfgs/test_functions.hpp
index 4ed54d3..b44844e 100644
--- a/src/mlpack/core/optimizers/lbfgs/test_functions.hpp
+++ b/src/mlpack/core/optimizers/lbfgs/test_functions.hpp
@@ -18,7 +18,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_LBFGS_TEST_FUNCTIONS_HPP
 #define MLPACK_CORE_OPTIMIZERS_LBFGS_TEST_FUNCTIONS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 // To fulfill the template policy class 'FunctionType', we must implement
 // the following:
diff --git a/src/mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp b/src/mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp
index e9cb089..e562b4c 100644
--- a/src/mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp
+++ b/src/mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_MINIBATCH_SGD_MINIBATCH_SGD_HPP
 #define MLPACK_CORE_OPTIMIZERS_MINIBATCH_SGD_MINIBATCH_SGD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp
index 5e04402..58969e5 100644
--- a/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp
+++ b/src/mlpack/core/optimizers/rmsprop/rmsprop.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_HPP
 #define MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/sdp/lrsdp.hpp b/src/mlpack/core/optimizers/sdp/lrsdp.hpp
index d7e0df1..8bfa943 100644
--- a/src/mlpack/core/optimizers/sdp/lrsdp.hpp
+++ b/src/mlpack/core/optimizers/sdp/lrsdp.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_SDP_LRSDP_HPP
 #define MLPACK_CORE_OPTIMIZERS_SDP_LRSDP_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp>
 
 #include "lrsdp_function.hpp"
diff --git a/src/mlpack/core/optimizers/sdp/lrsdp_function.hpp b/src/mlpack/core/optimizers/sdp/lrsdp_function.hpp
index aa195b2..6493dfb 100644
--- a/src/mlpack/core/optimizers/sdp/lrsdp_function.hpp
+++ b/src/mlpack/core/optimizers/sdp/lrsdp_function.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_SDP_LRSDP_FUNCTION_HPP
 #define MLPACK_CORE_OPTIMIZERS_SDP_LRSDP_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/aug_lagrangian/aug_lagrangian.hpp>
 #include <mlpack/core/optimizers/sdp/sdp.hpp>
 
diff --git a/src/mlpack/core/optimizers/sdp/primal_dual.hpp b/src/mlpack/core/optimizers/sdp/primal_dual.hpp
index 62ab0cd..9915115 100644
--- a/src/mlpack/core/optimizers/sdp/primal_dual.hpp
+++ b/src/mlpack/core/optimizers/sdp/primal_dual.hpp
@@ -11,7 +11,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_SDP_PRIMAL_DUAL_HPP
 #define MLPACK_CORE_OPTIMIZERS_SDP_PRIMAL_DUAL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/sdp/sdp.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/core/optimizers/sdp/sdp.hpp b/src/mlpack/core/optimizers/sdp/sdp.hpp
index 2409057..5a0c89e 100644
--- a/src/mlpack/core/optimizers/sdp/sdp.hpp
+++ b/src/mlpack/core/optimizers/sdp/sdp.hpp
@@ -11,7 +11,8 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_SDP_SDP_HPP
 #define MLPACK_CORE_OPTIMIZERS_SDP_SDP_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/lin_alg.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/sgd/sgd.hpp b/src/mlpack/core/optimizers/sgd/sgd.hpp
index 9d3518d..fbf6021 100644
--- a/src/mlpack/core/optimizers/sgd/sgd.hpp
+++ b/src/mlpack/core/optimizers/sgd/sgd.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_SGD_SGD_HPP
 #define MLPACK_CORE_OPTIMIZERS_SGD_SGD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/optimizers/sgd/test_function.hpp b/src/mlpack/core/optimizers/sgd/test_function.hpp
index bed380c..0ed0743 100644
--- a/src/mlpack/core/optimizers/sgd/test_function.hpp
+++ b/src/mlpack/core/optimizers/sgd/test_function.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_OPTIMIZERS_SGD_TEST_FUNCTION_HPP
 #define MLPACK_CORE_OPTIMIZERS_SGD_TEST_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace optimization {
diff --git a/src/mlpack/core/tree/ballbound.hpp b/src/mlpack/core/tree/ballbound.hpp
index a5e002c..aff0699 100644
--- a/src/mlpack/core/tree/ballbound.hpp
+++ b/src/mlpack/core/tree/ballbound.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_BALLBOUND_HPP
 #define MLPACK_CORE_TREE_BALLBOUND_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include "bound_traits.hpp"
 
diff --git a/src/mlpack/core/tree/ballbound_impl.hpp b/src/mlpack/core/tree/ballbound_impl.hpp
index 94a5369..a6f4ba1 100644
--- a/src/mlpack/core/tree/ballbound_impl.hpp
+++ b/src/mlpack/core/tree/ballbound_impl.hpp
@@ -14,6 +14,7 @@
 
 // In case it hasn't been included already.
 #include "ballbound.hpp"
+#include <mlpack/core/math/clamp.hpp>
 
 #include <string>
 
diff --git a/src/mlpack/core/tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree.hpp
index 1b6f1d2..6ffcf62 100644
--- a/src/mlpack/core/tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "bounds.hpp"
 #include "binary_space_tree/midpoint_split.hpp"
 #include "binary_space_tree/mean_split.hpp"
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index 7d16fd8..f9e19b5 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@ -11,7 +11,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_BINARY_SPACE_TREE_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_BINARY_SPACE_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "../statistic.hpp"
 #include "midpoint_split.hpp"
diff --git a/src/mlpack/core/tree/binary_space_tree/breadth_first_dual_tree_traverser.hpp b/src/mlpack/core/tree/binary_space_tree/breadth_first_dual_tree_traverser.hpp
index 8985da8..90c361b 100644
--- a/src/mlpack/core/tree/binary_space_tree/breadth_first_dual_tree_traverser.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/breadth_first_dual_tree_traverser.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_BREADTH_FIRST_DUAL_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_BREADTH_FIRST_DUAL_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <queue>
 
 #include "../binary_space_tree.hpp"
diff --git a/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp b/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp
index cc1a429..8129efe 100644
--- a/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/dual_tree_traverser.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_DUAL_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "binary_space_tree.hpp"
 
diff --git a/src/mlpack/core/tree/binary_space_tree/mean_split.hpp b/src/mlpack/core/tree/binary_space_tree/mean_split.hpp
index 02cc5d0..b194461 100644
--- a/src/mlpack/core/tree/binary_space_tree/mean_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/mean_split.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_MEAN_SPLIT_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_MEAN_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/tree/perform_split.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/binary_space_tree/midpoint_split.hpp b/src/mlpack/core/tree/binary_space_tree/midpoint_split.hpp
index 779a301..cbbe861 100644
--- a/src/mlpack/core/tree/binary_space_tree/midpoint_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/midpoint_split.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_MIDPOINT_SPLIT_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_MIDPOINT_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/tree/perform_split.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
index b7400ee..cc120eb 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_max_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_RP_TREE_MAX_SPLIT_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_RP_TREE_MAX_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/tree/perform_split.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
index eddb73d..3d8d8a6 100644
--- a/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/rp_tree_mean_split.hpp
@@ -13,9 +13,10 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_RP_TREE_MEAN_SPLIT_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_RP_TREE_MEAN_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "rp_tree_max_split.hpp"
 #include <mlpack/core/tree/perform_split.hpp>
+#include <mlpack/core/math/lin_alg.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/binary_space_tree/single_tree_traverser.hpp b/src/mlpack/core/tree/binary_space_tree/single_tree_traverser.hpp
index 9665583..49234b1 100644
--- a/src/mlpack/core/tree/binary_space_tree/single_tree_traverser.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/single_tree_traverser.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_SINGLE_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_SINGLE_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "binary_space_tree.hpp"
 
diff --git a/src/mlpack/core/tree/binary_space_tree/ub_tree_split.hpp b/src/mlpack/core/tree/binary_space_tree/ub_tree_split.hpp
index b914d8a..150cbd2 100644
--- a/src/mlpack/core/tree/binary_space_tree/ub_tree_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/ub_tree_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_UB_TREE_SPLIT_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_UB_TREE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "../address.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/binary_space_tree/vantage_point_split.hpp b/src/mlpack/core/tree/binary_space_tree/vantage_point_split.hpp
index ed24ea0..89271a3 100644
--- a/src/mlpack/core/tree/binary_space_tree/vantage_point_split.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/vantage_point_split.hpp
@@ -13,8 +13,9 @@
 #ifndef MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_HPP
 #define MLPACK_CORE_TREE_BINARY_SPACE_TREE_VANTAGE_POINT_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/tree/perform_split.hpp>
+#include <mlpack/core/math/random.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/cellbound.hpp b/src/mlpack/core/tree/cellbound.hpp
index 2ef5d41..f7f7fd6 100644
--- a/src/mlpack/core/tree/cellbound.hpp
+++ b/src/mlpack/core/tree/cellbound.hpp
@@ -34,7 +34,7 @@
 #ifndef MLPACK_CORE_TREE_CELLBOUND_HPP
 #define MLPACK_CORE_TREE_CELLBOUND_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/math/range.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include "bound_traits.hpp"
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
index 35a4dc1..929ca82 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
@@ -10,6 +10,7 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 #include "cosine_tree.hpp"
+#include <mlpack/core/util/log.hpp>
 
 #include <boost/math/distributions/normal.hpp>
 
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
index 042798a..8e1155d 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
 #define MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <boost/heap/priority_queue.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/cover_tree.hpp b/src/mlpack/core/tree/cover_tree.hpp
index b84ad6c..dcb8862 100644
--- a/src/mlpack/core/tree/cover_tree.hpp
+++ b/src/mlpack/core/tree/cover_tree.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_COVER_TREE_HPP
 #define MLPACK_CORE_TREE_COVER_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "cover_tree/first_point_is_root.hpp"
 #include "cover_tree/cover_tree.hpp"
 #include "cover_tree/single_tree_traverser.hpp"
diff --git a/src/mlpack/core/tree/cover_tree/cover_tree.hpp b/src/mlpack/core/tree/cover_tree/cover_tree.hpp
index af45181..0f8e804 100644
--- a/src/mlpack/core/tree/cover_tree/cover_tree.hpp
+++ b/src/mlpack/core/tree/cover_tree/cover_tree.hpp
@@ -12,7 +12,8 @@
 #ifndef MLPACK_CORE_TREE_COVER_TREE_COVER_TREE_HPP
 #define MLPACK_CORE_TREE_COVER_TREE_COVER_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/range.hpp>
 
 #include "../statistic.hpp"
 #include "first_point_is_root.hpp"
diff --git a/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp b/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp
index 2440be4..98a14f5 100644
--- a/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp
+++ b/src/mlpack/core/tree/cover_tree/dual_tree_traverser.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <queue>
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp b/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp
index 4aa0dad..64dadf3 100644
--- a/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp
+++ b/src/mlpack/core/tree/cover_tree/dual_tree_traverser_impl.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_IMPL_HPP
 #define MLPACK_CORE_TREE_COVER_TREE_DUAL_TREE_TRAVERSER_IMPL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <queue>
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp b/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp
index 42cd037..3e37707 100644
--- a/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp
+++ b/src/mlpack/core/tree/cover_tree/first_point_is_root.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP
 #define MLPACK_CORE_TREE_FIRST_POINT_IS_ROOT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp b/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp
index efc01d6..2411f55 100644
--- a/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp
+++ b/src/mlpack/core/tree/cover_tree/single_tree_traverser.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_COVER_TREE_SINGLE_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_COVER_TREE_SINGLE_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "cover_tree.hpp"
 
diff --git a/src/mlpack/core/tree/greedy_single_tree_traverser.hpp b/src/mlpack/core/tree/greedy_single_tree_traverser.hpp
index c2ba5c5..ea4a745 100644
--- a/src/mlpack/core/tree/greedy_single_tree_traverser.hpp
+++ b/src/mlpack/core/tree/greedy_single_tree_traverser.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_GREEDY_SINGLE_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_GREEDY_SINGLE_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/hollow_ball_bound.hpp b/src/mlpack/core/tree/hollow_ball_bound.hpp
index 15bf154..10299c6 100644
--- a/src/mlpack/core/tree/hollow_ball_bound.hpp
+++ b/src/mlpack/core/tree/hollow_ball_bound.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_HOLLOW_BALL_BOUND_HPP
 #define MLPACK_CORE_TREE_HOLLOW_BALL_BOUND_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include "bound_traits.hpp"
 
diff --git a/src/mlpack/core/tree/hrectbound.hpp b/src/mlpack/core/tree/hrectbound.hpp
index 04b48d8..b541791 100644
--- a/src/mlpack/core/tree/hrectbound.hpp
+++ b/src/mlpack/core/tree/hrectbound.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_HRECTBOUND_HPP
 #define MLPACK_CORE_TREE_HRECTBOUND_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/math/range.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include "bound_traits.hpp"
diff --git a/src/mlpack/core/tree/octree.hpp b/src/mlpack/core/tree/octree.hpp
index f274657..4b7514d 100644
--- a/src/mlpack/core/tree/octree.hpp
+++ b/src/mlpack/core/tree/octree.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_OCTREE_HPP
 #define MLPACK_CORE_TREE_OCTREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "bounds.hpp"
 #include "octree/octree.hpp"
 #include "octree/traits.hpp"
diff --git a/src/mlpack/core/tree/octree/dual_tree_traverser.hpp b/src/mlpack/core/tree/octree/dual_tree_traverser.hpp
index 28a185e..a69fbb4 100644
--- a/src/mlpack/core/tree/octree/dual_tree_traverser.hpp
+++ b/src/mlpack/core/tree/octree/dual_tree_traverser.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_OCTREE_DUAL_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_OCTREE_DUAL_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "octree.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/octree/octree.hpp b/src/mlpack/core/tree/octree/octree.hpp
index c5c8603..46f3714 100644
--- a/src/mlpack/core/tree/octree/octree.hpp
+++ b/src/mlpack/core/tree/octree/octree.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_OCTREE_OCTREE_HPP
 #define MLPACK_CORE_TREE_OCTREE_OCTREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "../hrectbound.hpp"
 #include "../statistic.hpp"
 
diff --git a/src/mlpack/core/tree/octree/single_tree_traverser.hpp b/src/mlpack/core/tree/octree/single_tree_traverser.hpp
index c1317fa..6eeb2f4 100644
--- a/src/mlpack/core/tree/octree/single_tree_traverser.hpp
+++ b/src/mlpack/core/tree/octree/single_tree_traverser.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_OCTREE_SINGLE_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_OCTREE_SINGLE_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "octree.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/rectangle_tree/discrete_hilbert_value.hpp b/src/mlpack/core/tree/rectangle_tree/discrete_hilbert_value.hpp
index beb9158..40acca7 100644
--- a/src/mlpack/core/tree/rectangle_tree/discrete_hilbert_value.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/discrete_hilbert_value.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/rectangle_tree/dual_tree_traverser.hpp b/src/mlpack/core/tree/rectangle_tree/dual_tree_traverser.hpp
index a329a8b..e2a390d 100644
--- a/src/mlpack/core/tree/rectangle_tree/dual_tree_traverser.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/dual_tree_traverser.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_DUAL_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_DUAL_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "rectangle_tree.hpp"
 
diff --git a/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_descent_heuristic.hpp b/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_descent_heuristic.hpp
index 4d749e4..1a56470 100644
--- a/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_descent_heuristic.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_descent_heuristic.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_HILBERT_R_TREE_DESCENT_HEURISTIC_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_HILBERT_R_TREE_DESCENT_HEURISTIC_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_split.hpp
index 85eafdd..dca557b 100644
--- a/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/hilbert_r_tree_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_HILBERT_R_TREE_SPLIT_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_HILBERT_R_TREE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_auxiliary_information.hpp b/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_auxiliary_information.hpp
index 2611259..6893352 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_auxiliary_information.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_auxiliary_information.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_PLUS_TREE_AUXILIARY_INFORMATION_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_PLUS_TREE_AUXILIARY_INFORMATION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "../hrectbound.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_descent_heuristic.hpp b/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_descent_heuristic.hpp
index c656852..c813470 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_descent_heuristic.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_plus_plus_tree_descent_heuristic.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_PLUS_TREE_DESCENT_HEURISTIC_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_PLUS_TREE_DESCENT_HEURISTIC_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_plus_tree_descent_heuristic.hpp b/src/mlpack/core/tree/rectangle_tree/r_plus_tree_descent_heuristic.hpp
index 44cafdd..717b969 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_plus_tree_descent_heuristic.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_plus_tree_descent_heuristic.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_DESCENT_HEURISTIC_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_DESCENT_HEURISTIC_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_plus_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_plus_tree_split.hpp
index 701f522..0b0a5d8 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_plus_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_plus_tree_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_descent_heuristic.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_descent_heuristic.hpp
index 0dcb844..cfc8b05 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_descent_heuristic.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_descent_heuristic.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_STAR_TREE_DESCENT_HEURISTIC_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_STAR_TREE_DESCENT_HEURISTIC_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
index 16868bd..6370120 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_STAR_TREE_SPLIT_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_STAR_TREE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_descent_heuristic.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_descent_heuristic.hpp
index 8fd635a..e79a885 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_descent_heuristic.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_descent_heuristic.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_TREE_DESCENT_HEURISTIC_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_TREE_DESCENT_HEURISTIC_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
index 67aa28d..0375876 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_TREE_SPLIT_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_R_TREE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
index b73b8e1..166b4d7 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "../hrectbound.hpp"
 #include "../statistic.hpp"
diff --git a/src/mlpack/core/tree/rectangle_tree/single_tree_traverser.hpp b/src/mlpack/core/tree/rectangle_tree/single_tree_traverser.hpp
index 229d992..d8f465c 100644
--- a/src/mlpack/core/tree/rectangle_tree/single_tree_traverser.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/single_tree_traverser.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_SINGLE_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_SINGLE_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "rectangle_tree.hpp"
 
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
index 080376f..a3d6cef 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_X_TREE_SPLIT_HPP
 #define MLPACK_CORE_TREE_RECTANGLE_TREE_X_TREE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree /** Trees and tree-building procedures. */ {
diff --git a/src/mlpack/core/tree/space_split/hyperplane.hpp b/src/mlpack/core/tree/space_split/hyperplane.hpp
index befa0fd..741ddd5 100644
--- a/src/mlpack/core/tree/space_split/hyperplane.hpp
+++ b/src/mlpack/core/tree/space_split/hyperplane.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_HYPERPLANE_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_HYPERPLANE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "projection_vector.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/space_split/mean_space_split.hpp b/src/mlpack/core/tree/space_split/mean_space_split.hpp
index 3d220d2..622dae7 100644
--- a/src/mlpack/core/tree/space_split/mean_space_split.hpp
+++ b/src/mlpack/core/tree/space_split/mean_space_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_MEAN_SPACE_SPLIT_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_MEAN_SPACE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "hyperplane.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/space_split/midpoint_space_split.hpp b/src/mlpack/core/tree/space_split/midpoint_space_split.hpp
index a0eb0a6..58266bb 100644
--- a/src/mlpack/core/tree/space_split/midpoint_space_split.hpp
+++ b/src/mlpack/core/tree/space_split/midpoint_space_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_MIDPOINT_SPACE_SPLIT_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_MIDPOINT_SPACE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "hyperplane.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/space_split/projection_vector.hpp b/src/mlpack/core/tree/space_split/projection_vector.hpp
index 824b9df..3118b28 100644
--- a/src/mlpack/core/tree/space_split/projection_vector.hpp
+++ b/src/mlpack/core/tree/space_split/projection_vector.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_PROJECTION_VECTOR_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_PROJECTION_VECTOR_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "../bounds.hpp"
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/core/tree/space_split/space_split.hpp b/src/mlpack/core/tree/space_split/space_split.hpp
index 2f1f47d..4982d67 100644
--- a/src/mlpack/core/tree/space_split/space_split.hpp
+++ b/src/mlpack/core/tree/space_split/space_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_SPACE_SPLIT_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_SPACE_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "hyperplane.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/core/tree/spill_tree.hpp b/src/mlpack/core/tree/spill_tree.hpp
index 592fb97..0f21a70 100644
--- a/src/mlpack/core/tree/spill_tree.hpp
+++ b/src/mlpack/core/tree/spill_tree.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "bounds.hpp"
 #include "spill_tree/is_spill_tree.hpp"
 #include "spill_tree/spill_tree.hpp"
diff --git a/src/mlpack/core/tree/spill_tree/spill_dual_tree_traverser.hpp b/src/mlpack/core/tree/spill_tree/spill_dual_tree_traverser.hpp
index 87246fa..210dea7 100644
--- a/src/mlpack/core/tree/spill_tree/spill_dual_tree_traverser.hpp
+++ b/src/mlpack/core/tree/spill_tree/spill_dual_tree_traverser.hpp
@@ -18,7 +18,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_SPILL_DUAL_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_SPILL_DUAL_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "spill_tree.hpp"
 
diff --git a/src/mlpack/core/tree/spill_tree/spill_single_tree_traverser.hpp b/src/mlpack/core/tree/spill_tree/spill_single_tree_traverser.hpp
index 09704a2..9979436 100644
--- a/src/mlpack/core/tree/spill_tree/spill_single_tree_traverser.hpp
+++ b/src/mlpack/core/tree/spill_tree/spill_single_tree_traverser.hpp
@@ -17,7 +17,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_SPILL_SINGLE_TREE_TRAVERSER_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_SPILL_SINGLE_TREE_TRAVERSER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "spill_tree.hpp"
 
diff --git a/src/mlpack/core/tree/spill_tree/spill_tree.hpp b/src/mlpack/core/tree/spill_tree/spill_tree.hpp
index 084c808..45a62a4 100644
--- a/src/mlpack/core/tree/spill_tree/spill_tree.hpp
+++ b/src/mlpack/core/tree/spill_tree/spill_tree.hpp
@@ -11,7 +11,7 @@
 #ifndef MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
 #define MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "../space_split/midpoint_space_split.hpp"
 #include "../statistic.hpp"
 
diff --git a/src/mlpack/core/util/param.hpp b/src/mlpack/core/util/param.hpp
index f13a52a..c44e464 100644
--- a/src/mlpack/core/util/param.hpp
+++ b/src/mlpack/core/util/param.hpp
@@ -15,6 +15,8 @@
 #ifndef MLPACK_CORE_UTIL_PARAM_HPP
 #define MLPACK_CORE_UTIL_PARAM_HPP
 
+#include <mlpack/core/util/cli.hpp>
+
 /**
  * Document an executable.  Only one instance of this macro should be
  * present in your program!  Therefore, use it in the main.cpp
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index ec74bb5..d273282 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -28,7 +28,7 @@
 #ifndef MLPACK_METHODS_ADABOOST_ADABOOST_HPP
 #define MLPACK_METHODS_ADABOOST_ADABOOST_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/perceptron/perceptron.hpp>
 #include <mlpack/methods/decision_stump/decision_stump.hpp>
 
diff --git a/src/mlpack/methods/adaboost/adaboost_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
index baa23b4..d7ee450 100644
--- a/src/mlpack/methods/adaboost/adaboost_main.cpp
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -32,7 +32,9 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/data/normalize_labels.hpp>
 #include "adaboost.hpp"
 
 using namespace mlpack;
diff --git a/src/mlpack/methods/amf/amf.hpp b/src/mlpack/methods/amf/amf.hpp
index 9cecef9..3976e22 100644
--- a/src/mlpack/methods/amf/amf.hpp
+++ b/src/mlpack/methods/amf/amf.hpp
@@ -18,7 +18,7 @@
 #ifndef MLPACK_METHODS_AMF_AMF_HPP
 #define MLPACK_METHODS_AMF_AMF_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <mlpack/methods/amf/update_rules/nmf_mult_dist.hpp>
 #include <mlpack/methods/amf/update_rules/nmf_als.hpp>
diff --git a/src/mlpack/methods/amf/init_rules/average_init.hpp b/src/mlpack/methods/amf/init_rules/average_init.hpp
index 21404f1..5b33853 100644
--- a/src/mlpack/methods/amf/init_rules/average_init.hpp
+++ b/src/mlpack/methods/amf/init_rules/average_init.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_AMF_AVERAGE_INIT_HPP
 #define MLPACK_METHODS_AMF_AVERAGE_INIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/init_rules/given_init.hpp b/src/mlpack/methods/amf/init_rules/given_init.hpp
index c9ee9cc..45ce118 100644
--- a/src/mlpack/methods/amf/init_rules/given_init.hpp
+++ b/src/mlpack/methods/amf/init_rules/given_init.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_AMF_INIT_RULES_GIVEN_INIT_HPP
 #define MLPACK_METHODS_AMF_INIT_RULES_GIVEN_INIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/init_rules/random_acol_init.hpp b/src/mlpack/methods/amf/init_rules/random_acol_init.hpp
index dad9bc8..224aa73 100644
--- a/src/mlpack/methods/amf/init_rules/random_acol_init.hpp
+++ b/src/mlpack/methods/amf/init_rules/random_acol_init.hpp
@@ -12,7 +12,8 @@
 #ifndef MLPACK_METHODS_LMF_RANDOM_ACOL_INIT_HPP
 #define MLPACK_METHODS_LMF_RANDOM_ACOL_INIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/random.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/init_rules/random_init.hpp b/src/mlpack/methods/amf/init_rules/random_init.hpp
index 77b19cd..155e7b3 100644
--- a/src/mlpack/methods/amf/init_rules/random_init.hpp
+++ b/src/mlpack/methods/amf/init_rules/random_init.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_LMF_RANDOM_INIT_HPP
 #define MLPACK_METHODS_LMF_RANDOM_INIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp b/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
index 4b51255..35045c4 100644
--- a/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
@@ -12,7 +12,7 @@
 #ifndef _MLPACK_METHODS_AMF_INCOMPLETE_INCREMENTAL_TERMINATION_HPP
 #define _MLPACK_METHODS_AMF_INCOMPLETE_INCREMENTAL_TERMINATION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
index 551cf94..bc659f2 100644
--- a/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
@@ -12,7 +12,7 @@
 #ifndef _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
 #define _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
index 62445df..1e29c27 100644
--- a/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
@@ -12,7 +12,7 @@
 #ifndef _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
 #define _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp b/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
index 961a332..00053c3 100644
--- a/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
@@ -12,7 +12,7 @@
 #ifndef _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
 #define _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack
 {
diff --git a/src/mlpack/methods/amf/update_rules/nmf_als.hpp b/src/mlpack/methods/amf/update_rules/nmf_als.hpp
index 1ca4e20..fa21a30 100644
--- a/src/mlpack/methods/amf/update_rules/nmf_als.hpp
+++ b/src/mlpack/methods/amf/update_rules/nmf_als.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_LMF_UPDATE_RULES_NMF_ALS_HPP
 #define MLPACK_METHODS_LMF_UPDATE_RULES_NMF_ALS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/update_rules/nmf_mult_dist.hpp b/src/mlpack/methods/amf/update_rules/nmf_mult_dist.hpp
index ea08b0c..30a5850 100644
--- a/src/mlpack/methods/amf/update_rules/nmf_mult_dist.hpp
+++ b/src/mlpack/methods/amf/update_rules/nmf_mult_dist.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIST_UPDATE_RULES_HPP
 #define MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIST_UPDATE_RULES_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/update_rules/nmf_mult_div.hpp b/src/mlpack/methods/amf/update_rules/nmf_mult_div.hpp
index 6c2f596..567b45f 100644
--- a/src/mlpack/methods/amf/update_rules/nmf_mult_div.hpp
+++ b/src/mlpack/methods/amf/update_rules/nmf_mult_div.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIV_HPP
 #define MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIV_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp b/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
index 862c7ee..ba8c22b 100644
--- a/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
+++ b/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_AMF_UPDATE_RULES_SVD_BATCH_LEARNING_HPP
 #define MLPACK_METHODS_AMF_UPDATE_RULES_SVD_BATCH_LEARNING_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace amf {
diff --git a/src/mlpack/methods/amf/update_rules/svd_complete_incremental_learning.hpp b/src/mlpack/methods/amf/update_rules/svd_complete_incremental_learning.hpp
index 2038fa3..7e8a0d3 100644
--- a/src/mlpack/methods/amf/update_rules/svd_complete_incremental_learning.hpp
+++ b/src/mlpack/methods/amf/update_rules/svd_complete_incremental_learning.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_AMF_SVD_COMPLETE_INCREMENTAL_LEARNING_HPP
 #define MLPACK_METHODS_AMF_SVD_COMPLETE_INCREMENTAL_LEARNING_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack
 {
diff --git a/src/mlpack/methods/ann/activation_functions/identity_function.hpp b/src/mlpack/methods/ann/activation_functions/identity_function.hpp
new file mode 100644
index 0000000..b1a1990
--- /dev/null
+++ b/src/mlpack/methods/ann/activation_functions/identity_function.hpp
@@ -0,0 +1,96 @@
+/**
+ * @file identity_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the identity function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_IDENTITY_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_IDENTITY_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The identity function, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& x \\
+ * f'(x) &=& 1
+ * @f}
+ */
+class IdentityFunction
+{
+ public:
+  /**
+   * Computes the identity function.
+   *
+   * @param x Input data.
+   * @return f(x).
+   */
+  static double fn(const double x)
+  {
+    return x;
+  }
+
+  /**
+   * Computes the identity function.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void fn(const InputVecType& x, OutputVecType& y)
+  {
+    y = x;
+  }
+
+  /**
+   * Computes the first derivative of the identity function.
+   *
+   * @param x Input data.
+   * @return f'(x)
+   */
+  static double deriv(const double /* unused */)
+  {
+    return 1.0;
+  }
+
+  /**
+   * Computes the first derivatives of the identity function.
+   *
+   * @param y Input activations.
+   * @param x The resulting derivatives.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void deriv(const InputVecType& y, OutputVecType& x)
+  {
+    x.ones(y.n_elem);
+  }
+
+  /**
+   * Computes the first derivatives of the identity function using a 3rd order
+   * tensor as input.
+   *
+   * @param y Input activations.
+   * @param x The resulting derivatives.
+   */
+  template<typename eT>
+  static void deriv(const arma::Cube<eT>& y, arma::Cube<eT>& x)
+  {
+    x.ones(y.n_rows, y.n_cols, y.n_slices);
+  }
+
+
+}; // class IdentityFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
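
A minimal usage sketch for the new IdentityFunction (Armadillo vectors and a main() wrapper are assumed for illustration; this snippet is not part of the patch):

    #include <mlpack/methods/ann/activation_functions/identity_function.hpp>

    using namespace mlpack::ann;

    int main()
    {
      arma::vec x = {-3.0, 0.5, 7.0};
      arma::vec y, dy;

      IdentityFunction::fn(x, y);      // y is an elementwise copy of x.
      IdentityFunction::deriv(y, dy);  // dy is filled with ones (same length as y).

      y.print("f(x):");
      dy.print("f'(x):");
    }
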
diff --git a/src/mlpack/methods/ann/activation_functions/logistic_function.hpp b/src/mlpack/methods/ann/activation_functions/logistic_function.hpp
new file mode 100644
index 0000000..f818cd4
--- /dev/null
+++ b/src/mlpack/methods/ann/activation_functions/logistic_function.hpp
@@ -0,0 +1,114 @@
+/**
+ * @file logistic_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the logistic function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_LOGISTIC_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_LOGISTIC_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The logistic function, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& \frac{1}{1 + e^{-x}} \\
+ * f'(x) &=& f(x) * (1 - f(x)) \\
+ * f^{-1}(y) &=& \ln\left(\frac{y}{1-y}\right)
+ * @f}
+ */
+class LogisticFunction
+{
+  public:
+  /**
+   * Computes the logistic function.
+   *
+   * @param x Input data.
+   * @return f(x).
+   */
+  template<typename eT>
+  static double fn(const eT x)
+  {
+    if (x < arma::Datum<eT>::log_max)
+    {
+      if (x > -arma::Datum<eT>::log_max)
+        return 1.0 /  (1.0 + std::exp(-x));
+
+      return 0.0;
+    }
+
+    return 1.0;
+  }
+
+  /**
+   * Computes the logistic function.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void fn(const InputVecType& x, OutputVecType& y)
+  {
+    y = (1.0 / (1 + arma::exp(-x)));
+  }
+
+  /**
+   * Computes the first derivative of the logistic function.
+   *
+   * @param y Input activation.
+   * @return f'(x)
+   */
+  static double deriv(const double y)
+  {
+    return y * (1.0 - y);
+  }
+
+  /**
+   * Computes the first derivatives of the logistic function.
+   *
+   * @param y Input activations.
+   * @param x The resulting derivatives.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void deriv(const InputVecType& y, OutputVecType& x)
+  {
+    x = y % (1.0 - y);
+  }
+
+  /**
+   * Computes the inverse of the logistic function.
+   *
+   * @param y Input data.
+   * @return f^{-1}(y)
+   */
+  static double inv(const double y)
+  {
+    return arma::trunc_log(y / (1 - y));
+  }
+
+  /**
+   * Computes the inverse of the logistic function.
+   *
+   * @param y Input data.
+   * @param x The resulting inverse of the input data.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void inv(const InputVecType& y, OutputVecType& x)
+  {
+    x = arma::trunc_log(y / (1 - y));
+  }
+}; // class LogisticFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
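
A similar sketch exercising LogisticFunction (again assuming Armadillo vectors; illustrative only):

    #include <mlpack/methods/ann/activation_functions/logistic_function.hpp>

    using namespace mlpack::ann;

    int main()
    {
      arma::vec x = {-2.0, 0.0, 2.0};
      arma::vec y, dy;

      LogisticFunction::fn(x, y);      // y(i) = 1 / (1 + exp(-x(i)))
      LogisticFunction::deriv(y, dy);  // dy(i) = y(i) * (1 - y(i))

      y.print("activations:");
      dy.print("derivatives:");
    }
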
diff --git a/src/mlpack/methods/ann/activation_functions/rectifier_function.hpp b/src/mlpack/methods/ann/activation_functions/rectifier_function.hpp
new file mode 100644
index 0000000..1241de1
--- /dev/null
+++ b/src/mlpack/methods/ann/activation_functions/rectifier_function.hpp
@@ -0,0 +1,115 @@
+/**
+ * @file rectifier_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the rectifier function as described by
+ * V. Nair and G. E. Hinton.
+ *
+ * For more information, see the following paper.
+ *
+ * @code
+ * @misc{NairHinton2010,
+ *   author = {Vinod Nair and Geoffrey E. Hinton},
+ *   title = {Rectified Linear Units Improve Restricted Boltzmann Machines},
+ *   year = {2010}
+ * }
+ * @endcode
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_RECTIFIER_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_RECTIFIER_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <algorithm>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The rectifier function, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& \max(0, x) \\
+ * f'(x) &=& \left\{
+ *   \begin{array}{lr}
+ *     1 & : x > 0 \\
+ *     0 & : x \le 0
+ *   \end{array}
+ * \right.
+ * @f}
+ */
+class RectifierFunction
+{
+ public:
+  /**
+   * Computes the rectifier function.
+   *
+   * @param x Input data.
+   * @return f(x).
+   */
+  static double fn(const double x)
+  {
+    return std::max(0.0, x);
+  }
+
+  /**
+   * Computes the rectifier function using a dense matrix as input.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename eT>
+  static void fn(const arma::Mat<eT>& x, arma::Mat<eT>& y)
+  {
+    y = arma::max(arma::zeros<arma::Mat<eT> >(x.n_rows, x.n_cols), x);
+  }
+
+  /**
+   * Computes the rectifier function using a 3rd-order tensor as input.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename eT>
+  static void fn(const arma::Cube<eT>& x, arma::Cube<eT>& y)
+  {
+    y = x;
+    for (size_t s = 0; s < x.n_slices; s++)
+      fn(x.slice(s), y.slice(s));
+  }
+
+  /**
+   * Computes the first derivative of the rectifier function.
+   *
+   * @param y Input activation.
+   * @return f'(x)
+   */
+  static double deriv(const double y)
+  {
+    return y > 0;
+  }
+
+  /**
+   * Computes the first derivatives of the rectifier function.
+   *
+   * @param y Input activations.
+   * @param x The resulting derivatives.
+   */
+  template<typename InputType, typename OutputType>
+  static void deriv(const InputType& y, OutputType& x)
+  {
+    x = y;
+
+    for (size_t i = 0; i < y.n_elem; i++)
+      x(i) = deriv(y(i));
+  }
+}; // class RectifierFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
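
A comparable sketch for RectifierFunction, showing the elementwise max and the 0/1 derivative (an Armadillo matrix is assumed; illustrative only):

    #include <mlpack/methods/ann/activation_functions/rectifier_function.hpp>

    using namespace mlpack::ann;

    int main()
    {
      arma::mat x = {{-1.5, 0.0, 2.5}};
      arma::mat y, dy;

      RectifierFunction::fn(x, y);     // y = max(0, x), elementwise.
      RectifierFunction::deriv(y, dy); // 1 where the activation is positive, 0 otherwise.

      y.print("relu(x):");
      dy.print("relu'(x):");
    }
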
diff --git a/src/mlpack/methods/ann/activation_functions/softsign_function.hpp b/src/mlpack/methods/ann/activation_functions/softsign_function.hpp
new file mode 100644
index 0000000..45ef71b
--- /dev/null
+++ b/src/mlpack/methods/ann/activation_functions/softsign_function.hpp
@@ -0,0 +1,134 @@
+/**
+ * @file softsign_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the softsign function as described by
+ * X. Glorot and Y. Bengio.
+ *
+ * For more information, see the following paper.
+ *
+ * @code
+ * @inproceedings{GlorotAISTATS2010,
+ *   title={Understanding the difficulty of training deep feedforward
+ *   neural networks},
+ *   author={Glorot, Xavier and Bengio, Yoshua},
+ *   booktitle={Proceedings of AISTATS 2010},
+ *   year={2010}
+ * }
+ * @endcode
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_SOFTSIGN_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_SOFTSIGN_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The softsign function, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& \frac{x}{1 + |x|} \\
+ * f'(x) &=& (1 - |f(x)|)^2 \\
+ * f^{-1}(y) &=& \left\{
+ *   \begin{array}{lr}
+ *     -\frac{y}{y-1} & : y > 0 \\
+ *     \frac{y}{1 + y} & : y \le 0
+ *   \end{array}
+ * \right.
+ * @f}
+ */
+class SoftsignFunction
+{
+  public:
+  /**
+   * Computes the softsign function.
+   *
+   * @param x Input data.
+   * @return f(x).
+   */
+  static double fn(const double x)
+  {
+    if (x < DBL_MAX)
+      return x > -DBL_MAX ? x / (1.0 + std::abs(x)) : -1.0;
+    return 1.0;
+  }
+
+  /**
+   * Computes the softsign function.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void fn(const InputVecType& x, OutputVecType& y)
+  {
+    y = x;
+
+    for (size_t i = 0; i < x.n_elem; i++)
+      y(i) = fn(x(i));
+  }
+
+  /**
+   * Computes the first derivative of the softsign function.
+   *
+   * @param y Input data.
+   * @return f'(x)
+   */
+  static double deriv(const double y)
+  {
+    return std::pow(1.0 - std::abs(y), 2);
+  }
+
+  /**
+   * Computes the first derivatives of the softsign function.
+   *
+   * @param y Input activations.
+   * @param x The resulting derivatives.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void deriv(const InputVecType& y, OutputVecType& x)
+  {
+    x = arma::pow(1.0 - arma::abs(y), 2);
+  }
+
+  /**
+   * Computes the inverse of the softsign function.
+   *
+   * @param y Input data.
+   * @return f^{-1}(y)
+   */
+  static double inv(const double y)
+  {
+    if (y > 0)
+      return y < 1 ? -y / (y - 1) : DBL_MAX;
+    else
+      return y > -1 ? y / (1 + y) : -DBL_MAX;
+  }
+
+  /**
+   * Computes the inverse of the softsign function.
+   *
+   * @param y Input data.
+   * @param x The resulting inverse of the input data.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void inv(const InputVecType& y, OutputVecType& x)
+  {
+    x = y;
+
+    for (size_t i = 0; i < y.n_elem; i++)
+      x(i) = inv(y(i));
+  }
+}; // class SoftsignFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
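
A short worked example of the softsign mapping and its inverse, using only the scalar overloads (illustrative; not part of the patch):

    #include <mlpack/methods/ann/activation_functions/softsign_function.hpp>
    #include <iostream>

    using namespace mlpack::ann;

    int main()
    {
      const double y  = SoftsignFunction::fn(3.0);   // 3 / (1 + 3) = 0.75
      const double xr = SoftsignFunction::inv(y);    // -0.75 / (0.75 - 1) = 3.0
      const double d  = SoftsignFunction::deriv(y);  // (1 - 0.75)^2 = 0.0625

      std::cout << y << " " << xr << " " << d << std::endl;
    }
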
diff --git a/src/mlpack/methods/ann/activation_functions/tanh_function.hpp b/src/mlpack/methods/ann/activation_functions/tanh_function.hpp
new file mode 100644
index 0000000..4cd81f2
--- /dev/null
+++ b/src/mlpack/methods/ann/activation_functions/tanh_function.hpp
@@ -0,0 +1,105 @@
+/**
+ * @file tanh_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the hyperbolic tangent (tanh) function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_TANH_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_ACTIVATION_FUNCTIONS_TANH_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The tanh function, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& \frac{e^x - e^{-x}}{e^x + e^{-x}} \\
+ * f'(x) &=& 1 - \tanh^2(x) \\
+ * f^{-1}(y) &=& \tanh^{-1}(y)
+ * @f}
+ */
+class TanhFunction
+{
+  public:
+  /**
+   * Computes the tanh function.
+   *
+   * @param x Input data.
+   * @return f(x).
+   */
+  static double fn(const double x)
+  {
+    return std::tanh(x);
+  }
+
+  /**
+   * Computes the tanh function.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void fn(const InputVecType& x, OutputVecType& y)
+  {
+    y = arma::tanh(x);
+  }
+
+  /**
+   * Computes the first derivative of the tanh function.
+   *
+   * @param y Input data.
+   * @return f'(x)
+   */
+  static double deriv(const double y)
+  {
+    return 1 - std::pow(y, 2);
+  }
+
+  /**
+   * Computes the first derivatives of the tanh function.
+   *
+   * @param y Input data.
+   * @param x The resulting derivatives.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void deriv(const InputVecType& y, OutputVecType& x)
+  {
+    x = 1 - arma::pow(y, 2);
+  }
+
+  /**
+   * Computes the inverse of the tanh function.
+   *
+   * @param y Input data.
+   * @return f^{-1}(x)
+   */
+  static double inv(const double y)
+  {
+    return std::atanh(y);
+  }
+
+  /**
+   * Computes the inverse of the tanh function.
+   *
+   * @param y Input data.
+   * @param x The resulting inverse of the input data.
+   */
+  template<typename InputVecType, typename OutputVecType>
+  static void inv(const InputVecType& y, OutputVecType& x)
+  {
+    x = arma::atanh(y);
+  }
+}; // class TanhFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
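
And one more sketch for TanhFunction, whose derivative is computed from the activation itself (Armadillo vectors assumed; illustrative only):

    #include <mlpack/methods/ann/activation_functions/tanh_function.hpp>

    using namespace mlpack::ann;

    int main()
    {
      arma::vec x = {-1.0, 0.0, 1.0};
      arma::vec y, dy;

      TanhFunction::fn(x, y);      // y = tanh(x)
      TanhFunction::deriv(y, dy);  // dy = 1 - y^2

      y.print("tanh(x):");
      dy.print("1 - tanh(x)^2:");
    }
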
diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp
new file mode 100644
index 0000000..0f01ed5
--- /dev/null
+++ b/src/mlpack/methods/ann/cnn.hpp
@@ -0,0 +1,448 @@
+/**
+ * @file cnn.hpp
+ * @author Shangtong Zhang
+ * @author Marcus Edel
+ *
+ * Definition of the CNN class, which implements convolutional neural networks.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_CNN_HPP
+#define MLPACK_METHODS_ANN_CNN_HPP
+
+#include <mlpack/prereqs.hpp>
+
+#include <mlpack/methods/ann/network_util.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
+#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
+#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * An implementation of a standard convolutional network.
+ *
+ * @tparam LayerTypes Contains all layer modules used to construct the network.
+ * @tparam OutputLayerType The output layer type used to evaluate the network.
+ * @tparam PerformanceFunction Performance strategy used to calculate the error.
+ */
+template <
+  typename LayerTypes,
+  typename OutputLayerType,
+  typename InitializationRuleType = NguyenWidrowInitialization,
+  class PerformanceFunction = CrossEntropyErrorFunction<>
+>
+class CNN
+{
+ public:
+  //! Convenience typedef for the internal model construction.
+  using NetworkType = CNN<LayerTypes,
+                          OutputLayerType,
+                          InitializationRuleType,
+                          PerformanceFunction>;
+
+  /**
+   * Create the CNN object with the given predictors and responses set (this is
+   * the set that is used to train the network) and the given optimizer.
+   * Optionally, specify which initialize rule and performance function should
+   * be used.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameters.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType,
+           typename OutputType,
+           template<typename> class OptimizerType>
+  CNN(LayerType &&network,
+      OutputType &&outputLayer,
+      const arma::cube& predictors,
+      const arma::mat& responses,
+      OptimizerType<NetworkType>& optimizer,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Create the CNN object with the given predictors and responses set (this is
+   * the set that is used to train the network). Optionally, specify which
+   * initialize rule and performance function should be used.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameters.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType, typename OutputType>
+  CNN(LayerType &&network,
+      OutputType &&outputLayer,
+      const arma::cube& predictors,
+      const arma::mat& responses,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Create the CNN object with an empty predictors and responses set and
+   * default optimizer. Make sure to call Train(predictors, responses) when
+   * training.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameters.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType, typename OutputType>
+  CNN(LayerType &&network,
+      OutputType &&outputLayer,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+  /**
+   * Train the convolutional neural network on the given input data. By default, the
+   * RMSprop optimization algorithm is used, but others can be specified
+   * (such as mlpack::optimization::SGD).
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(const arma::cube& predictors, const arma::mat& responses);
+
+  /**
+   * Train the convolutional neural network with the given instantiated optimizer.
+   * Using this overload allows configuring the instantiated optimizer before
+   * training is performed.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(OptimizerType<NetworkType>& optimizer);
+
+  /**
+   * Train the convolutional neural network on the given input data using the
+   * given optimizer.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(const arma::cube& predictors,
+             const arma::mat& responses,
+             OptimizerType<NetworkType>& optimizer);
+
+  /**
+   * Predict the responses to a given set of predictors. The responses will
+   * reflect the output of the given output layer as returned by the
+   * OutputClass() function.
+   *
+   * @param predictors Input predictors.
+   * @param responses Matrix to put output predictions of responses into.
+   */
+  void Predict(arma::cube& predictors, arma::mat& responses);
+
+  /**
+   * Evaluate the convolutional neural network with the given parameters. This
+   * function is usually called by the optimizer to train the model.
+   *
+   * @param parameters Matrix of the model parameters.
+   * @param i Index of point to use for objective function evaluation.
+   * @param deterministic Whether or not to train or test the model. Note that
+   * some layers act differently in training and testing mode.
+   */
+  double Evaluate(const arma::mat& parameters,
+                  const size_t i,
+                  const bool deterministic = true);
+
+  /**
+   * Evaluate the gradient of the convolutional neural network with the given
+   * parameters, and with respect to only one point in the dataset. This is
+   * useful for optimizers such as SGD, which require a separable objective
+   * function.
+   *
+   * @param parameters Matrix of the model parameters to be optimized.
+   * @param i Index of points to use for objective function gradient evaluation.
+   * @param gradient Matrix to output gradient into.
+   */
+  void Gradient(const arma::mat& parameters,
+                const size_t i,
+                arma::mat& gradient);
+
+  //! Return the number of separable functions (the number of predictor points).
+  size_t NumFunctions() const { return numFunctions; }
+
+  //! Return the initial point for the optimization.
+  const arma::mat& Parameters() const { return parameter; }
+  //! Modify the initial point for the optimization.
+  arma::mat& Parameters() { return parameter; }
+
+  /**
+   * Serialize the convolutional neural network.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */);
+
+ private:
+  /**
+   * Reset the network by setting the layer status.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& network)
+  {
+    ResetDeterministic(std::get<I>(network));
+    ResetParameter<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Reset the layer status by setting the current deterministic parameter
+   * through all layer that implement the Deterministic function.
+   */
+  template<typename T>
+  typename std::enable_if<
+      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& layer)
+  {
+    layer.Deterministic() = deterministic;
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& /* unused */) { /* Nothing to do here */
+  }
+
+  /**
+   * Run a single iteration of the feed forward algorithm, using the given
+   * input and target vector, store the calculated error into the error
+   * vector.
+   */
+  template<size_t I = 0, typename DataType, typename... Tp>
+  void Forward(const DataType& input, std::tuple<Tp...>& network)
+  {
+    std::get<I>(network).InputParameter() = input;
+
+    std::get<I>(network).Forward(std::get<I>(network).InputParameter(),
+                           std::get<I>(network).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(network);
+  }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& network)
+  {
+    LinkParameter(network);
+  }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& network)
+  {
+    std::get<I>(network).Forward(std::get<I - 1>(network).OutputParameter(),
+        std::get<I>(network).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Link the calculated activation with the connection layer.
+   */
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp...>& network)
+  {
+    if (!LayerTraits<typename std::remove_reference<
+        decltype(std::get<I>(network))>::type>::IsBiasLayer)
+    {
+      std::get<I>(network).InputParameter() = std::get<I - 1>(
+          network).OutputParameter();
+    }
+
+    LinkParameter<I + 1, Tp...>(network);
+  }
+
+  /*
+   * Calculate the output error and update the overall error.
+   */
+  template<typename DataType, typename ErrorType, typename... Tp>
+  double OutputError(const DataType& target,
+                     ErrorType& error,
+                     const std::tuple<Tp...>& network)
+  {
+    // Calculate and store the output error.
+    outputLayer.CalculateError(
+        std::get<sizeof...(Tp) - 1>(network).OutputParameter(), target, error);
+
+    // Measures the network's performance with the specified performance
+    // function.
+    return performanceFunc.Error(network, target, error);
+  }
+
+  /**
+   * Run a single iteration of the feed backward algorithm, using the given
+   * error of the output layer. Note that we iterate backward through the
+   * layer modules.
+   */
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp) - 1), void>::type
+  Backward(const DataType& error, std::tuple<Tp...>& network)
+  {
+    std::get<sizeof...(Tp) - I>(network).Backward(
+        std::get<sizeof...(Tp) - I>(network).OutputParameter(), error,
+        std::get<sizeof...(Tp) - I>(network).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, network);
+  }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I == (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& /* unused */,
+               std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& error, std::tuple<Tp...>& network)
+  {
+    std::get<sizeof...(Tp) - I>(network).Backward(
+        std::get<sizeof...(Tp) - I>(network).OutputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(network).Delta(),
+        std::get<sizeof...(Tp) - I>(network).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, network);
+  }
+
+  /**
+   * Iterate through all layer modules and update the gradient using the
+   * layer defined optimizer.
+   */
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I == Max, void>::type
+  UpdateGradients(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I < Max, void>::type
+  UpdateGradients(std::tuple<Tp...>& network)
+  {
+    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
+           std::get<I + 1>(network).Delta());
+
+    UpdateGradients<I + 1, Max, Tp...>(network);
+  }
+
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      HasGradientCheck<T, P&(T::*)()>::value, void>::type
+  Update(T& layer, P& /* unused */, D& delta)
+  {
+    layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
+  }
+
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
+  Update(T& /* unused */, P& /* unused */, D& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  /*
+   * Calculate and store the output activation.
+   */
+  template<typename DataType, typename... Tp>
+  void OutputPrediction(DataType& output, std::tuple<Tp...>& network)
+  {
+    // Calculate and store the output prediction.
+    outputLayer.OutputClass(std::get<sizeof...(Tp) - 1>(
+        network).OutputParameter(), output);
+  }
+
+  //! Instantiated convolutional neural network.
+  LayerTypes network;
+
+  //! The output layer used to evaluate the network.
+  OutputLayerType& outputLayer;
+
+  //! Performance strategy used to calculate the error.
+  PerformanceFunction performanceFunc;
+
+  //! The current evaluation mode (training or testing).
+  bool deterministic;
+
+  //! Matrix of (trained) parameters.
+  arma::mat parameter;
+
+  //! The cube of data points (predictors).
+  arma::cube predictors;
+
+  //! The matrix of responses to the input data points.
+  arma::mat responses;
+
+  //! The number of separable functions (the number of predictor points).
+  size_t numFunctions;
+
+  //! Locally stored backward error.
+  arma::mat error;
+
+  //! Locally stored sample size.
+  size_t sampleSize;
+}; // class CNN
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation.
+#include "cnn_impl.hpp"
+
+#endif
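The Update() overloads above dispatch on whether a layer type exposes a Gradient() member. Below is a minimal standalone sketch of that dispatch idea (illustration only, not part of this patch; the toy layer types and the HasGradient trait are simplified stand-ins for mlpack's HasGradientCheck):

#include <iostream>
#include <type_traits>

// Simplified stand-in for a member-detection trait such as HasGradientCheck.
template<typename T, typename Sig>
struct HasGradient
{
  template<typename U, U> struct Check;
  template<typename U> static char Test(Check<Sig, &U::Gradient>*);
  template<typename U> static long Test(...);
  static const bool value = (sizeof(Test<T>(0)) == sizeof(char));
};

struct WeightLayer { void Gradient() { std::cout << "gradient updated\n"; } };
struct ActivationLayer { /* No Gradient() member. */ };

// Layers with a Gradient() member get their gradient computed ...
template<typename T>
typename std::enable_if<HasGradient<T, void(T::*)()>::value, void>::type
Update(T& layer) { layer.Gradient(); }

// ... all other layers are skipped at compile time.
template<typename T>
typename std::enable_if<!HasGradient<T, void(T::*)()>::value, void>::type
Update(T& /* layer */) { /* Nothing to do here */ }

int main()
{
  WeightLayer weight;
  ActivationLayer activation;
  Update(weight);      // Prints "gradient updated".
  Update(activation);  // Compiles and does nothing.
  return 0;
}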
diff --git a/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp
new file mode 100644
index 0000000..4eaa038
--- /dev/null
+++ b/src/mlpack/methods/ann/convolution_rules/fft_convolution.hpp
@@ -0,0 +1,221 @@
+/**
+ * @file fft_convolution.hpp
+ * @author Shangtong Zhang
+ * @author Marcus Edel
+ *
+ * Implementation of the convolution through fft.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_CONVOLUTION_RULES_FFT_CONVOLUTION_HPP
+#define MLPACK_METHODS_ANN_CONVOLUTION_RULES_FFT_CONVOLUTION_HPP
+
+#include <mlpack/prereqs.hpp>
+#include "border_modes.hpp"
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Computes the two-dimensional convolution through fft. This class allows
+ * specification of the border type. The convolution can be computed
+ * with the valid border type or the full border type (default).
+ *
+ * FullConvolution: returns the full two-dimensional convolution.
+ * ValidConvolution: returns only those parts of the convolution that are
+ * computed without the zero-padded edges.
+ *
+ * @tparam BorderMode Type of the border mode (FullConvolution or
+ * ValidConvolution).
+ * @tparam padLastDim Pad the last dimension of the input to turn it from
+ * odd to even.
+ */
+template<typename BorderMode = FullConvolution, const bool padLastDim = false>
+class FFTConvolution
+{
+ public:
+  /*
+   * Perform a convolution through fft (valid mode). This method only supports
+   * input which is even on the last dimension. In case of an odd input width, a
+   * user can manually pad the input or specify the padLastDim parameter, which
+   * takes care of the padding. The filter can be of any size, but when using
+   * the valid mode the filter has to be smaller than the input.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT, typename Border = BorderMode>
+  static typename std::enable_if<
+      std::is_same<Border, ValidConvolution>::value, void>::type
+  Convolution(const arma::Mat<eT>& input,
+              const arma::Mat<eT>& filter,
+              arma::Mat<eT>& output)
+  {
+    arma::Mat<eT> inputPadded = input;
+    arma::Mat<eT> filterPadded = filter;
+
+    if (padLastDim)
+      inputPadded.resize(inputPadded.n_rows, inputPadded.n_cols + 1);
+
+    // Pad filter and input to the output shape.
+    filterPadded.resize(inputPadded.n_rows, inputPadded.n_cols);
+
+    output = arma::real(ifft2(arma::fft2(inputPadded) % arma::fft2(
+        filterPadded)));
+
+    // Extract the region of interest. We don't need to handle the padLastDim
+    // parameter in a special way; we just cut it out of the output matrix.
+    output = output.submat(filter.n_rows - 1, filter.n_cols - 1,
+        input.n_rows - 1, input.n_cols - 1);
+  }
+
+  /*
+   * Perform a convolution through fft (full mode). This method only supports
+   * input which is even on the last dimension. In case of an odd input width, a
+   * user can manually pad the input or specify the padLastDim parameter, which
+   * takes care of the padding. The filter can be of any size.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT, typename Border = BorderMode>
+  static typename std::enable_if<
+      std::is_same<Border, FullConvolution>::value, void>::type
+  Convolution(const arma::Mat<eT>& input,
+              const arma::Mat<eT>& filter,
+              arma::Mat<eT>& output)
+  {
+    // In case of the full convolution, outputRows and outputCols don't
+    // represent the true output size when the padLastDim parameter is set;
+    // instead, they describe the working size.
+    const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1);
+    size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1);
+
+    if (padLastDim)
+        outputCols++;
+
+    // Pad filter and input to the working output shape.
+    arma::Mat<eT> inputPadded = arma::zeros<arma::Mat<eT> >(outputRows,
+        outputCols);
+    inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1,
+          filter.n_rows - 1 + input.n_rows - 1,
+          filter.n_cols - 1 + input.n_cols - 1) = input;
+
+    arma::Mat<eT> filterPadded = filter;
+    filterPadded.resize(outputRows, outputCols);
+
+    // Perform the FFT and IFFT.
+    output = arma::real(ifft2(arma::fft2(inputPadded) % arma::fft2(
+        filterPadded)));
+
+    // Extract the region of interest. We don't need to handle the padLastDim
+    // parameter in a special way; we just cut it out of the output matrix.
+    output = output.submat(filter.n_rows - 1, filter.n_cols - 1,
+        2 * (filter.n_rows - 1) + input.n_rows - 1,
+        2 * (filter.n_cols - 1) + input.n_cols - 1);
+  }
+
+  /*
+   * Perform a convolution through fft using 3rd order tensors. This method only
+   * supports input which is even on the last dimension. In case of an odd input
+   * width, a user can manually pad the input or specify the padLastDim
+   * parameter, which takes care of the padding. The filter can be of any
+   * size.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Cube<eT>& input,
+                          const arma::Cube<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    FFTConvolution<BorderMode>::Convolution(input.slice(0), filter.slice(0),
+        convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        input.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < input.n_slices; i++)
+    {
+      FFTConvolution<BorderMode>::Convolution(input.slice(i), filter.slice(i),
+          convOutput);
+      output.slice(i) = convOutput;
+    }
+  }
+
+  /*
+   * Perform a convolution through fft using a dense matrix as input and 3rd
+   * order tensors as filter and output. This method only supports input which
+   * is even on the last dimension. In case of an odd input width, a user can
+   * manually pad the input or specify the padLastDim parameter, which takes
+   * care of the padding. The filter can be of any size.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Mat<eT>& input,
+                          const arma::Cube<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    FFTConvolution<BorderMode>::Convolution(input, filter.slice(0),
+        convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        filter.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < filter.n_slices; i++)
+    {
+      FFTConvolution<BorderMode>::Convolution(input, filter.slice(i),
+          convOutput);
+      output.slice(i) = convOutput;
+    }
+  }
+
+  /*
+   * Perform a convolution using 3rd order tensors as input and output and a
+   * dense matrix as filter.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Cube<eT>& input,
+                          const arma::Mat<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    FFTConvolution<BorderMode>::Convolution(input.slice(0), filter,
+        convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        input.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < input.n_slices; i++)
+    {
+      FFTConvolution<BorderMode>::Convolution(input.slice(i), filter,
+          convOutput);
+      output.slice(i) = convOutput;
+    }
+  }
+
+};  // class FFTConvolution
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
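A short usage sketch (illustration only, not part of the patch; the matrix sizes are arbitrary even-width examples) showing the two border modes; the output sizes follow from the submat() extractions above:

#include <mlpack/methods/ann/convolution_rules/fft_convolution.hpp>

using namespace mlpack::ann;

int main()
{
  arma::mat input = arma::randu<arma::mat>(8, 8);   // Even number of columns.
  arma::mat filter = arma::randu<arma::mat>(3, 3);

  arma::mat full, valid;
  FFTConvolution<FullConvolution>::Convolution(input, filter, full);
  FFTConvolution<ValidConvolution>::Convolution(input, filter, valid);

  // full is 10 x 10 (8 + 3 - 1); valid is 6 x 6 (8 - 3 + 1).
  return 0;
}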
diff --git a/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp
new file mode 100644
index 0000000..0e4e05f
--- /dev/null
+++ b/src/mlpack/methods/ann/convolution_rules/naive_convolution.hpp
@@ -0,0 +1,190 @@
+/**
+ * @file naive_convolution.hpp
+ * @author Shangtong Zhang
+ * @author Marcus Edel
+ *
+ * Implementation of the convolution.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_CONVOLUTION_RULES_NAIVE_CONVOLUTION_HPP
+#define MLPACK_METHODS_ANN_CONVOLUTION_RULES_NAIVE_CONVOLUTION_HPP
+
+#include <mlpack/prereqs.hpp>
+#include "border_modes.hpp"
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Computes the two-dimensional convolution. This class allows specification of
+ * the border type. The convolution can be computed with the valid
+ * border type or the full border type (default).
+ *
+ * FullConvolution: returns the full two-dimensional convolution.
+ * ValidConvolution: returns only those parts of the convolution that are
+ * computed without the zero-padded edges.
+ *
+ * @tparam BorderMode Type of the border mode (FullConvolution or
+ * ValidConvolution).
+ */
+template<typename BorderMode = FullConvolution>
+class NaiveConvolution
+{
+ public:
+  /*
+   * Perform a convolution (valid mode).
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT, typename Border = BorderMode>
+  static typename std::enable_if<
+      std::is_same<Border, ValidConvolution>::value, void>::type
+  Convolution(const arma::Mat<eT>& input,
+              const arma::Mat<eT>& filter,
+              arma::Mat<eT>& output)
+  {
+    output = arma::zeros<arma::Mat<eT> >(input.n_rows - filter.n_rows + 1,
+        input.n_cols - filter.n_cols + 1);
+
+    // It seems to be about 3.5 times faster to use pointers instead of
+    // filter(ki, kj) * input(leftInput + ki, topInput + kj) and output(i, j).
+    eT* outputPtr = output.memptr();
+
+    for (size_t j = 0; j < output.n_cols; ++j)
+    {
+      for (size_t i = 0; i < output.n_rows; ++i, outputPtr++)
+      {
+        const eT* kernelPtr = filter.memptr();
+        for (size_t kj = 0; kj < filter.n_cols; ++kj)
+        {
+          const eT* inputPtr = input.colptr(kj + j) + i;
+          for (size_t ki = 0; ki < filter.n_rows; ++ki, ++kernelPtr, ++inputPtr)
+            *outputPtr += *kernelPtr * (*inputPtr);
+        }
+      }
+    }
+  }
+
+  /*
+   * Perform a convolution (full mode).
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT, typename Border = BorderMode>
+  static typename std::enable_if<
+      std::is_same<Border, FullConvolution>::value, void>::type
+  Convolution(const arma::Mat<eT>& input,
+              const arma::Mat<eT>& filter,
+              arma::Mat<eT>& output)
+  {
+    const size_t outputRows = input.n_rows + 2 * (filter.n_rows - 1);
+    const size_t outputCols = input.n_cols + 2 * (filter.n_cols - 1);
+
+    // Pad filter and input to the working output shape.
+    arma::Mat<eT> inputPadded = arma::zeros<arma::Mat<eT> >(outputRows,
+        outputCols);
+    inputPadded.submat(filter.n_rows - 1, filter.n_cols - 1,
+          filter.n_rows - 1 + input.n_rows - 1,
+          filter.n_cols - 1 + input.n_cols - 1) = input;
+
+    NaiveConvolution<ValidConvolution>::Convolution(inputPadded, filter,
+        output);
+  }
+
+  /*
+   * Perform a convolution using 3rd order tensors.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Cube<eT>& input,
+                          const arma::Cube<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    NaiveConvolution<BorderMode>::Convolution(input.slice(0), filter.slice(0),
+        convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        input.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < input.n_slices; i++)
+    {
+      NaiveConvolution<BorderMode>::Convolution(input.slice(i), filter.slice(i),
+          output.slice(i));
+    }
+  }
+
+  /*
+   * Perform a convolution using a dense matrix as input and 3rd order tensors
+   * as filter and output.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Mat<eT>& input,
+                          const arma::Cube<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    NaiveConvolution<BorderMode>::Convolution(input, filter.slice(0),
+        convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        filter.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < filter.n_slices; i++)
+    {
+      NaiveConvolution<BorderMode>::Convolution(input, filter.slice(i),
+          output.slice(i));
+    }
+  }
+
+  /*
+   * Perform a convolution using 3rd order tensors as input and output and a
+   * dense matrix as filter.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Cube<eT>& input,
+                          const arma::Mat<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    NaiveConvolution<BorderMode>::Convolution(input.slice(0), filter,
+        convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        input.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < input.n_slices; i++)
+    {
+      NaiveConvolution<BorderMode>::Convolution(input.slice(i), filter,
+          output.slice(i));
+    }
+  }
+
+};  // class NaiveConvolution
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
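A brief sketch (illustration only, not part of the patch) of the relationship the full-mode overload relies on: a full convolution is a valid convolution over a zero-padded input, so the two results below coincide:

#include <mlpack/methods/ann/convolution_rules/naive_convolution.hpp>

using namespace mlpack::ann;

int main()
{
  arma::mat input = arma::randu<arma::mat>(5, 5);
  arma::mat filter = arma::randu<arma::mat>(3, 3);

  arma::mat full;
  NaiveConvolution<FullConvolution>::Convolution(input, filter, full);

  // Pad the input by (filter size - 1) on each side and convolve in valid
  // mode; this is exactly what the full-mode overload does internally.
  arma::mat padded = arma::zeros<arma::mat>(9, 9);
  padded.submat(2, 2, 6, 6) = input;
  arma::mat check;
  NaiveConvolution<ValidConvolution>::Convolution(padded, filter, check);

  // full and check are both 7 x 7 and element-wise identical.
  return 0;
}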
diff --git a/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp
new file mode 100644
index 0000000..e34d8bd
--- /dev/null
+++ b/src/mlpack/methods/ann/convolution_rules/svd_convolution.hpp
@@ -0,0 +1,199 @@
+/**
+ * @file svd_convolution.hpp
+ * @author Marcus Edel
+ *
+ * Implementation of the convolution using the singular value decomposition to
+ * speed up the computation.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_CONVOLUTION_RULES_SVD_CONVOLUTION_HPP
+#define MLPACK_METHODS_ANN_CONVOLUTION_RULES_SVD_CONVOLUTION_HPP
+
+#include <mlpack/prereqs.hpp>
+#include "border_modes.hpp"
+#include "fft_convolution.hpp"
+#include "naive_convolution.hpp"
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Computes the two-dimensional convolution using singular value decomposition.
+ * This class allows specification of the border type. The
+ * convolution can be computed with the valid border type or the full border
+ * type (default).
+ *
+ * FullConvolution: returns the full two-dimensional convolution.
+ * ValidConvolution: returns only those parts of the convolution that are
+ * computed without the zero-padded edges.
+ *
+ * @tparam BorderMode Type of the border mode (FullConvolution or
+ * ValidConvolution).
+ */
+template<typename BorderMode = FullConvolution>
+class SVDConvolution
+{
+ public:
+  /*
+   * Perform a convolution (valid or full mode) using singular value
+   * decomposition. By using singular value decomposition of the filter matrix
+   * the convolution can be expressed as a sum of outer products. Each product
+   * can be computed efficiently as convolution with a row and a column vector.
+   * The individual convolutions are computed with the naive implementation
+   * which is fast if the filter is low-dimensional.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Mat<eT>& input,
+                          const arma::Mat<eT>& filter,
+                          arma::Mat<eT>& output)
+  {
+    // Use the naive convolution in case the filter isn't two dimensional or the
+    // filter is bigger than the input.
+    if (filter.n_rows > input.n_rows || filter.n_cols > input.n_cols ||
+        filter.n_rows == 1 || filter.n_cols == 1)
+    {
+      NaiveConvolution<BorderMode>::Convolution(input, filter, output);
+    }
+    else
+    {
+      arma::Mat<eT> U, V, subOutput;
+      arma::Col<eT> s;
+
+      arma::svd_econ(U, s, V, filter);
+
+      // Rank approximation using the singular values calculated with singular
+      // value decomposition of dense filter matrix.
+      const size_t rank = arma::sum(s > (s.n_elem * arma::max(s) *
+          arma::datum::eps));
+
+      // Test for separability based on the rank of the kernel and take
+      // advantage of the low rank.
+      if (rank * (filter.n_rows + filter.n_cols) < filter.n_elem)
+      {
+        arma::Mat<eT> subFilter = V.unsafe_col(0) * s(0);
+        NaiveConvolution<BorderMode>::Convolution(input, subFilter, subOutput);
+
+        subOutput = subOutput.t();
+        NaiveConvolution<BorderMode>::Convolution(subOutput, U.unsafe_col(0),
+            output);
+
+        for (size_t r = 1; r < rank; r++)
+        {
+          subFilter = V.unsafe_col(r) * s(r);
+          NaiveConvolution<BorderMode>::Convolution(input, subFilter,
+              subOutput);
+
+          arma::Mat<eT> temp;
+          subOutput = subOutput.t();
+          NaiveConvolution<BorderMode>::Convolution(subOutput, U.unsafe_col(r),
+              temp);
+          output += temp;
+        }
+
+        output = output.t();
+      }
+      else
+      {
+        FFTConvolution<BorderMode>::Convolution(input, filter, output);
+      }
+    }
+  }
+
+  /*
+   * Perform a convolution using 3rd order tensors.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Cube<eT>& input,
+                          const arma::Cube<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    SVDConvolution<BorderMode>::Convolution(input.slice(0), filter.slice(0),
+        convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        input.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < input.n_slices; i++)
+    {
+      SVDConvolution<BorderMode>::Convolution(input.slice(i), filter.slice(i),
+          convOutput);
+      output.slice(i) = convOutput;
+    }
+  }
+
+  /*
+   * Perform a convolution using dense matrix as input and a 3rd order tensors
+   * as filter and output.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Mat<eT>& input,
+                          const arma::Cube<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    SVDConvolution<BorderMode>::Convolution(input, filter.slice(0), convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        filter.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < filter.n_slices; i++)
+    {
+      SVDConvolution<BorderMode>::Convolution(input, filter.slice(i),
+          convOutput);
+      output.slice(i) = convOutput;
+    }
+  }
+
+  /*
+   * Perform a convolution using a 3rd order tensors as input and output and a
+   * dense matrix as filter.
+   *
+   * @param input Input used to perform the convolution.
+   * @param filter Filter used to perform the convolution.
+   * @param output Output data that contains the results of the convolution.
+   */
+  template<typename eT>
+  static void Convolution(const arma::Cube<eT>& input,
+                          const arma::Mat<eT>& filter,
+                          arma::Cube<eT>& output)
+  {
+    arma::Mat<eT> convOutput;
+    SVDConvolution<BorderMode>::Convolution(input.slice(0), filter, convOutput);
+
+    output = arma::Cube<eT>(convOutput.n_rows, convOutput.n_cols,
+        input.n_slices);
+    output.slice(0) = convOutput;
+
+    for (size_t i = 1; i < input.n_slices; i++)
+    {
+      SVDConvolution<BorderMode>::Convolution(input.slice(i), filter,
+          convOutput);
+      output.slice(i) = convOutput;
+    }
+  }
+
+};  // class SVDConvolution
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
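A sketch (illustration only, not part of the patch; the symmetric rank-1 filter is an arbitrary example) of the case the low-rank branch above targets: for this symmetric separable filter the SVD-based result should agree with the naive convolution up to floating-point error:

#include <mlpack/methods/ann/convolution_rules/svd_convolution.hpp>

using namespace mlpack::ann;

int main()
{
  // A symmetric, separable (rank-1) 5 x 5 filter built as an outer product.
  arma::vec g = arma::randu<arma::vec>(5);
  arma::mat filter = g * g.t();
  arma::mat input = arma::randu<arma::mat>(32, 32);

  arma::mat svdOut, naiveOut;
  SVDConvolution<ValidConvolution>::Convolution(input, filter, svdOut);
  NaiveConvolution<ValidConvolution>::Convolution(input, filter, naiveOut);

  // Both results are 28 x 28; the SVD path only needs two 1-D convolutions
  // here because the kernel has rank 1.
  return 0;
}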
diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp
new file mode 100644
index 0000000..0046536
--- /dev/null
+++ b/src/mlpack/methods/ann/ffn.hpp
@@ -0,0 +1,447 @@
+/**
+ * @file ffn.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the FFN class, which implements feed forward neural networks.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_FFN_HPP
+#define MLPACK_METHODS_ANN_FFN_HPP
+
+#include <mlpack/prereqs.hpp>
+
+#include <mlpack/methods/ann/network_util.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
+#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
+#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of a standard feed forward network.
+ *
+ * @tparam LayerTypes Contains all layer modules used to construct the network.
+ * @tparam OutputLayerType The output layer type used to evaluate the network.
+ * @tparam InitializationRuleType Rule used to initialize the weight matrix.
+ * @tparam PerformanceFunction Performance strategy used to calculate the error.
+ */
+template <
+  typename LayerTypes,
+  typename OutputLayerType,
+  typename InitializationRuleType = NguyenWidrowInitialization,
+  class PerformanceFunction = CrossEntropyErrorFunction<>
+>
+class FFN
+{
+ public:
+  //! Convenience typedef for the internal model construction.
+  using NetworkType = FFN<LayerTypes,
+                          OutputLayerType,
+                          InitializationRuleType,
+                          PerformanceFunction>;
+
+  /**
+   * Create the FFN object with the given predictors and responses set (this is
+   * the set that is used to train the network) and the given optimizer.
+   * Optionally, specify which initialization rule and performance function should
+   * be used.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameter.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType,
+           typename OutputType,
+           template<typename> class OptimizerType>
+  FFN(LayerType &&network,
+      OutputType &&outputLayer,
+      const arma::mat& predictors,
+      const arma::mat& responses,
+      OptimizerType<NetworkType>& optimizer,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Create the FFN object with the given predictors and responses set (this is
+   * the set that is used to train the network). Optionally, specify which
+   * initialization rule and performance function should be used.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameter.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType, typename OutputType>
+  FFN(LayerType &&network,
+      OutputType &&outputLayer,
+      const arma::mat& predictors,
+      const arma::mat& responses,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Create the FFN object with an empty predictors and responses set and
+   * default optimizer. Make sure to call Train(predictors, responses) when
+   * training.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameter.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType, typename OutputType>
+  FFN(LayerType &&network,
+      OutputType &&outputLayer,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Train the feedforward network on the given input data. By default, the
+   * RMSprop optimization algorithm is used, but others can be specified
+   * (such as mlpack::optimization::SGD).
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(const arma::mat& predictors, const arma::mat& responses);
+
+  /**
+   * Train the feedforward network with the given instantiated optimizer.
+   * Using this overload allows configuring the instantiated optimizer before
+   * training is performed.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(OptimizerType<NetworkType>& optimizer);
+
+  /**
+   * Train the feedforward network on the given input data using the given
+   * optimizer.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(const arma::mat& predictors,
+             const arma::mat& responses,
+             OptimizerType<NetworkType>& optimizer);
+
+  /**
+   * Predict the responses to a given set of predictors. The responses will
+   * reflect the output of the given output layer as returned by the
+   * OutputClass() function.
+   *
+   * @param predictors Input predictors.
+   * @param responses Matrix to put output predictions of responses into.
+   */
+  void Predict(arma::mat& predictors, arma::mat& responses);
+
+  /**
+   * Evaluate the feedforward network with the given parameters. This function
+   * is usually called by the optimizer to train the model.
+   *
+   * @param parameters Matrix model parameters.
+   * @param i Index of point to use for objective function evaluation.
+   * @param deterministic Whether or not to train or test the model. Note that
+   * some layers act differently in training or testing mode.
+   */
+  double Evaluate(const arma::mat& parameters,
+                  const size_t i,
+                  const bool deterministic = true);
+
+  /**
+   * Evaluate the gradient of the feedforward network with the given parameters,
+   * and with respect to only one point in the dataset. This is useful for
+   * optimizers such as SGD, which require a separable objective function.
+   *
+   * @param parameters Matrix of the model parameters to be optimized.
+   * @param i Index of points to use for objective function gradient evaluation.
+   * @param gradient Matrix to output gradient into.
+   */
+  void Gradient(const arma::mat& parameters,
+                const size_t i,
+                arma::mat& gradient);
+
+  //! Return the number of separable functions (the number of predictor points).
+  size_t NumFunctions() const { return numFunctions; }
+
+  //! Return the initial point for the optimization.
+  const arma::mat& Parameters() const { return parameter; }
+  //! Modify the initial point for the optimization.
+  arma::mat& Parameters() { return parameter; }
+
+  //! Serialize the model.
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */);
+
+ private:
+  /**
+   * Reset the network by zeroing the layer activations and by setting the
+   * layer status.
+   *
+   * enable_if (SFINAE) is used to iterate through the network. The general
+   * case peels off the first type and recurses, as usual with
+   * variadic function templates.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& network)
+  {
+    ResetDeterministic(std::get<I>(network));
+    ResetParameter<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Reset the layer status by setting the current deterministic parameter
+   * through all layers that implement the Deterministic() function.
+   */
+  template<typename T>
+  typename std::enable_if<
+      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& layer)
+  {
+    layer.Deterministic() = deterministic;
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& /* unused */) { /* Nothing to do here */ }
+
+  /**
+   * Run a single iteration of the feed forward algorithm, using the given
+   * input and target vector, store the calculated error into the error
+   * vector.
+   */
+  template<size_t I = 0, typename DataType, typename... Tp>
+  void Forward(const DataType& input, std::tuple<Tp...>& network)
+  {
+    std::get<I>(network).InputParameter() = input;
+
+    std::get<I>(network).Forward(std::get<I>(network).InputParameter(),
+        std::get<I>(network).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(network);
+  }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& network)
+  {
+    LinkParameter(network);
+  }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& network)
+  {
+    std::get<I>(network).Forward(std::get<I - 1>(network).OutputParameter(),
+                           std::get<I>(network).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Link the calculated activation with the connection layer.
+   */
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp ...>& /* unused */) { /* Nothing to do here */ }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp...>& network)
+  {
+    if (!LayerTraits<typename std::remove_reference<
+        decltype(std::get<I>(network))>::type>::IsBiasLayer)
+    {
+      std::get<I>(network).InputParameter() = std::get<I - 1>(
+          network).OutputParameter();
+    }
+
+    LinkParameter<I + 1, Tp...>(network);
+  }
+
+  /*
+   * Calculate the output error and update the overall error.
+   */
+  template<typename DataType, typename ErrorType, typename... Tp>
+  double OutputError(const DataType& target,
+                     ErrorType& error,
+                     const std::tuple<Tp...>& network)
+  {
+    // Calculate and store the output error.
+    outputLayer.CalculateError(
+        std::get<sizeof...(Tp) - 1>(network).OutputParameter(), target, error);
+
+    // Measures the network's performance with the specified performance
+    // function.
+    return performanceFunc.Error(network, target, error);
+  }
+
+  /**
+   * Run a single iteration of the feed backward algorithm, using the given
+   * error of the output layer. Note that we iterate backward through the
+   * layer modules.
+   */
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp) - 1), void>::type
+  Backward(const DataType& error, std::tuple<Tp ...>& network)
+  {
+    std::get<sizeof...(Tp) - I>(network).Backward(
+        std::get<sizeof...(Tp) - I>(network).OutputParameter(), error,
+        std::get<sizeof...(Tp) - I>(network).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, network);
+  }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I == (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& /* unused */,
+               std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& error, std::tuple<Tp...>& network)
+  {
+    std::get<sizeof...(Tp) - I>(network).Backward(
+        std::get<sizeof...(Tp) - I>(network).OutputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(network).Delta(),
+        std::get<sizeof...(Tp) - I>(network).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, network);
+  }
+
+  /**
+   * Iterate through all layer modules and update the gradient using the
+   * layer defined optimizer.
+   */
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I == Max, void>::type
+  UpdateGradients(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I < Max, void>::type
+  UpdateGradients(std::tuple<Tp...>& network)
+  {
+    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
+           std::get<I + 1>(network).Delta());
+
+    UpdateGradients<I + 1, Max, Tp...>(network);
+  }
+
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      HasGradientCheck<T, P&(T::*)()>::value, void>::type
+  Update(T& layer, P& /* unused */, D& delta)
+  {
+    layer.Gradient(layer.InputParameter(), delta, layer.Gradient());
+  }
+
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
+  Update(T& /* unused */, P& /* unused */, D& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  /*
+   * Calculate and store the output activation.
+   */
+  template<typename DataType, typename... Tp>
+  void OutputPrediction(DataType& output, std::tuple<Tp...>& network)
+  {
+    // Calculate and store the output prediction.
+    outputLayer.OutputClass(std::get<sizeof...(Tp) - 1>(
+        network).OutputParameter(), output);
+  }
+
+  //! Instantiated feedforward network.
+  LayerTypes network;
+
+  //! The output layer used to evaluate the network.
+  OutputLayerType outputLayer;
+
+  //! Performance strategy used to calculate the error.
+  PerformanceFunction performanceFunc;
+
+  //! The current evaluation mode (training or testing).
+  bool deterministic;
+
+  //! Matrix of (trained) parameters.
+  arma::mat parameter;
+
+  //! The matrix of data points (predictors).
+  arma::mat predictors;
+
+  //! The matrix of responses to the input data points.
+  arma::mat responses;
+
+  //! The number of separable functions (the number of predictor points).
+  size_t numFunctions;
+
+  //! Locally stored backward error.
+  arma::mat error;
+}; // class FFN
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation.
+#include "ffn_impl.hpp"
+
+#endif
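A standalone sketch (illustration only, toy types, assuming nothing from mlpack) of the enable_if recursion used by ResetParameter(), Forward(), and the other tuple walkers above: one overload handles I == sizeof...(Tp) and terminates, the other processes element I and recurses with I + 1:

#include <cstddef>
#include <iostream>
#include <tuple>
#include <type_traits>

struct Linear  { void Forward() { std::cout << "Linear\n"; } };
struct Sigmoid { void Forward() { std::cout << "Sigmoid\n"; } };
struct Softmax { void Forward() { std::cout << "Softmax\n"; } };

// Base case: I has reached the tuple size, so stop recursing.
template<std::size_t I = 0, typename... Tp>
typename std::enable_if<I == sizeof...(Tp), void>::type
ForwardAll(std::tuple<Tp...>& /* network */) { /* Nothing to do here */ }

// General case: run element I, then recurse on I + 1 at compile time.
template<std::size_t I = 0, typename... Tp>
typename std::enable_if<I < sizeof...(Tp), void>::type
ForwardAll(std::tuple<Tp...>& network)
{
  std::get<I>(network).Forward();
  ForwardAll<I + 1, Tp...>(network);
}

int main()
{
  std::tuple<Linear, Sigmoid, Softmax> network;
  ForwardAll(network);  // Prints Linear, Sigmoid, Softmax in order.
  return 0;
}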
diff --git a/src/mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp b/src/mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp
new file mode 100644
index 0000000..82e36ca
--- /dev/null
+++ b/src/mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp
@@ -0,0 +1,121 @@
+/**
+ * @file kathirvalavakumar_subavathi_init.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the initialization method by T.
+ * Kathirvalavakumar and S. Subavathi. This initialization rule is based on
+ * sensitivity analysis using Cauchy’s inequality.
+ *
+ * For more information, see the following paper.
+ *
+ * @code
+ * @inproceedings{KathirvalavakumarJILSA2011,
+ *   title={A New Weight Initialization Method Using Cauchy’s Inequality Based
+ *   on Sensitivity Analysis},
+ *   author={T. Kathirvalavakumar and S. Subavathi},
+ *   booktitle={Journal of Intelligent Learning Systems and Applications,
+ *   Vol. 3 No. 4},
+ *   year={2011}
+ * }
+ * @endcode
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_INIT_RULES_KATHIRVALAVAKUMAR_SUBAVATHI_INIT_HPP
+#define MLPACK_METHODS_ANN_INIT_RULES_KATHIRVALAVAKUMAR_SUBAVATHI_INIT_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+#include <iostream>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * This class is used to initialize the weight matrix with the method proposed
+ * by T. Kathirvalavakumar and S. Subavathi. The method is based on sensitivity
+ * analysis using Cauchy’s inequality. The method is defined by
+ *
+ * @f{eqnarray*}{
+ * \overline{s} &=& f^{-1}(\overline{t}) \\
+ * \Theta^{1}_{p} &\le& \overline{s}
+ *     \sqrt{\frac{3}{I \sum_{i = 1}^{I} (x_{ip}^2)}} \\
+ * \Theta^1 &=& min(\Theta_{p}^{1}); p=1,2,..,P \\
+ * -\Theta^{1} \le w_{i}^{1} &\le& \Theta^{1}
+ * @f}
+ *
+ * where I is the number of inputs including the bias, p refers to the pattern
+ * considered in training, f is the transfer function, and \={s} is the active
+ * region in which the derivative of the activation function is greater than 4%
+ * of the maximum derivative.
+ */
+class KathirvalavakumarSubavathiInitialization
+{
+ public:
+  /**
+   * Initialize the Kathirvalavakumar-Subavathi initialization rule with the
+   * given values.
+   *
+   * @param data The input patterns.
+   * @param s Parameter that defines the active region.
+   */
+  template<typename eT>
+  KathirvalavakumarSubavathiInitialization(const arma::Mat<eT>& data,
+                                           const double s) : s(s)
+  {
+    dataSum = arma::sum(data % data);
+  }
+
+  /**
+   * Initialize the elements of the specified weight matrix with the
+   * Kathirvalavakumar-Subavathi method.
+   *
+   * @param W Weight matrix to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   */
+  template<typename eT>
+  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
+  {
+    arma::Row<eT> b = s * arma::sqrt(3 / (rows * dataSum));
+    const double theta = b.min();
+    RandomInitialization randomInit(-theta, theta);
+    randomInit.Initialize(W, rows, cols);
+  }
+
+  /**
+   * Initialize the elements of the specified weight 3rd order tensor with the
+   * Kathirvalavakumar-Subavathi method.
+   *
+   * @param W 3rd order tensor to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   * @param slices Number of slices.
+   */
+  template<typename eT>
+  void Initialize(arma::Cube<eT>& W,
+                  const size_t rows,
+                  const size_t cols,
+                  const size_t slices)
+  {
+    W = arma::Cube<eT>(rows, cols, slices);
+
+    for (size_t i = 0; i < slices; i++)
+      Initialize(W.slice(i), rows, cols);
+  }
+
+ private:
+  //! The sum of the squared elements in each column of the input data.
+  arma::rowvec dataSum;
+
+  //! Parameter that defines the active region.
+  const double s;
+}; // class KathirvalavakumarSubavathiInitialization
+
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
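A usage sketch (illustration only; the data dimensions and the value of s are arbitrary examples, not recommendations from this patch). The rule needs the training patterns up front so that the per-column sums entering the bound can be precomputed:

#include <mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp>

using namespace mlpack::ann;

int main()
{
  arma::mat data = arma::randu<arma::mat>(10, 100);  // 10 dims, 100 points.

  // s controls the active region; 4.59 is just an example value here.
  KathirvalavakumarSubavathiInitialization init(data, 4.59);

  arma::mat W;
  init.Initialize(W, 10, 25);  // Weights drawn uniformly from [-theta, theta].
  return 0;
}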
diff --git a/src/mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp b/src/mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp
new file mode 100644
index 0000000..689a2ff
--- /dev/null
+++ b/src/mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp
@@ -0,0 +1,117 @@
+/**
+ * @file nguyen_widrow_init.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the Nguyen-Widrow method. This
+ * initialization rule initializes the weights so that the active regions of the
+ * neurons are approximately evenly distributed over the input space.
+ *
+ * For more information, see the following paper.
+ *
+ * @code
+ * @inproceedings{NguyenIJCNN1990,
+ *   title={Improving the learning speed of 2-layer neural networks by choosing
+ *   initial values of the adaptive weights},
+ *   booktitle={Neural Networks, 1990., 1990 IJCNN International Joint
+ *   Conference on},
+ *   year={1990}
+ * }
+ * @endcode
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_INIT_RULES_NGUYEN_WIDROW_INIT_HPP
+#define MLPACK_METHODS_ANN_INIT_RULES_NGUYEN_WIDROW_INIT_HPP
+
+#include <mlpack/prereqs.hpp>
+
+#include "random_init.hpp"
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * This class is used to initialize the weight matrix with the Nguyen-Widrow
+ * method. The method is defined by
+ *
+ * @f{eqnarray*}{
+ * -\gamma &\le& w_i \le \gamma \\
+ * \beta &=& 0.7H^{\frac{1}{I}} \\
+ * n &=& \sqrt{\sum_{i=0}^{I}w_{i}^{2}} \\
+ * w_i &=& \frac{\beta w_i}{n}
+ * @f}
+ *
+ * Where H is the number of neurons in the outgoing layer, I represents the
+ * number of neurons in the incoming layer, and gamma defines the random
+ * interval used to initialize the weights with a random value in a specific
+ * range.
+ */
+class NguyenWidrowInitialization
+{
+ public:
+  /**
+   * Initialize the Nguyen-Widrow initialization rule with the given lower
+   * bound and upper bound.
+   *
+   * @param lowerBound The number used as lower bound.
+   * @param upperBound The number used as upper bound.
+   */
+  NguyenWidrowInitialization(const double lowerBound = -0.5,
+                             const double upperBound = 0.5) :
+      lowerBound(lowerBound), upperBound(upperBound) { }
+
+  /**
+   * Initialize the elements of the specified weight matrix with the
+   * Nguyen-Widrow method.
+   *
+   * @param W Weight matrix to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   */
+  template<typename eT>
+  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
+  {
+    RandomInitialization randomInit(lowerBound, upperBound);
+    randomInit.Initialize(W, rows, cols);
+
+    // beta = 0.7 * H^(1 / I); use floating-point division for the exponent.
+    double beta = 0.7 * std::pow(cols, 1.0 / rows);
+    W *= (beta / arma::norm(W));
+  }
+
+  /**
+   * Initialize the elements of the specified weight 3rd order tensor with the
+   * Nguyen-Widrow method.
+   *
+   * @param W Weight matrix to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   * @param slices Number of slices.
+   */
+  template<typename eT>
+  void Initialize(arma::Cube<eT>& W,
+                  const size_t rows,
+                  const size_t cols,
+                  const size_t slices)
+  {
+    W = arma::Cube<eT>(rows, cols, slices);
+
+    for (size_t i = 0; i < slices; i++)
+      Initialize(W.slice(i), rows, cols);
+  }
+
+ private:
+  //! The number used as lower bound.
+  const double lowerBound;
+
+  //! The number used as upper bound.
+  const double upperBound;
+}; // class NguyenWidrowInitialization
+
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
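A usage sketch (illustration only; the sizes are arbitrary): weights are first drawn uniformly from [lowerBound, upperBound] and then rescaled so that the matrix norm equals beta = 0.7 * cols^(1 / rows):

#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>

using namespace mlpack::ann;

int main()
{
  NguyenWidrowInitialization init(-0.5, 0.5);

  arma::mat W;
  init.Initialize(W, 4, 16);

  // After rescaling, arma::norm(W) equals 0.7 * std::pow(16, 1.0 / 4) = 1.4.
  return 0;
}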
diff --git a/src/mlpack/methods/ann/init_rules/oivs_init.hpp b/src/mlpack/methods/ann/init_rules/oivs_init.hpp
new file mode 100644
index 0000000..5b68753
--- /dev/null
+++ b/src/mlpack/methods/ann/init_rules/oivs_init.hpp
@@ -0,0 +1,130 @@
+/**
+ * @file oivs_init.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the Optimal Initial Value Setting method
+ * (OIVS). This initialization rule is based on geometrical considerations as
+ * described by H. Shimodaira.
+ *
+ * For more information, see the following paper.
+ *
+ * @code
+ * @inproceedings{ShimodairaICTAI1994,
+ *   title={A weight value initialization method for improving learning
+ *   performance of the backpropagation algorithm in neural networks},
+ *   author={Shimodaira, H.},
+ *   booktitle={Tools with Artificial Intelligence, 1994. Proceedings.,
+ *   Sixth International Conference on},
+ *   year={1994}
+ * }
+ * @endcode
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_INIT_RULES_OIVS_INIT_HPP
+#define MLPACK_METHODS_ANN_INIT_RULES_OIVS_INIT_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
+
+#include "random_init.hpp"
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * This class is used to initialize the weight matrix with the OIVS method. The
+ * method is based on the equations representing the characteristics of the
+ * information transformation mechanism of a node. The method is defined by
+ *
+ * @f{eqnarray*}{
+ * b &=& |f^{-1}(1 - \epsilon) - f^{-1}(\epsilon)| \\
+ * \hat{w} &=& \frac{b}{k \cdot n} \\
+ * -\gamma &\le& a_i \le \gamma \\
+ * w_i &=& \hat{w} \cdot \sqrt{a_i + 1}
+ * @f}
+ *
+ * Where f is the transfer function, epsilon and k are custom parameters, n is
+ * the number of neurons in the outgoing layer, and gamma is a parameter that
+ * defines the random interval.
+ *
+ * @tparam ActivationFunction The activation function used for the oivs method.
+ */
+template<
+    class ActivationFunction = LogisticFunction
+>
+class OivsInitialization
+{
+ public:
+  /**
+   * Initialize the OIVS initialization rule with the given values.
+   *
+   * @param epsilon Parameter to control the activation region.
+   * @param k Parameter to control the activation region width.
+   * @param gamma Parameter to define the uniform random range.
+   */
+  OivsInitialization(const double epsilon = 0.1,
+                     const int k = 5,
+                     const double gamma = 0.9) :
+      k(k), gamma(gamma),
+      b(std::abs(ActivationFunction::inv(1 - epsilon) -
+                 ActivationFunction::inv(epsilon)))
+  {
+  }
+
+  /**
+   * Initialize the elements of the specified weight matrix with the OIVS
+   * method.
+   *
+   * @param W Weight matrix to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   */
+  template<typename eT>
+  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
+  {
+    RandomInitialization randomInit(-gamma, gamma);
+    randomInit.Initialize(W, rows, cols);
+
+    W = (b / (k  * rows)) * arma::sqrt(W + 1);
+  }
+
+  /**
+   * Initialize the elements of the specified weight 3rd order tensor with the
+   * OIVS method.
+   *
+   * @param W 3rd order tensor to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   * @param slices Number of slices.
+   */
+  template<typename eT>
+  void Initialize(arma::Cube<eT>& W,
+                  const size_t rows,
+                  const size_t cols,
+                  const size_t slices)
+  {
+    W = arma::Cube<eT>(rows, cols, slices);
+
+    for (size_t i = 0; i < slices; i++)
+      Initialize(W.slice(i), rows, cols);
+  }
+
+ private:
+  //! Parameter to control the activation region width.
+  const int k;
+
+  //! Parameter to define the uniform random range.
+  const double gamma;
+
+  //! Parameter to control the activation region.
+  const double b;
+}; // class OivsInitialization
+
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
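A usage sketch (illustration only; the parameter values below are simply the documented defaults). With the logistic transfer function, b is computed from the inverse activation at epsilon and 1 - epsilon, and every weight ends up non-negative and bounded by (b / (k * rows)) * sqrt(1 + gamma):

#include <mlpack/methods/ann/init_rules/oivs_init.hpp>

using namespace mlpack::ann;

int main()
{
  // epsilon = 0.1, k = 5, gamma = 0.9 (the default parameters).
  OivsInitialization<> init(0.1, 5, 0.9);

  arma::mat W;
  init.Initialize(W, 8, 8);  // Each entry lies in [b / (k * 8) * sqrt(0.1),
                             //                     b / (k * 8) * sqrt(1.9)].
  return 0;
}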
diff --git a/src/mlpack/methods/ann/init_rules/orthogonal_init.hpp b/src/mlpack/methods/ann/init_rules/orthogonal_init.hpp
new file mode 100644
index 0000000..f7d2954
--- /dev/null
+++ b/src/mlpack/methods/ann/init_rules/orthogonal_init.hpp
@@ -0,0 +1,82 @@
+/**
+ * @file orthogonal_init.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the orthogonal matrix initialization method.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_INIT_RULES_ORTHOGONAL_INIT_HPP
+#define MLPACK_METHODS_ANN_INIT_RULES_ORTHOGONAL_INIT_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * This class is used to initialize the weight matrix with the orthogonal
+ * matrix initialization method.
+ */
+class OrthogonalInitialization
+{
+ public:
+  /**
+   * Initialize the orthogonal matrix initialization rule with the given gain.
+   *
+   * @param gain The gain value.
+   */
+  OrthogonalInitialization(const double gain = 1.0) : gain(gain) { }
+
+  /**
+   * Initialize the elements of the specified weight matrix with the orthogonal
+   * matrix initialization method.
+   *
+   * @param W Weight matrix to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   */
+  template<typename eT>
+  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
+  {
+    arma::Mat<eT> V;
+    arma::Col<eT> s;
+
+    arma::svd_econ(W, s, V, arma::randu<arma::Mat<eT> >(rows, cols));
+    W *= gain;
+  }
+
+  /**
+   * Initialize the elements of the specified weight 3rd order tensor with the
+   * orthogonal matrix initialization method.
+   *
+   * @param W Weight matrix to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   * @param slices Number of slices.
+   */
+  template<typename eT>
+  void Initialize(arma::Cube<eT>& W,
+                  const size_t rows,
+                  const size_t cols,
+                  const size_t slices)
+  {
+    W = arma::Cube<eT>(rows, cols, slices);
+
+    for (size_t i = 0; i < slices; i++)
+      Initialize(W.slice(i), rows, cols);
+  }
+
+ private:
+  //! The number used as gain.
+  const double gain;
+}; // class OrthogonalInitialization
+
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
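A usage sketch (illustration only): for a square matrix the result is orthonormal up to the gain factor, so with gain 1.0 the product W * W.t() is the identity:

#include <mlpack/methods/ann/init_rules/orthogonal_init.hpp>

using namespace mlpack::ann;

int main()
{
  OrthogonalInitialization init(1.0);

  arma::mat W;
  init.Initialize(W, 6, 6);

  // W * W.t() is approximately the 6 x 6 identity matrix.
  return 0;
}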
diff --git a/src/mlpack/methods/ann/init_rules/random_init.hpp b/src/mlpack/methods/ann/init_rules/random_init.hpp
index 4d720db..5207a97 100644
--- a/src/mlpack/methods/ann/init_rules/random_init.hpp
+++ b/src/mlpack/methods/ann/init_rules/random_init.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_ANN_INIT_RULES_RANDOM_INIT_HPP
 #define MLPACK_METHODS_ANN_INIT_RULES_RANDOM_INIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace ann /** Artificial Neural Network. */ {
diff --git a/src/mlpack/methods/ann/init_rules/zero_init.hpp b/src/mlpack/methods/ann/init_rules/zero_init.hpp
new file mode 100644
index 0000000..f6aec7b
--- /dev/null
+++ b/src/mlpack/methods/ann/init_rules/zero_init.hpp
@@ -0,0 +1,65 @@
+/**
+ * @file zero_init.hpp
+ * @author Marcus Edel
+ *
+ * Initialization rule for neural networks. This simple initialization is
+ * performed by assigning a zero matrix to the weight matrix.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_INIT_RULES_ZERO_INIT_HPP
+#define MLPACK_METHODS_ANN_INIT_RULES_ZERO_INIT_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * This class is used to initialize the weight matrix with zeros.
+ */
+class ZeroInitialization
+{
+ public:
+  /**
+   *  Create the ZeroInitialization object.
+   */
+  ZeroInitialization() { /* Nothing to do here */ }
+
+  /**
+   * Initialize the elements of the specified weight matrix.
+   *
+   * @param W Weight matrix to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   */
+  template<typename eT>
+  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
+  {
+    W = arma::zeros<arma::Mat<eT> >(rows, cols);
+  }
+
+  /**
+   * Initialize the elements of the specified weight 3rd-order tensor.
+   *
+   * @param W Weight tensor to initialize.
+   * @param rows Number of rows.
+   * @param cols Number of columns.
+   * @param slices Number of slices.
+   */
+  template<typename eT>
+  void Initialize(arma::Cube<eT>& W,
+                  const size_t rows,
+                  const size_t cols,
+                  const size_t slices)
+  {
+    W = arma::zeros<arma::Cube<eT> >(rows, cols, slices);
+  }
+}; // class ZeroInitialization
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
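A minimal usage sketch for ZeroInitialization (illustrative only; the matrix name and sizes are assumed):

    #include <mlpack/methods/ann/init_rules/zero_init.hpp>

    arma::mat W;
    mlpack::ann::ZeroInitialization zeroInit;
    zeroInit.Initialize(W, 3, 4);  // W is now a 3 x 4 matrix of zeros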
diff --git a/src/mlpack/methods/ann/layer/base_layer.hpp b/src/mlpack/methods/ann/layer/base_layer.hpp
new file mode 100644
index 0000000..9af543b
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/base_layer.hpp
@@ -0,0 +1,223 @@
+/**
+ * @file base_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the BaseLayer class, which attaches various functions to the
+ * embedding layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_BASE_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_BASE_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/activation_functions/logistic_function.hpp>
+#include <mlpack/methods/ann/activation_functions/identity_function.hpp>
+#include <mlpack/methods/ann/activation_functions/rectifier_function.hpp>
+#include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the base layer. The base layer works as a metaclass which
+ * attaches various functions to the embedding layer.
+ *
+ * A few convenience typedefs are given:
+ *
+ *  - SigmoidLayer
+ *  - IdentityLayer
+ *  - ReLULayer
+ *  - TanHLayer
+ *  - BaseLayer2D
+ *
+ * @tparam ActivationFunction Activation function used for the embedding layer.
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    class ActivationFunction = LogisticFunction,
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class BaseLayer
+{
+ public:
+  /**
+   * Create the BaseLayer object.
+   */
+  BaseLayer()
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename InputType, typename OutputType>
+  void Forward(const InputType& input, OutputType& output)
+  {
+    ActivationFunction::fn(input, output);
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& input,
+                const DataType& gy,
+                DataType& g)
+  {
+    DataType derivative;
+    ActivationFunction::deriv(input, derivative);
+    g = gy % derivative;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Cube<eT>& input,
+                const arma::Mat<eT>& gy,
+                arma::Cube<eT>& g)
+  {
+    // Generate a cube using the backpropagated error matrix.
+    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
+        input.n_cols, input.n_slices);
+
+    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++)
+    {
+      for (size_t i = 0; i < gy.n_cols; i++)
+      {
+        arma::Col<eT> temp = gy.col(i).subvec(
+            j * input.n_rows * input.n_cols,
+            (j + 1) * input.n_rows * input.n_cols - 1);
+
+        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
+            input.n_rows, input.n_cols);
+      }
+    }
+
+    arma::Cube<eT> derivative;
+    ActivationFunction::deriv(input, derivative);
+    g = mappedError % derivative;
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& /* ar */, const unsigned int /* version */)
+  {
+    /* Nothing to do here */
+  }
+
+ private:
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class BaseLayer
+
+// Convenience typedefs.
+
+/**
+ * Standard Sigmoid-Layer using the logistic activation function.
+ */
+template <
+    class ActivationFunction = LogisticFunction,
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+using SigmoidLayer = BaseLayer<
+    ActivationFunction, InputDataType, OutputDataType>;
+
+/**
+ * Standard Identity-Layer using the identity activation function.
+ */
+template <
+    class ActivationFunction = IdentityFunction,
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+using IdentityLayer = BaseLayer<
+    ActivationFunction, InputDataType, OutputDataType>;
+
+/**
+ * Standard rectified linear unit non-linearity layer.
+ */
+template <
+    class ActivationFunction = RectifierFunction,
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+using ReLULayer = BaseLayer<
+    ActivationFunction, InputDataType, OutputDataType>;
+
+/**
+ * Standard hyperbolic tangent layer.
+ */
+template <
+    class ActivationFunction = TanhFunction,
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+using TanHLayer = BaseLayer<
+    ActivationFunction, InputDataType, OutputDataType>;
+
+/**
+ * Standard Base-Layer2D using the logistic activation function.
+ */
+template <
+    class ActivationFunction = LogisticFunction,
+    typename InputDataType = arma::cube,
+    typename OutputDataType = arma::cube
+>
+using BaseLayer2D = BaseLayer<
+    ActivationFunction, InputDataType, OutputDataType>;
+
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
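A minimal usage sketch for one of the convenience typedefs above (illustrative only; the input values and sizes are arbitrary):

    #include <mlpack/methods/ann/layer/base_layer.hpp>

    arma::mat input = arma::randu<arma::mat>(4, 1);
    arma::mat output, error(4, 1, arma::fill::ones), delta;
    mlpack::ann::SigmoidLayer<> layer;
    layer.Forward(input, output);          // output = logistic(input), elementwise
    layer.Backward(output, error, delta);  // delta = error % f'(x), f = logistic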
diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp
new file mode 100644
index 0000000..b40bb56
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/bias_layer.hpp
@@ -0,0 +1,208 @@
+/**
+ * @file bias_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the BiasLayer class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_BIAS_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * An implementation of a standard bias layer. The BiasLayer class represents a
+ * single layer of a neural network.
+ *
+ * A convenient typedef is given:
+ *
+ *  - BiasLayer2D
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class BiasLayer
+{
+ public:
+  /**
+   * Create the BiasLayer object using the specified number of units and bias
+   * parameter.
+   *
+   * @param outSize The number of output units.
+   * @param bias The bias value.
+   */
+  BiasLayer(const size_t outSize, const double bias = 1) :
+      outSize(outSize),
+      bias(bias)
+  {
+    weights.set_size(outSize, 1);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    output = input + (weights * bias);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
+  {
+    output = input;
+    for (size_t s = 0; s < input.n_slices; s++)
+    {
+      output.slice(s) += weights(s) * bias;
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f, using the results from the
+   * feed forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType, typename ErrorType>
+  void Backward(const DataType& /* unused */,
+                const ErrorType& gy,
+                ErrorType& g)
+  {
+    g = gy;
+  }
+
+  /*
+   * Calculate the gradient using the output delta and the bias.
+   *
+   * @param input The propagated input.
+   * @param error The calculated error.
+   * @param gradient The calculated gradient.
+   */
+  template<typename eT, typename ErrorType, typename GradientType>
+  void Gradient(const arma::Mat<eT>& /* input */,
+                const ErrorType& error,
+                GradientType& gradient)
+  {
+    gradient = error * bias;
+  }
+
+  //! Get the weights.
+  InputDataType const& Weights() const { return weights; }
+  //! Modify the weights.
+  InputDataType& Weights() { return weights; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the gradient.
+  InputDataType const& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  InputDataType& Gradient() { return gradient; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(weights, "weights");
+    ar & data::CreateNVP(bias, "bias");
+  }
+
+ private:
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! Locally-stored bias value.
+  double bias;
+
+  //! Locally-stored weight object.
+  InputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  InputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class BiasLayer
+
+//! Layer traits for the bias layer.
+template<typename InputDataType, typename OutputDataType>
+class LayerTraits<BiasLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = true;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+/**
+ * Standard 2D-Bias-Layer.
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::cube
+>
+using BiasLayer2D = BiasLayer<InputDataType, OutputDataType>;
+
+/**
+ * Standard addition layer (a bias layer operating on matrices).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+using AdditionLayer = BiasLayer<InputDataType, OutputDataType>;
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
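A minimal usage sketch for BiasLayer (illustrative only; the weights would normally be set by an initialization rule):

    #include <mlpack/methods/ann/layer/bias_layer.hpp>

    arma::mat input = arma::zeros<arma::mat>(3, 1);
    arma::mat output;
    mlpack::ann::BiasLayer<> layer(3);  // three output units, bias = 1
    layer.Weights().fill(0.5);
    layer.Forward(input, output);       // output = input + weights * bias = 0.5 everywhere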
diff --git a/src/mlpack/methods/ann/layer/binary_classification_layer.hpp b/src/mlpack/methods/ann/layer/binary_classification_layer.hpp
new file mode 100644
index 0000000..90975b3
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/binary_classification_layer.hpp
@@ -0,0 +1,106 @@
+/**
+ * @file binary_classification_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the BinaryClassificationLayer class, which implements a
+ * binary class classification layer that can be used as output layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_BINARY_CLASSIFICATION_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * An implementation of a binary classification layer that can be used as
+ * output layer.
+ */
+class BinaryClassificationLayer
+{
+ public:
+  /**
+   * Create the BinaryClassificationLayer object.
+   *
+   * @param confidence The confidence used for the output class transformation.
+   */
+  BinaryClassificationLayer(const double confidence = 0.5) :
+      confidence(confidence)
+  {
+    // Nothing to do here.
+  }
+
+  /*
+   * Calculate the error using the specified input activation and the target.
+   * The error is stored into the given error parameter.
+   *
+   * @param inputActivations Input data used for evaluating the network.
+   * @param target Target data used for evaluating the network.
+   * @param error The calculated error with respect to the input activation and
+   * the given target.
+   */
+  template<typename DataType>
+  void CalculateError(const DataType& inputActivations,
+                      const DataType& target,
+                      DataType& error)
+  {
+    error = inputActivations - target;
+  }
+
+  /*
+   * Calculate the output class using the specified input activation.
+   *
+   * @param inputActivations Input data used to calculate the output class.
+   * @param output Output class of the input activation.
+   */
+  template<typename DataType>
+  void OutputClass(const DataType& inputActivations, DataType& output)
+  {
+    output = inputActivations;
+
+    for (size_t i = 0; i < output.n_elem; i++)
+      output(i) = output(i) > confidence ? 1 : 0;
+  }
+
+  //! Get the confidence parameter.
+  double const& Confidence() const { return confidence; }
+  //! Modify the confidence parameter.
+  double& Confidence() { return confidence; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(confidence, "confidence");
+  }
+
+ private:
+  //! The confidence (threshold) used for the output class transformation.
+  double confidence;
+
+}; // class BinaryClassificationLayer
+
+//! Layer traits for the binary class classification layer.
+template <>
+class LayerTraits<BinaryClassificationLayer>
+{
+ public:
+  static const bool IsBinary = true;
+  static const bool IsOutputLayer = true;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = false;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
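A minimal usage sketch for BinaryClassificationLayer (illustrative only; the activation values are arbitrary):

    #include <mlpack/methods/ann/layer/binary_classification_layer.hpp>

    arma::mat activations("0.2 0.7 0.9");
    arma::mat classes, error;
    mlpack::ann::BinaryClassificationLayer outputLayer(0.5);
    outputLayer.OutputClass(activations, classes);            // classes = [0 1 1]
    outputLayer.CalculateError(activations, classes, error);  // error = activations - classes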
diff --git a/src/mlpack/methods/ann/layer/constant_layer.hpp b/src/mlpack/methods/ann/layer/constant_layer.hpp
new file mode 100644
index 0000000..716c0ab
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/constant_layer.hpp
@@ -0,0 +1,121 @@
+/**
+ * @file constant_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the ConstantLayer class, which outputs a constant value given
+ * any input.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_CONSTANT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the constant layer. The constant layer outputs a given
+ * constant value given any input value.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class ConstantLayer
+{
+ public:
+  /**
+   * Create the ConstantLayer object that outputs a given constant scalar value
+   * given any input value.
+   *
+   * @param outSize The number of output units.
+   * @param scalar The constant value used to create the constant output.
+   */
+  ConstantLayer(const size_t outSize, const double scalar)
+  {
+    constantOutput = OutputDataType(outSize, 1);
+    constantOutput.fill(scalar);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network. The forward pass fills the
+   * output with the specified constant parameter.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& /* input */, arma::Mat<eT>& output)
+  {
+    output = constantOutput;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network. The backward pass of the
+   * constant layer always returns a zero output error matrix.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Mat<eT>& /* input */,
+                const arma::Mat<eT>& /* gy */,
+                arma::Mat<eT>& g)
+  {
+    g = arma::zeros<arma::Mat<eT> >(inputParameter.n_rows,
+        inputParameter.n_cols);
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(constantOutput, "constantOutput");
+  }
+
+ private:
+  //! Locally-stored constant output matrix.
+  OutputDataType constantOutput;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class ConstantLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
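A minimal usage sketch for ConstantLayer (illustrative only):

    #include <mlpack/methods/ann/layer/constant_layer.hpp>

    arma::mat input = arma::randu<arma::mat>(5, 1);
    arma::mat output;
    mlpack::ann::ConstantLayer<> layer(3, 7.0);  // three output units, constant value 7
    layer.Forward(input, output);                // output is a 3 x 1 matrix of 7s; input ignored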
diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp
new file mode 100644
index 0000000..3dafb6d
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/conv_layer.hpp
@@ -0,0 +1,324 @@
+/**
+ * @file conv_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the ConvLayer class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_CONV_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+#include <mlpack/methods/ann/convolution_rules/border_modes.hpp>
+#include <mlpack/methods/ann/convolution_rules/naive_convolution.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the ConvLayer class. The ConvLayer class represents a
+ * single layer of a neural network.
+ *
+ * @tparam ForwardConvolutionRule Convolution to perform forward process.
+ * @tparam BackwardConvolutionRule Convolution to perform backward process.
+ * @tparam GradientConvolutionRule Convolution to calculate gradient.
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename ForwardConvolutionRule = NaiveConvolution<ValidConvolution>,
+    typename BackwardConvolutionRule = NaiveConvolution<FullConvolution>,
+    typename GradientConvolutionRule = NaiveConvolution<ValidConvolution>,
+    typename InputDataType = arma::cube,
+    typename OutputDataType = arma::cube
+>
+class ConvLayer
+{
+ public:
+  /**
+   * Create the ConvLayer object using the specified number of input maps,
+   * output maps, filter size, stride and padding parameter.
+   *
+   * @param inMaps The number of input maps.
+   * @param outMaps The number of output maps.
+   * @param wfilter Width of the filter/kernel.
+   * @param hfilter Height of the filter/kernel.
+   * @param xStride Stride of filter application in the x direction.
+   * @param yStride Stride of filter application in the y direction.
+   * @param wPad Spatial padding width of the input.
+   * @param hPad Spatial padding height of the input.
+   */
+  ConvLayer(const size_t inMaps,
+            const size_t outMaps,
+            const size_t wfilter,
+            const size_t hfilter,
+            const size_t xStride = 1,
+            const size_t yStride = 1,
+            const size_t wPad = 0,
+            const size_t hPad = 0) :
+      wfilter(wfilter),
+      hfilter(hfilter),
+      inMaps(inMaps),
+      outMaps(outMaps),
+      xStride(xStride),
+      yStride(yStride),
+      wPad(wPad),
+      hPad(hPad)
+  {
+    weights.set_size(wfilter, hfilter, inMaps * outMaps);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
+  {
+    const size_t wConv = ConvOutSize(input.n_rows, wfilter, xStride, wPad);
+    const size_t hConv = ConvOutSize(input.n_cols, hfilter, yStride, hPad);
+
+    output = arma::zeros<arma::Cube<eT> >(wConv, hConv, outMaps);
+    for (size_t outMap = 0, outMapIdx = 0; outMap < outMaps; outMap++)
+    {
+      for (size_t inMap = 0; inMap < inMaps; inMap++, outMapIdx++)
+      {
+        arma::Mat<eT> convOutput;
+        ForwardConvolutionRule::Convolution(input.slice(inMap),
+            weights.slice(outMap), convOutput);
+
+        output.slice(outMap) += convOutput;
+      }
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Cube<eT>& /* unused */,
+                const arma::Cube<eT>& gy,
+                arma::Cube<eT>& g)
+  {
+    g = arma::zeros<arma::Cube<eT> >(inputParameter.n_rows,
+                                     inputParameter.n_cols,
+                                     inputParameter.n_slices);
+
+    for (size_t outMap = 0, outMapIdx = 0; outMap < inMaps; outMap++)
+    {
+      for (size_t inMap = 0; inMap < outMaps; inMap++, outMapIdx++)
+      {
+        arma::Mat<eT> rotatedFilter;
+        Rotate180(weights.slice(outMap * outMaps + inMap), rotatedFilter);
+
+        arma::Mat<eT> output;
+        BackwardConvolutionRule::Convolution(gy.slice(inMap), rotatedFilter,
+            output);
+
+        g.slice(outMap) += output;
+      }
+    }
+  }
+
+  /*
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The input parameter used for calculating the gradient.
+   * @param d The calculated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Gradient(const InputType& input,
+                const arma::Cube<eT>& d,
+                arma::Cube<eT>& g)
+  {
+    g = arma::zeros<arma::Cube<eT> >(weights.n_rows, weights.n_cols,
+        weights.n_slices);
+
+    for (size_t outMap = 0; outMap < outMaps; outMap++)
+    {
+      for (size_t inMap = 0, s = outMap; inMap < inMaps; inMap++, s += outMaps)
+      {
+        arma::Cube<eT> inputSlices = input.slices(inMap, inMap);
+        arma::Cube<eT> deltaSlices = d.slices(outMap, outMap);
+
+        arma::Cube<eT> output;
+        GradientConvolutionRule::Convolution(inputSlices, deltaSlices, output);
+
+        for (size_t i = 0; i < output.n_slices; i++)
+          g.slice(s) += output.slice(i);
+      }
+    }
+  }
+
+  //! Get the weights.
+  OutputDataType const& Weights() const { return weights; }
+  //! Modify the weights.
+  OutputDataType& Weights() { return weights; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the gradient.
+  OutputDataType const& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  OutputDataType& Gradient() { return gradient; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(weights, "weights");
+    ar & data::CreateNVP(wfilter, "wfilter");
+    ar & data::CreateNVP(hfilter, "hfilter");
+    ar & data::CreateNVP(inMaps, "inMaps");
+    ar & data::CreateNVP(outMaps, "outMaps");
+    ar & data::CreateNVP(xStride, "xStride");
+    ar & data::CreateNVP(yStride, "yStride");
+    ar & data::CreateNVP(wPad, "wPad");
+    ar & data::CreateNVP(hPad, "hPad");
+  }
+
+ private:
+  /*
+   * Rotates a 3rd-order tensor counterclockwise by 180 degrees.
+   *
+   * @param input The input data to be rotated.
+   * @param output The rotated output.
+   */
+  template<typename eT>
+  void Rotate180(const arma::Cube<eT>& input, arma::Cube<eT>& output)
+  {
+    output = arma::Cube<eT>(input.n_rows, input.n_cols, input.n_slices);
+
+    // Left-right flip, up-down flip.
+    for (size_t s = 0; s < output.n_slices; s++)
+      output.slice(s) = arma::fliplr(arma::flipud(input.slice(s)));
+  }
+
+  /*
+   * Rotates a dense matrix counterclockwise by 180 degrees.
+   *
+   * @param input The input data to be rotated.
+   * @param output The rotated output.
+   */
+  template<typename eT>
+  void Rotate180(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    // Left-right flip, up-down flip.
+    output = arma::fliplr(arma::flipud(input));
+  }
+
+  /*
+   * Return the convolution output size.
+   *
+   * @param size The size of the input (row or column).
+   * @param k The size of the filter (width or height).
+   * @param s The stride size (x or y direction).
+   * @param p The size of the padding (width or height).
+   * @return The convolution output size.
+   */
+  size_t ConvOutSize(const size_t size,
+                     const size_t k,
+                     const size_t s,
+                     const size_t p)
+  {
+    return std::floor((size + p * 2 - k) / (double) s) + 1;
+  }
+
+  //! Locally-stored filter/kernel width.
+  size_t wfilter;
+
+  //! Locally-stored filter/kernel height.
+  size_t hfilter;
+
+  //! Locally-stored number of input maps.
+  size_t inMaps;
+
+  //! Locally-stored number of output maps.
+  size_t outMaps;
+
+  //! Locally-stored stride of the filter in x-direction.
+  size_t xStride;
+
+  //! Locally-stored stride of the filter in y-direction.
+  size_t yStride;
+
+  //! Locally-stored padding width.
+  size_t wPad;
+
+  //! Locally-stored padding height.
+  size_t hPad;
+
+  //! Locally-stored weight object.
+  OutputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  OutputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class ConvLayer
+
+//! Layer traits for the convolution layer.
+template<
+    typename ForwardConvolutionRule,
+    typename BackwardConvolutionRule,
+    typename GradientConvolutionRule,
+    typename InputDataType,
+    typename OutputDataType
+>
+class LayerTraits<ConvLayer<ForwardConvolutionRule,
+                            BackwardConvolutionRule,
+                            GradientConvolutionRule,
+                            InputDataType,
+                            OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
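A minimal usage sketch for ConvLayer (illustrative only; the weights would normally be set by an initialization rule):

    #include <mlpack/methods/ann/layer/conv_layer.hpp>

    arma::cube input = arma::randu<arma::cube>(8, 8, 1);
    arma::cube output;
    mlpack::ann::ConvLayer<> conv(1, 2, 3, 3);  // 1 input map, 2 output maps, 3 x 3 filters
    conv.Weights().randu();
    conv.Forward(input, output);                // output is 6 x 6 x 2 (valid convolution)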
diff --git a/src/mlpack/methods/ann/layer/dropconnect_layer.hpp b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
new file mode 100644
index 0000000..ad0687f
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/dropconnect_layer.hpp
@@ -0,0 +1,361 @@
+/**
+ * @file dropconnect_layer.hpp
+ * @author Palash Ahuja
+ *
+ * Definition of the DropConnectLayer class, which implements a regularizer
+ * that randomly sets connections to zero, preventing units from co-adapting.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+#include "empty_layer.hpp"
+#include <mlpack/methods/ann/network_util.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The DropConnect layer is a regularizer that randomly sets connection values
+ * to zero with probability ratio and scales the remaining elements by a
+ * factor of 1 / (1 - ratio). The output is scaled with 1 / (1 - p) when
+ * deterministic is false. In the deterministic mode (during testing), the
+ * layer just computes the output. The output is computed according to the
+ * given input layer; if no input layer is given, a linear layer is used by
+ * default.
+ *
+ * Note:
+ * During training you should set deterministic to false and during testing
+ * you should set deterministic to true.
+ *
+ *  For more information, see the following.
+ *
+ * @code
+ * @inproceedings{WanICML2013,
+ *   title={Regularization of Neural Networks using DropConnect},
+ *   booktitle = {Proceedings of the 30th International Conference on Machine
+ *                Learning(ICML - 13)},
+ *   author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and
+ *             Rob Fergus},
+ *   year = {2013}
+ * }
+ * @endcode
+ *
+ * @tparam InputLayer Layer used instead of the internal linear layer.
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template<
+    typename InputLayer = EmptyLayer<arma::mat, arma::mat>,
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class DropConnectLayer
+{
+ public:
+  /**
+   * Create the DropConnectLayer object with an internal linear layer, using
+   * the specified input size, output size, and ratio.
+   *
+   * @param inSize The number of input units.
+   * @param outSize The number of output units.
+   * @param ratio The probability of setting a value to zero.
+   */
+  DropConnectLayer (const size_t inSize,
+                    const size_t outSize,
+                    const double ratio = 0.5) :
+      inSize(inSize),
+      outSize(outSize),
+      ratio(ratio),
+      scale(1.0 / (1 - ratio)),
+      uselayer(false)
+  {
+    weights.set_size(outSize, inSize);
+  }
+
+  /**
+   * Create the DropConnectLayer object using the specified input layer and
+   * ratio parameter.
+   *
+   * @param inputLayer The layer object that the DropConnect layer wraps.
+   * @param ratio The probability of setting a connection to zero.
+   */
+  template<typename InputLayerType>
+  DropConnectLayer(InputLayerType &&inputLayer,
+                   const double ratio = 0.5) :
+      baseLayer(std::forward<InputLayerType>(inputLayer)),
+      ratio(ratio),
+      scale(1.0 / (1 - ratio)),
+      uselayer(true)
+  {
+    static_assert(std::is_same<typename std::decay<InputLayerType>::type,
+                  InputLayer>::value,
+                  "The type of the inputLayer must be InputLayerType");
+  }
+  /**
+   * Ordinary feed forward pass of the DropConnect layer.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT> &input, arma::Mat<eT> &output)
+  {
+    // The DropConnect mask will not be multiplied in the deterministic mode
+    // (during testing).
+    if (deterministic)
+    {
+      if (uselayer)
+      {
+        baseLayer.Forward(input, output);
+      }
+      else
+      {
+        output = weights * input;
+      }
+    }
+    else
+    {
+      if (uselayer)
+      {
+        // Scale with input / (1 - ratio) and set values to zero with
+        // probability ratio.
+        mask = arma::randu<arma::Mat<eT> >(baseLayer.Weights().n_rows,
+            baseLayer.Weights().n_cols);
+        mask.transform([&](double val) { return (val > ratio); });
+
+        // Save weights for denoising.
+        denoise = baseLayer.Weights();
+
+        baseLayer.Weights() = baseLayer.Weights() % mask;
+
+        baseLayer.Forward(input, output);
+      }
+      else
+      {
+        // Scale the input / ( 1 - ratio) and set values to zero with
+        // probability ratio.
+        mask = arma::randu<arma::Mat<eT> >(weights.n_rows, weights.n_cols);
+        mask.transform([&](double val) { return (val > ratio); });
+
+        // Save weights for denoising.
+        denoise = weights;
+
+        weights = weights % mask;
+        output = weights * input;
+      }
+
+      output = output * scale;
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of the DropConnect layer.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& input, const DataType& gy, DataType& g)
+  {
+    if (uselayer)
+    {
+      baseLayer.Backward(input, gy, g);
+    }
+    else
+    {
+      g = weights.t() * gy;
+    }
+  }
+
+  /**
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The propagated input.
+   * @param d The calculated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& input,
+                const arma::Mat<eT>& d,
+                GradientDataType& g)
+  {
+    if (uselayer)
+    {
+      baseLayer.Gradient(input, d, g);
+
+      // Denoise the weights.
+      baseLayer.Weights() = denoise;
+    }
+    else
+    {
+      g = d * input.t();
+
+      // Denoise the weights.
+      weights = denoise;
+    }
+  }
+
+  //! Get the weights.
+  OutputDataType const& Weights() const
+  {
+    if (uselayer)
+      return baseLayer.Weights();
+
+    return weights;
+  }
+
+  //! Modify the weights.
+  OutputDataType& Weights()
+  {
+    if (uselayer)
+      return baseLayer.Weights();
+
+    return weights;
+  }
+
+  //! Get the input parameter.
+  InputDataType &InputParameter() const
+  {
+    if (uselayer)
+      return baseLayer.InputParameter();
+
+    return inputParameter;
+  }
+
+  //! Modify the input parameter.
+  InputDataType &InputParameter()
+  {
+    if (uselayer)
+      return baseLayer.InputParameter();
+
+    return inputParameter;
+  }
+
+  //! Get the output parameter.
+  OutputDataType &OutputParameter() const
+  {
+    if (uselayer)
+      return baseLayer.OutputParameter();
+
+    return outputParameter;
+  }
+
+  //! Modify the output parameter.
+  OutputDataType &OutputParameter()
+  {
+    if (uselayer)
+      return baseLayer.OutputParameter();
+
+    return outputParameter;
+  }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const
+  {
+    if (uselayer)
+      return baseLayer.Delta();
+
+    return delta;
+  }
+
+  //! Modify the delta.
+  OutputDataType& Delta()
+  {
+    if (uselayer)
+      return baseLayer.Delta();
+
+    return delta;
+  }
+
+  //! Get the gradient.
+  OutputDataType const& Gradient() const
+  {
+    if (uselayer)
+      return baseLayer.Gradient();
+
+    return gradient;
+   }
+
+  //! Modify the gradient.
+  OutputDataType& Gradient()
+  {
+    if (uselayer)
+      return baseLayer.Gradient();
+
+    return gradient;
+  }
+
+  //! The value of the deterministic parameter.
+  bool Deterministic() const { return deterministic; }
+
+  //! Modify the value of the deterministic parameter.
+  bool &Deterministic() { return deterministic; }
+
+  //! The probability of setting a value to zero.
+  double Ratio() const { return ratio; }
+
+  //! Modify the probability of setting a value to zero.
+  void Ratio(const double r)
+  {
+    ratio = r;
+    scale = 1.0 / (1.0 - ratio);
+  }
+
+ private:
+  //! Locally-stored layer object.
+  InputLayer baseLayer;
+
+  //! Locally stored number of input units.
+  size_t inSize;
+
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! The probability of setting a value to zero.
+  double ratio;
+
+  //! The scale fraction.
+  double scale;
+
+  //! If true, the given input layer is used; otherwise the internal linear
+  //! weights are used.
+  bool uselayer;
+
+  //! Locally-stored weight object.
+  OutputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  OutputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Locally-stored mask object.
+  OutputDataType mask;
+
+  //! If true, DropConnect and scaling are disabled (see notes above).
+  bool deterministic;
+
+  //! Denoise mask for the weights.
+  OutputDataType denoise;
+}; // class DropConnectLayer.
+
+}  // namespace ann
+}  // namespace mlpack
+
+#endif
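A minimal usage sketch for DropConnectLayer using its internal linear weights (illustrative only):

    #include <mlpack/methods/ann/layer/dropconnect_layer.hpp>

    arma::mat input = arma::randu<arma::mat>(4, 1);
    arma::mat output;
    mlpack::ann::DropConnectLayer<> layer(4, 2, 0.3);  // inSize, outSize, ratio
    layer.Deterministic() = false;                     // training mode
    layer.Weights().randu();
    layer.Forward(input, output);  // ~30% of connections zeroed, rest scaled by 1 / 0.7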
diff --git a/src/mlpack/methods/ann/layer/dropout_layer.hpp b/src/mlpack/methods/ann/layer/dropout_layer.hpp
new file mode 100644
index 0000000..2596698
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/dropout_layer.hpp
@@ -0,0 +1,252 @@
+/**
+ * @file dropout_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the DropoutLayer class, which implements a regularizer that
+ * randomly sets units to zero, preventing units from co-adapting.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The dropout layer is a regularizer that randomly sets input values to zero
+ * with probability ratio and scales the remaining elements by a factor of
+ * 1 / (1 - ratio). If rescale is true, the input is scaled with 1 / (1 - p)
+ * when deterministic is false. In the deterministic mode (during testing),
+ * the layer just scales the output.
+ *
+ * Note: During training you should set deterministic to false and during
+ * testing you should set deterministic to true.
+ *
+ * For more information, see the following.
+ *
+ * @code
+ * @article{Hinton2012,
+ *   author  = {Geoffrey E. Hinton, Nitish Srivastava, Alex Krizhevsky,
+ *              Ilya Sutskever, Ruslan Salakhutdinov},
+ *   title   = {Improving neural networks by preventing co-adaptation of feature
+ *              detectors},
+ *   journal = {CoRR},
+ *   volume  = {abs/1207.0580},
+ *   year    = {2012},
+ * }
+ * @endcode
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class DropoutLayer
+{
+ public:
+
+  /**
+   * Create the DropoutLayer object using the specified ratio and rescale
+   * parameter.
+   *
+   * @param ratio The probability of setting a value to zero.
+   * @param rescale If true, the input is rescaled when deterministic is false.
+   */
+  DropoutLayer(const double ratio = 0.5,
+               const bool rescale = true) :
+      ratio(ratio),
+      scale(1.0 / (1.0 - ratio)),
+      rescale(rescale)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of the dropout layer.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    // The dropout mask will not be multiplied in the deterministic mode
+    // (during testing).
+    if (deterministic)
+    {
+      if (!rescale)
+      {
+        output = input;
+      }
+      else
+      {
+        output = input * scale;
+      }
+    }
+    else
+    {
+      // Scale with input / (1 - ratio) and set values to zero with probability
+      // ratio.
+      mask = arma::randu<arma::Mat<eT> >(input.n_rows, input.n_cols);
+      mask.transform( [&](double val) { return (val > ratio); } );
+      output = input % mask * scale;
+    }
+  }
+
+  /**
+   * Ordinary feed forward pass of the dropout layer.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
+  {
+    // The dropout mask will not be multiplied in the deterministic mode
+    // (during testing).
+    if (deterministic)
+    {
+      if (!rescale)
+      {
+        output = input;
+      }
+      else
+      {
+        output = input * scale;
+      }
+    }
+    else
+    {
+      // Scale with input / (1 - ratio) and set values to zero with probability
+      // ratio.
+      mask = arma::randu<arma::Cube<eT> >(input.n_rows, input.n_cols,
+          input.n_slices);
+      mask.transform( [&](double val) { return (val > ratio); } );
+      output = input % mask * scale;
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of the dropout layer.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& /* unused */,
+                const DataType& gy,
+                DataType& g)
+  {
+    g = gy % mask * scale;
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! The value of the deterministic parameter.
+  bool Deterministic() const { return deterministic; }
+  //! Modify the value of the deterministic parameter.
+  bool& Deterministic() { return deterministic; }
+
+  //! The probability of setting a value to zero.
+  double Ratio() const { return ratio; }
+
+  //! Modify the probability of setting a value to zero.
+  void Ratio(const double r)
+  {
+    ratio = r;
+    scale = 1.0 / (1.0 - ratio);
+  }
+
+  //! The value of the rescale parameter.
+  bool Rescale() const {return rescale; }
+  //! Modify the value of the rescale parameter.
+  bool& Rescale() {return rescale; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(ratio, "ratio");
+    ar & data::CreateNVP(rescale, "rescale");
+  }
+
+ private:
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Locally-stored mask object.
+  OutputDataType mask;
+
+  //! The probability of setting a value to zero.
+  double ratio;
+
+  //! The scale fraction.
+  double scale;
+
+  //! If true, dropout and scaling are disabled (see notes above).
+  bool deterministic;
+
+  //! If true, the input is rescaled when deterministic is false.
+  bool rescale;
+}; // class DropoutLayer
+
+//! Layer traits for the dropout layer.
+template <
+  typename InputDataType,
+  typename OutputDataType
+>
+class LayerTraits<DropoutLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+/**
+ * Standard Dropout-Layer2D.
+ */
+template <
+    typename InputDataType = arma::cube,
+    typename OutputDataType = arma::cube
+>
+using DropoutLayer2D = DropoutLayer<InputDataType, OutputDataType>;
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
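A minimal usage sketch for DropoutLayer (illustrative only):

    #include <mlpack/methods/ann/layer/dropout_layer.hpp>

    arma::mat input = arma::randu<arma::mat>(10, 1);
    arma::mat output;
    mlpack::ann::DropoutLayer<> dropout(0.2);
    dropout.Deterministic() = false;  // training mode
    dropout.Forward(input, output);   // ~20% of the entries zeroed, rest scaled by 1.25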
diff --git a/src/mlpack/methods/ann/layer/glimpse_layer.hpp b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
new file mode 100644
index 0000000..64f04d5
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/glimpse_layer.hpp
@@ -0,0 +1,484 @@
+/**
+ * @file glimpse_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the GlimpseLayer class, which takes an input image and a
+ * location to extract a retina-like representation of the input image at
+ * different increasing scales.
+ *
+ * For more information, see the following.
+ *
+ * @code
+ * @article{CoRR2014,
+ *   author  = {Volodymyr Mnih, Nicolas Heess, Alex Graves, Koray Kavukcuoglu},
+ *   title   = {Recurrent Models of Visual Attention},
+ *   journal = {CoRR},
+ *   volume  = {abs/1406.6247},
+ *   year    = {2014},
+ * }
+ * @endcode
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_GLIMPSE_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
+#include <algorithm>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The glimpse layer returns a retina-like representation
+ * (down-scaled cropped images) of increasing scale around a given location in a
+ * given image.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::cube,
+    typename OutputDataType = arma::cube
+>
+class GlimpseLayer
+{
+ public:
+
+  /**
+   * Create the GlimpseLayer object using the specified number of input units,
+   * glimpse size, depth, and scale.
+   *
+   * @param inSize The size of the input units.
+   * @param size The used glimpse size (height = width).
+   * @param depth The number of patches to crop per glimpse.
+   * @param scale The scaling factor used to create the increasing retina-like
+   *        representation.
+   */
+  GlimpseLayer(const size_t inSize,
+               const size_t size,
+               const size_t depth = 3,
+               const size_t scale = 2) :
+      inSize(inSize),
+      size(size),
+      depth(depth),
+      scale(scale)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of the glimpse layer.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
+  {
+    output = arma::Cube<eT>(size, size, depth * input.n_slices);
+
+    inputDepth = input.n_slices / inSize;
+
+    for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++)
+    {
+      for (size_t depthIdx = 0, glimpseSize = size;
+          depthIdx < depth; depthIdx++, glimpseSize *= scale)
+      {
+        size_t padSize = std::floor((glimpseSize - 1) / 2);
+
+        arma::Cube<eT> inputPadded = arma::zeros<arma::Cube<eT> >(
+            input.n_rows + padSize * 2, input.n_cols + padSize * 2,
+            input.n_slices / inSize);
+
+        inputPadded.tube(padSize, padSize, padSize + input.n_rows - 1,
+            padSize + input.n_cols - 1) = input.subcube(0, 0,
+            inputIdx * inputDepth, input.n_rows - 1, input.n_cols - 1,
+            (inputIdx + 1) * inputDepth - 1);
+
+        size_t h = inputPadded.n_rows - glimpseSize;
+        size_t w = inputPadded.n_cols - glimpseSize;
+
+        size_t x = std::min(h, (size_t) std::max(0.0,
+            (location(0, inputIdx) + 1) / 2.0 * h));
+        size_t y = std::min(w, (size_t) std::max(0.0,
+            (location(1, inputIdx) + 1) / 2.0 * w));
+
+        if (depthIdx == 0)
+        {
+          for (size_t j = (inputIdx + depthIdx), paddedSlice = 0;
+              j < output.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            output.slice(j) = inputPadded.subcube(x, y,
+                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                paddedSlice);
+          }
+        }
+        else
+        {
+          for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0;
+              j < output.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            arma::Mat<eT> poolingInput = inputPadded.subcube(x, y,
+                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                paddedSlice);
+
+            if (scale == 2)
+            {
+              Pooling(glimpseSize / size, poolingInput, output.slice(j));
+            }
+            else
+            {
+              ReSampling(poolingInput, output.slice(j));
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of the glimpse layer.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename ErrorType, typename eT>
+  void Backward(const InputType& input,
+                const ErrorType& gy,
+                arma::Cube<eT>& g)
+  {
+    // Generate a cube using the backpropagated error matrix.
+    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
+        input.n_cols, input.n_slices);
+
+    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++)
+    {
+      for (size_t i = 0; i < gy.n_cols; i++)
+      {
+        arma::Col<eT> temp = gy.col(i).subvec(
+            j * input.n_rows * input.n_cols,
+            (j + 1) * input.n_rows * input.n_cols - 1);
+
+        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
+            input.n_rows, input.n_cols);
+      }
+    }
+
+    g = arma::zeros<arma::cube>(inputParameter.n_rows, inputParameter.n_cols,
+        inputParameter.n_slices);
+
+    for (size_t inputIdx = 0; inputIdx < inSize; inputIdx++)
+    {
+      for (size_t depthIdx = 0, glimpseSize = size;
+          depthIdx < depth; depthIdx++, glimpseSize *= scale)
+      {
+        size_t padSize = std::floor((glimpseSize - 1) / 2);
+
+        arma::Cube<eT> inputPadded = arma::zeros<arma::Cube<eT> >(
+            inputParameter.n_rows + padSize * 2, inputParameter.n_cols +
+            padSize * 2, inputParameter.n_slices / inSize);
+
+        size_t h = inputPadded.n_rows - glimpseSize;
+        size_t w = inputPadded.n_cols - glimpseSize;
+
+        size_t x = std::min(h, (size_t) std::max(0.0,
+            (location(0, inputIdx) + 1) / 2.0 * h));
+        size_t y = std::min(w, (size_t) std::max(0.0,
+            (location(1, inputIdx) + 1) / 2.0 * w));
+
+        if (depthIdx == 0)
+        {
+          for (size_t j = (inputIdx + depthIdx), paddedSlice = 0;
+              j < mappedError.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            inputPadded.subcube(x, y,
+            paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+            paddedSlice) = mappedError.slice(j);
+          }
+        }
+        else
+        {
+          for (size_t j = (inputIdx + depthIdx * (depth - 1)), paddedSlice = 0;
+              j < mappedError.n_slices; j += (inSize * depth), paddedSlice++)
+          {
+            arma::Mat<eT> poolingOutput = inputPadded.subcube(x, y,
+                 paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                 paddedSlice);
+
+            if (scale == 2)
+            {
+              Unpooling(inputParameter.slice(paddedSlice), mappedError.slice(j),
+                  poolingOutput);
+            }
+            else
+            {
+              DownwardReSampling(inputParameter.slice(paddedSlice),
+                  mappedError.slice(j), poolingOutput);
+            }
+
+            inputPadded.subcube(x, y,
+                paddedSlice, x + glimpseSize - 1, y + glimpseSize - 1,
+                paddedSlice) = poolingOutput;
+          }
+        }
+
+        g += inputPadded.tube(padSize, padSize, padSize +
+            inputParameter.n_rows - 1, padSize + inputParameter.n_cols - 1);
+      }
+    }
+
+    Transform(g);
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Set the location, i.e. the x and y coordinates of the center of the
+  //! output glimpse.
+  void Location(const arma::mat& location)
+  {
+    this->location = location;
+  }
+
+ private:
+  /*
+   * Transform the given input by changing rows to columns.
+   *
+   * @param w The input matrix used to perform the transformation.
+   */
+  void Transform(arma::mat& w)
+  {
+    arma::mat t = w;
+
+    for (size_t i = 0, k = 0; i < w.n_elem; k++)
+    {
+      for (size_t j = 0; j < w.n_cols; j++, i++)
+      {
+        w(k, j) = t(i);
+      }
+    }
+  }
+
+  /*
+   * Transform the given input by changing rows to columns.
+   *
+   * @param w The input matrix used to perform the transformation.
+   */
+  void Transform(arma::cube& w)
+  {
+    for (size_t i = 0; i < w.n_slices; i++)
+    {
+      arma::mat t = w.slice(i);
+      Transform(t);
+      w.slice(i) = t;
+    }
+  }
+
+  /**
+   * Apply pooling to the input and store the results to the output parameter.
+   *
+   * @param kSize the kernel size used to perform the pooling operation.
+   * @param input The input to which the pooling rule is applied.
+   * @param output The pooled result.
+   */
+  template<typename eT>
+  void Pooling(const size_t kSize,
+               const arma::Mat<eT>& input,
+               arma::Mat<eT>& output)
+  {
+
+    const size_t rStep = kSize;
+    const size_t cStep = kSize;
+
+    for (size_t j = 0; j < input.n_cols; j += cStep)
+    {
+      for (size_t i = 0; i < input.n_rows; i += rStep)
+      {
+        output(i / rStep, j / cStep) += pooling.Pooling(
+            input(arma::span(i, i + rStep - 1), arma::span(j, j + cStep - 1)));
+      }
+    }
+  }
+
+  /**
+   * Apply unpooling to the input and store the results.
+   *
+   * @param input The input to which the unpooling rule is applied.
+   * @param error The error used to perform the unpooling operation.
+   * @param output The unpooled result.
+   */
+  template<typename eT>
+  void Unpooling(const arma::Mat<eT>& input,
+                 const arma::Mat<eT>& error,
+                 arma::Mat<eT>& output)
+  {
+    const size_t rStep = input.n_rows / error.n_rows;
+    const size_t cStep = input.n_cols / error.n_cols;
+
+    arma::Mat<eT> unpooledError;
+    for (size_t j = 0; j < input.n_cols; j += cStep)
+    {
+      for (size_t i = 0; i < input.n_rows; i += rStep)
+      {
+        const arma::Mat<eT>& inputArea = input(arma::span(i, i + rStep - 1),
+                                               arma::span(j, j + cStep - 1));
+
+        pooling.Unpooling(inputArea, error(i / rStep, j / cStep),
+            unpooledError);
+
+        output(arma::span(i, i + rStep - 1),
+            arma::span(j, j + cStep - 1)) += unpooledError;
+      }
+    }
+  }
+
+  /**
+   * Apply ReSampling to the input and store the results in the output
+   * parameter.
+   *
+   * @param input The input to which the ReSampling rule is applied.
+   * @param output The pooled result.
+   */
+  template<typename eT>
+  void ReSampling(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    double wRatio = (double) (input.n_rows - 1) / (size - 1);
+    double hRatio = (double) (input.n_cols - 1) / (size - 1);
+
+    double iWidth = input.n_rows - 1;
+    double iHeight = input.n_cols - 1;
+
+    for (size_t y = 0; y < size; y++)
+    {
+      for (size_t x = 0; x < size; x++)
+      {
+        double ix = wRatio * x;
+        double iy = hRatio * y;
+
+        // Get the 4 nearest neighbors.
+        double ixNw = std::floor(ix);
+        double iyNw = std::floor(iy);
+        double ixNe = ixNw + 1;
+        double iySw = iyNw + 1;
+
+        // Get surfaces to each neighbor.
+        double se = (ix - ixNw) * (iy - iyNw);
+        double sw = (ixNe - ix) * (iy - iyNw);
+        double ne = (ix - ixNw) * (iySw - iy);
+        double nw = (ixNe - ix) * (iySw - iy);
+
+        // Calculate the weighted sum.
+        output(y, x) = input(iyNw, ixNw) * nw +
+            input(iyNw, std::min(ixNe,  iWidth)) * ne +
+            input(std::min(iySw, iHeight), ixNw) * sw +
+            input(std::min(iySw, iHeight), std::min(ixNe, iWidth)) * se;
+      }
+    }
+  }
+
+  /**
+   * Apply DownwardReSampling to the input and store the results in the output
+   * parameter.
+   *
+   * @param input The input to which the DownwardReSampling rule is applied.
+   * @param error The error used to perform the DownwardReSampling operation.
+   * @param output The DownwardReSampled result.
+   */
+  template<typename eT>
+  void DownwardReSampling(const arma::Mat<eT>& input,
+                          const arma::Mat<eT>& error,
+                          arma::Mat<eT>& output)
+  {
+    double iWidth = input.n_rows - 1;
+    double iHeight = input.n_cols - 1;
+
+    double wRatio = iWidth / (size - 1);
+    double hRatio = iHeight / (size - 1);
+
+    for (size_t y = 0; y < size; y++)
+    {
+      for (size_t x = 0; x < size; x++)
+      {
+        double ix = wRatio * x;
+        double iy = hRatio * y;
+
+        // Get the 4 nearest neighbors.
+        double ixNw = std::floor(ix);
+        double iyNw = std::floor(iy);
+        double ixNe = ixNw + 1;
+        double iySw = iyNw + 1;
+
+        // Get surfaces to each neighbor.
+        double se = (ix - ixNw) * (iy - iyNw);
+        double sw = (ixNe - ix) * (iy - iyNw);
+        double ne = (ix - ixNw) * (iySw - iy);
+        double nw = (ixNe - ix) * (iySw - iy);
+
+        double ograd = error(y, x);
+
+        output(iyNw, ixNw) = output(iyNw, ixNw) + nw * ograd;
+        output(iyNw, std::min(ixNe, iWidth)) = output(iyNw,
+            std::min(ixNe, iWidth)) + ne * ograd;
+        output(std::min(iySw, iHeight), ixNw) = output(std::min(iySw, iHeight),
+            ixNw) + sw * ograd;
+        output(std::min(iySw, iHeight), std::min(ixNe, iWidth)) = output(
+            std::min(iySw, iHeight), std::min(ixNe, iWidth)) + se * ograd;
+      }
+    }
+  }
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Locally-stored depth of the input.
+  size_t inputDepth;
+
+  //! The size of the input units.
+  size_t inSize;
+
+  //! The used glimpse size (height = width).
+  size_t size;
+
+  //! The number of patches to crop per glimpse.
+  size_t depth;
+
+  //! The scale fraction.
+  size_t scale;
+
+  //! The x and y coordinate of the center of the output glimpse.
+  arma::mat location;
+
+  //! Locally-stored object to perform the mean pooling operation.
+  MeanPooling pooling;
+}; // class GlimpseLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
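
For reference, a minimal usage sketch of the glimpse layer added above (not part
of the patch). The class head and header path sit outside this hunk, so the
default template arguments and the include are assumptions here; the constructor
arguments (inSize, size, depth, scale) and the [-1, 1] location convention
follow from the code shown.

    // Illustrative only: include path and template defaults are assumed.
    #include <armadillo>
    // Plus the glimpse layer header added by this patch (path not shown here).

    using namespace mlpack::ann;

    void GlimpseSketch()  // Illustrative name only.
    {
      // One input image (inSize = 1), 8x8 glimpse, a single depth level.
      GlimpseLayer<> glimpse(1, 8, 1);

      // A 32x32 single-channel input image.
      arma::cube input = arma::randu<arma::cube>(32, 32, 1);

      // Glimpse center in [-1, 1] coordinates; (0, 0) is the image center.
      arma::mat location = arma::zeros<arma::mat>(2, 1);
      glimpse.Location(location);

      arma::cube output;
      glimpse.Forward(input, output);  // size x size x (depth * n_slices) = 8 x 8 x 1.
    }
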
diff --git a/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp b/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp
new file mode 100644
index 0000000..5ebe613
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/hard_tanh_layer.hpp
@@ -0,0 +1,259 @@
+/**
+ * @file hard_tanh_layer.hpp
+ * @author Dhawal Arora
+ *
+ * Definition and implementation of the HardTanHLayer layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_HARD_TANH_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The Hard Tanh activation function, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& \left\{
+ *   \begin{array}{lr}
+ *     maxValue & : x > maxValue \\
+ *     minValue & : x \le minValue \\
+ *     x   & : otherwise
+ *   \end{array}
+ * \right. \\
+ * f'(x) &=& \left\{
+ *   \begin{array}{lr}
+ *     0 & : x > maxValue \\
+ *     0 & : x \le minValue \\
+ *     1 & : otherwise
+ *   \end{array}
+ * \right.
+ * @f}
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class HardTanHLayer
+{
+ public:
+  /**
+   * Create the HardTanHLayer object using the specified parameters. The range
+   * of the linear region can be adjusted by specifying the maxValue and
+   * minValue. Default (maxValue = 1, minValue = -1).
+   *
+   * @param maxValue Range of the linear region maximum value.
+   * @param minValue Range of the linear region minimum value.
+   */
+  HardTanHLayer(const double maxValue = 1, const double minValue = -1) :
+      maxValue(maxValue), minValue(minValue)
+  {
+     // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename InputType, typename OutputType>
+  void Forward(const InputType& input, OutputType& output)
+  {
+    Fn(input, output);
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& input,
+                const DataType& gy,
+                DataType& g)
+  {
+    DataType derivative;
+    Deriv(input, derivative);
+    g = gy % derivative;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Cube<eT>& input,
+                const arma::Mat<eT>& gy,
+                arma::Cube<eT>& g)
+  {
+    // Generate a cube using the backpropagated error matrix.
+    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
+        input.n_cols, input.n_slices);
+
+    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++)
+    {
+      for (size_t i = 0; i < gy.n_cols; i++)
+      {
+        arma::Col<eT> temp = gy.col(i).subvec(
+            j * input.n_rows * input.n_cols,
+            (j + 1) * input.n_rows * input.n_cols - 1);
+
+        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
+            input.n_rows, input.n_cols);
+      }
+    }
+
+    arma::Cube<eT> derivative;
+    Deriv(input, derivative);
+    g = mappedError % derivative;
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the maximum value.
+  double const& MaxValue() const { return maxValue; }
+  //! Modify the maximum value.
+  double& MaxValue() { return maxValue; }
+
+  //! Get the minimum value.
+  double const& MinValue() const { return minValue; }
+  //! Modify the minimum value.
+  double& MinValue() { return minValue; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(maxValue, "maxValue");
+    ar & data::CreateNVP(minValue, "minValue");
+  }
+
+ private:
+  /**
+   * Computes the HardTanH function.
+   *
+   * @param x Input data.
+   * @return f(x).
+   */
+  double Fn(const double x)
+  {
+    if (x > maxValue)
+      return maxValue;
+    else if (x < minValue)
+      return minValue;
+    return x;
+  }
+
+  /**
+   * Computes the HardTanH function using a dense matrix as input.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename eT>
+  void Fn(const arma::Mat<eT>& x, arma::Mat<eT>& y)
+  {
+    y = x;
+    y.transform( [&](eT val) { return std::min(
+        std::max( val, minValue ), maxValue ); } );
+  }
+
+  /**
+   * Computes the HardTanH function using a 3rd-order tensor as input.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename eT>
+  void Fn(const arma::Cube<eT>& x, arma::Cube<eT>& y)
+  {
+    y = x;
+    for (size_t s = 0; s < x.n_slices; s++)
+      Fn(x.slice(s), y.slice(s));
+  }
+
+  /**
+   * Computes the first derivative of the HardTanH function.
+   *
+   * @param x Input data.
+   * @return f'(x)
+   */
+  double Deriv(const double x)
+  {
+    return (x > maxValue || x < minValue) ? 0 : 1;
+  }
+
+  /**
+   * Computes the first derivative of the HardTanH function.
+   *
+   * @param x Input activations.
+   * @param y The resulting derivatives.
+   */
+  template<typename InputType, typename OutputType>
+  void Deriv(const InputType& x, OutputType& y)
+  {
+    y = x;
+
+    for (size_t i = 0; i < x.n_elem; i++)
+      y(i) = Deriv(x(i));
+  }
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Maximum value for the HardTanH function.
+  double maxValue;
+
+  //! Minimum value for the HardTanH function.
+  double minValue;
+}; // class HardTanHLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
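
A quick usage sketch of the HardTanHLayer above (not part of the patch); the
expected values in the comments follow directly from the clamping rule and its
derivative.

    #include <mlpack/methods/ann/layer/hard_tanh_layer.hpp>

    using namespace mlpack::ann;

    void HardTanHSketch()  // Illustrative name only.
    {
      HardTanHLayer<> layer(1.0, -1.0);

      arma::mat input("-2.0 -0.5 0.0 0.5 2.0");
      arma::mat output;
      layer.Forward(input, output);   // -1.0 -0.5 0.0 0.5 1.0

      arma::mat gy = arma::ones<arma::mat>(1, 5);
      arma::mat g;
      layer.Backward(input, gy, g);   // 0 1 1 1 0: error passes only in the linear region.
    }
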
diff --git a/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp b/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
new file mode 100644
index 0000000..deaecb3
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/leaky_relu_layer.hpp
@@ -0,0 +1,240 @@
+/**
+ * @file leaky_relu_layer.hpp
+ * @author Dhawal Arora
+ *
+ * Definition and implementation of the LeakyReLULayer class, first introduced
+ * in the acoustic model paper: Andrew L. Maas, Awni Y. Hannun, Andrew Y. Ng,
+ * "Rectifier Nonlinearities Improve Neural Network Acoustic Models", 2014.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_LEAKYRELU_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The LeakyReLU activation function, defined by
+ *
+ * @f{eqnarray*}{
+ * f(x) &=& \max(x, alpha*x) \\
+ * f'(x) &=& \left\{
+ *   \begin{array}{lr}
+ *     1 & : x > 0 \\
+ *     alpha & : x \le 0
+ *   \end{array}
+ * \right.
+ * @f}
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class LeakyReLULayer
+{
+ public:
+  /**
+   * Create the LeakyReLULayer object using the specified parameters.
+   * The non-zero gradient can be adjusted by specifying the parameter
+   * alpha in the range 0 to 1. Default (alpha = 0.03).
+   *
+   * @param alpha Non-zero gradient.
+   */
+  LeakyReLULayer(const double alpha = 0.03) : alpha(alpha)
+  {
+     // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename InputType, typename OutputType>
+  void Forward(const InputType& input, OutputType& output)
+  {
+    Fn(input, output);
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& input,
+                const DataType& gy,
+                DataType& g)
+  {
+    DataType derivative;
+    Deriv(input, derivative);
+    g = gy % derivative;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Cube<eT>& input,
+                const arma::Mat<eT>& gy,
+                arma::Cube<eT>& g)
+  {
+    // Generate a cube using the backpropagated error matrix.
+    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(input.n_rows,
+        input.n_cols, input.n_slices);
+
+    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++)
+    {
+      for (size_t i = 0; i < gy.n_cols; i++)
+      {
+        arma::Col<eT> temp = gy.col(i).subvec(
+            j * input.n_rows * input.n_cols,
+            (j + 1) * input.n_rows * input.n_cols - 1);
+
+        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
+            input.n_rows, input.n_cols);
+      }
+    }
+
+    arma::Cube<eT> derivative;
+    Deriv(input, derivative);
+    g = mappedError % derivative;
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the non-zero gradient (alpha).
+  double const& Alpha() const { return alpha; }
+  //! Modify the non-zero gradient (alpha).
+  double& Alpha() { return alpha; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(alpha, "alpha");
+  }
+
+ private:
+  /**
+   * Computes the LeakyReLU function.
+   *
+   * @param x Input data.
+   * @return f(x).
+   */
+  double Fn(const double x)
+  {
+    return std::max(x, alpha * x);
+  }
+
+  /**
+   * Computes the Leaky ReLU function using a dense matrix as input.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename eT>
+  void Fn(const arma::Mat<eT>& x, arma::Mat<eT>& y)
+  {
+    y = arma::max(x, alpha * x);
+  }
+
+  /**
+   * Computes the LeakyReLU function using a 3rd-order tensor as input.
+   *
+   * @param x Input data.
+   * @param y The resulting output activation.
+   */
+  template<typename eT>
+  void Fn(const arma::Cube<eT>& x, arma::Cube<eT>& y)
+  {
+    y = x;
+    for (size_t s = 0; s < x.n_slices; s++)
+      Fn(x.slice(s), y.slice(s));
+  }
+
+  /**
+   * Computes the first derivative of the LeakyReLU function.
+   *
+   * @param x Input data.
+   * @return f'(x)
+   */
+  double Deriv(const double x)
+  {
+    return (x >= 0) ? 1 : alpha;
+  }
+
+  /**
+   * Computes the first derivative of the LeakyReLU function.
+   *
+   * @param x Input activations.
+   * @param y The resulting derivatives.
+   */
+  template<typename InputType, typename OutputType>
+  void Deriv(const InputType& x, OutputType& y)
+  {
+    y = x;
+
+    for (size_t i = 0; i < x.n_elem; i++)
+      y(i) = Deriv(x(i));
+  }
+
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Leakiness parameter in the range 0 < alpha < 1.
+  double alpha;
+
+}; // class LeakyReLULayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
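
A matching sketch for the LeakyReLULayer above (not part of the patch): Forward
keeps non-negative inputs and scales negative ones by alpha, and Backward scales
the error by 1 or alpha accordingly.

    #include <mlpack/methods/ann/layer/leaky_relu_layer.hpp>

    using namespace mlpack::ann;

    void LeakyReLUSketch()  // Illustrative name only.
    {
      LeakyReLULayer<> layer(0.03);

      arma::mat input("-2.0 0.0 3.0");
      arma::mat output;
      layer.Forward(input, output);   // -0.06 0.0 3.0

      arma::mat gy = arma::ones<arma::mat>(1, 3);
      arma::mat g;
      layer.Backward(input, gy, g);   // 0.03 1 1
    }
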
diff --git a/src/mlpack/methods/ann/layer/linear_layer.hpp b/src/mlpack/methods/ann/layer/linear_layer.hpp
new file mode 100644
index 0000000..17c4626
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/linear_layer.hpp
@@ -0,0 +1,289 @@
+/**
+ * @file linear_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the LinearLayer class also known as fully-connected layer or
+ * affine transformation.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_LINEAR_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the LinearLayer class. The LinearLayer class represents a
+ * single layer of a neural network.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class LinearLayer
+{
+ public:
+  /**
+   * Create the LinearLayer object using the specified number of units.
+   *
+   * @param inSize The number of input units.
+   * @param outSize The number of output units.
+   */
+  LinearLayer(const size_t inSize, const size_t outSize) :
+      inSize(inSize),
+      outSize(outSize)
+  {
+    weights.set_size(outSize, inSize);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    output = weights * input;
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Cube<eT>& input, arma::Mat<eT>& output)
+  {
+    arma::Mat<eT> data(input.n_elem, 1);
+
+    for (size_t s = 0, c = 0; s < input.n_slices / data.n_cols; s++)
+    {
+      for (size_t i = 0; i < data.n_cols; i++, c++)
+      {
+        data.col(i).subvec(s * input.n_rows * input.n_cols, (s + 1) *
+            input.n_rows * input.n_cols - 1) = arma::trans(arma::vectorise(
+            input.slice(c), 1));
+      }
+    }
+
+    output = weights * data;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Backward(const InputType& /* unused */,
+                const arma::Mat<eT>& gy,
+                arma::Mat<eT>& g)
+  {
+    g = weights.t() * gy;
+  }
+
+  /*
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The propagated input.
+   * @param error The calculated error.
+   * @param gradient The calculated gradient.
+   */
+  template<typename InputType, typename ErrorType, typename GradientType>
+  void Gradient(const InputType& input,
+                const ErrorType& error,
+                GradientType& gradient)
+  {
+    GradientDelta(input, error, gradient);
+  }
+
+  //! Get the weights.
+  OutputDataType const& Weights() const { return weights; }
+  //! Modify the weights.
+  OutputDataType& Weights() { return weights; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the gradient.
+  OutputDataType const& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  OutputDataType& Gradient() { return gradient; }
+
+  /**
+   * Serialize the layer
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(weights, "weights");
+  }
+
+ private:
+  /*
+   * Calculate the gradient (3rd order tensor) using the output delta
+   * (dense matrix) and the input activation (3rd order tensor).
+   *
+   * @param input The input parameter used for calculating the gradient.
+   * @param d The output delta.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void GradientDelta(const arma::Cube<eT>& input,
+                     const arma::Mat<eT>& d,
+                     arma::Cube<eT>& g)
+  {
+    g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
+    arma::Mat<eT> data = arma::Mat<eT>(d.n_cols,
+        input.n_elem / d.n_cols);
+
+    for (size_t s = 0, c = 0; s < input.n_slices / data.n_rows; s++)
+    {
+      for (size_t i = 0; i < data.n_rows; i++, c++)
+      {
+        data.row(i).subvec(s * input.n_rows * input.n_cols,
+            (s + 1) * input.n_rows * input.n_cols - 1) =
+            arma::vectorise(input.slice(c), 1);
+      }
+    }
+
+    g.slice(0) = d * data / d.n_cols;
+  }
+
+  /*
+   * Calculate the gradient (3rd order tensor) using the output delta
+   * (dense matrix) and the input activation (dense matrix).
+   *
+   * @param input The input parameter used for calculating the gradient.
+   * @param d The output delta.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void GradientDelta(const arma::Mat<eT>& input,
+                     const arma::Mat<eT>& d,
+                     arma::Cube<eT>& g)
+  {
+    g = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
+    Gradient(input, d, g.slice(0));
+  }
+
+  /*
+   * Calculate the gradient (dense matrix) using the output delta
+   * (dense matrix) and the input activation (3rd order tensor).
+   *
+   * @param input The input parameter used for calculating the gradient.
+   * @param d The output delta.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void GradientDelta(const arma::Cube<eT>& input,
+                     const arma::Mat<eT>& d,
+                     arma::Mat<eT>& g)
+  {
+    arma::Cube<eT> grad = arma::Cube<eT>(weights.n_rows, weights.n_cols, 1);
+    Gradient(input, d, grad);
+    g = grad.slice(0);
+  }
+
+  /*
+   * Calculate the gradient (dense matrix) using the output delta
+   * (dense matrix) and the input activation (dense matrix).
+   *
+   * @param input The input parameter used for calculating the gradient.
+   * @param d The output delta.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void GradientDelta(const arma::Mat<eT>& input,
+                     const arma::Mat<eT>& d,
+                     arma::Mat<eT>& g)
+  {
+    g = d * input.t();
+  }
+
+  //! Locally-stored number of input units.
+  size_t inSize;
+
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! Locally-stored weight object.
+  OutputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  OutputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class LinearLayer
+
+/**
+ * Linear Mapping layer to map between 3rd order tensors and dense matrices.
+ */
+template <
+    typename InputDataType = arma::cube,
+    typename OutputDataType = arma::mat
+>
+using LinearMappingLayer = LinearLayer<InputDataType, OutputDataType>;
+
+//! Layer traits for the linear layer.
+template<
+    typename InputDataType,
+    typename OutputDataType
+>
+class LayerTraits<LinearLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
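
A sketch of the linear layer's Forward, Backward, and Gradient calls on dense
matrices (not part of the patch). In a network the weights are filled by an
initialization rule; here they are set by hand so the shapes stay visible.

    #include <mlpack/methods/ann/layer/linear_layer.hpp>

    using namespace mlpack::ann;

    void LinearSketch()  // Illustrative name only.
    {
      LinearLayer<> layer(3, 2);                       // 3 inputs, 2 outputs.
      layer.Weights() = arma::randu<arma::mat>(2, 3);  // outSize x inSize.

      arma::mat input = arma::randu<arma::mat>(3, 1);
      arma::mat output;
      layer.Forward(input, output);         // output = weights * input (2 x 1).

      arma::mat gy = arma::randu<arma::mat>(2, 1);
      arma::mat g;
      layer.Backward(input, gy, g);         // g = weights.t() * gy (3 x 1).

      arma::mat gradient;
      layer.Gradient(input, gy, gradient);  // gradient = gy * input.t() (2 x 3).
    }
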
diff --git a/src/mlpack/methods/ann/layer/log_softmax_layer.hpp b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
new file mode 100644
index 0000000..32aa2d5
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/log_softmax_layer.hpp
@@ -0,0 +1,131 @@
+/**
+ * @file log_softmax_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the LogSoftmaxLayer class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_LOG_SOFTMAX_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the log softmax layer. The log softmax loss layer computes
+ * the multinomial logistic loss of the softmax of its inputs. This layer is
+ * meant to be used in combination with the negative log likelihood layer
+ * (NegativeLogLikelihoodLayer), which expects that the input contains
+ * log-probabilities for each class.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class LogSoftmaxLayer
+{
+ public:
+  /**
+   * Create the LogSoftmaxLayer object.
+   */
+  LogSoftmaxLayer() { /* Nothing to do here. */ }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    arma::mat maxInput = arma::repmat(arma::max(input), input.n_rows, 1);
+    output = (maxInput - input);
+
+    // Fast approximation of exp(-x), borrowed from Leon Bottou's fast tanh
+    // implementation; accuracy is about 0.00001 lower than exact evaluation.
+    output.transform( [](double x)
+    {
+      //! Fast approximation of exp(-x) for x positive.
+      static constexpr double A0 = 1.0;
+      static constexpr double A1 = 0.125;
+      static constexpr double A2 = 0.0078125;
+      static constexpr double A3 = 0.00032552083;
+      static constexpr double A4 = 1.0172526e-5;
+
+      if (x < 13.0)
+      {
+        double y = A0 + x * (A1 + x * (A2 + x * (A3 + x * A4)));
+        y *= y;
+        y *= y;
+        y *= y;
+        y = 1 / y;
+
+        return y;
+      }
+
+      return 0.0;
+    } );
+
+    output = input - (maxInput + std::log(arma::accu(output)));
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Mat<eT>& input,
+                const arma::Mat<eT>& gy,
+                arma::Mat<eT>& g)
+  {
+    g = gy - arma::exp(input) * arma::accu(gy);
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+ private:
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class LogSoftmaxLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
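
A sketch for the log softmax layer above (not part of the patch): exponentiating
the output of a column should sum to one, up to the accuracy of the fast exp
approximation used in Forward.

    #include <mlpack/methods/ann/layer/log_softmax_layer.hpp>

    using namespace mlpack::ann;

    void LogSoftmaxSketch()  // Illustrative name only.
    {
      LogSoftmaxLayer<> layer;

      arma::mat input("1.0; 2.0; 3.0");   // One sample per column.
      arma::mat output;
      layer.Forward(input, output);       // Log-probabilities.

      // arma::accu(arma::exp(output)) is approximately 1.
    }
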
diff --git a/src/mlpack/methods/ann/layer/lstm_layer.hpp b/src/mlpack/methods/ann/layer/lstm_layer.hpp
new file mode 100644
index 0000000..7ffe1a8
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/lstm_layer.hpp
@@ -0,0 +1,418 @@
+/**
+ * @file lstm_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the LSTMLayer class, which implements a lstm network
+ * layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_LSTM_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * An implementation of a lstm network layer.
+ *
+ * This class allows specification of the type of the activation functions used
+ * for the gates and cells and also of the type of the function used to
+ * initialize and update the peephole weights.
+ *
+ * @tparam GateActivationFunction Activation function used for the gates.
+ * @tparam StateActivationFunction Activation function used for the state.
+ * @tparam OutputActivationFunction Activation function used for the output.
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    class GateActivationFunction = LogisticFunction,
+    class StateActivationFunction = TanhFunction,
+    class OutputActivationFunction = TanhFunction,
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class LSTMLayer
+{
+ public:
+  /**
+   * Create the LSTMLayer object using the specified parameters.
+   *
+   * @param outSize The number of output units.
+   * @param peepholes The flag used to indicate if peephole connections should
+   *        be used (Default: false).
+   */
+  LSTMLayer(const size_t outSize, const bool peepholes = false) :
+      outSize(outSize),
+      peepholes(peepholes),
+      seqLen(1),
+      offset(0)
+  {
+    if (peepholes)
+    {
+      peepholeWeights.set_size(outSize, 3);
+      peepholeDerivatives = arma::zeros<OutputDataType>(outSize, 3);
+    }
+    else
+    {
+      peepholeWeights.set_size(0, 0);
+    }
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    if (inGate.n_cols < seqLen)
+    {
+      inGate = arma::zeros<InputDataType>(outSize, seqLen);
+      inGateAct = arma::zeros<InputDataType>(outSize, seqLen);
+      inGateError = arma::zeros<InputDataType>(outSize, seqLen);
+      outGate = arma::zeros<InputDataType>(outSize, seqLen);
+      outGateAct = arma::zeros<InputDataType>(outSize, seqLen);
+      outGateError = arma::zeros<InputDataType>(outSize, seqLen);
+      forgetGate = arma::zeros<InputDataType>(outSize, seqLen);
+      forgetGateAct = arma::zeros<InputDataType>(outSize, seqLen);
+      forgetGateError = arma::zeros<InputDataType>(outSize, seqLen);
+      state = arma::zeros<InputDataType>(outSize, seqLen);
+      stateError = arma::zeros<InputDataType>(outSize, seqLen);
+      cellAct = arma::zeros<InputDataType>(outSize, seqLen);
+    }
+
+    // Split up the input activation into the 3 parts (inGate, forgetGate,
+    // outGate).
+    inGate.col(offset) = input.submat(0, 0, outSize - 1, 0);
+
+    forgetGate.col(offset) = input.submat(outSize, 0, (outSize * 2) - 1, 0);
+    outGate.col(offset) = input.submat(outSize * 3, 0, (outSize * 4) - 1, 0);
+
+    if (peepholes && offset > 0)
+    {
+      inGate.col(offset) += peepholeWeights.col(0) % state.col(offset - 1);
+      forgetGate.col(offset) += peepholeWeights.col(1) %
+          state.col(offset - 1);
+    }
+
+    arma::Col<eT> inGateActivation = inGateAct.unsafe_col(offset);
+    GateActivationFunction::fn(inGate.unsafe_col(offset), inGateActivation);
+
+    arma::Col<eT> forgetGateActivation = forgetGateAct.unsafe_col(offset);
+    GateActivationFunction::fn(forgetGate.unsafe_col(offset),
+        forgetGateActivation);
+
+    arma::Col<eT> cellActivation = cellAct.unsafe_col(offset);
+    StateActivationFunction::fn(input.submat(outSize * 2, 0,
+        (outSize * 3) - 1, 0), cellActivation);
+
+    state.col(offset) = inGateAct.col(offset) % cellActivation;
+
+    if (offset > 0)
+      state.col(offset) += forgetGateAct.col(offset) % state.col(offset - 1);
+
+    if (peepholes)
+      outGate.col(offset) += peepholeWeights.col(2) % state.col(offset);
+
+    arma::Col<eT> outGateActivation = outGateAct.unsafe_col(offset);
+    GateActivationFunction::fn(outGate.unsafe_col(offset), outGateActivation);
+
+    OutputActivationFunction::fn(state.unsafe_col(offset), output);
+    output = outGateAct.col(offset) % output;
+
+    offset = (offset + 1) % seqLen;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Backward(const InputType& /* unused */,
+                const arma::Mat<eT>& gy,
+                arma::Mat<eT>& g)
+  {
+    queryOffset = seqLen - offset - 1;
+
+    arma::Col<eT> outGateDerivative;
+    GateActivationFunction::deriv(outGateAct.unsafe_col(queryOffset),
+        outGateDerivative);
+
+    arma::Col<eT> stateActivation;
+    StateActivationFunction::fn(state.unsafe_col(queryOffset), stateActivation);
+
+    outGateError.col(queryOffset) = outGateDerivative % gy % stateActivation;
+
+    arma::Col<eT> stateDerivative;
+    StateActivationFunction::deriv(stateActivation, stateDerivative);
+
+    stateError.col(queryOffset) = gy % outGateAct.col(queryOffset) %
+        stateDerivative;
+
+    if (queryOffset < (seqLen - 1))
+    {
+      stateError.col(queryOffset) += stateError.col(queryOffset + 1) %
+          forgetGateAct.col(queryOffset + 1);
+
+      if (peepholes)
+      {
+        stateError.col(queryOffset) += inGateError.col(queryOffset + 1) %
+            peepholeWeights.col(0);
+        stateError.col(queryOffset) += forgetGateError.col(queryOffset + 1) %
+            peepholeWeights.col(1);
+      }
+    }
+
+    if (peepholes)
+    {
+      stateError.col(queryOffset) += outGateError.col(queryOffset) %
+          peepholeWeights.col(2);
+    }
+
+    arma::Col<eT> cellDerivative;
+    StateActivationFunction::deriv(cellAct.col(queryOffset), cellDerivative);
+
+    arma::Col<eT> cellError = inGateAct.col(queryOffset) % cellDerivative %
+        stateError.col(queryOffset);
+
+    if (queryOffset > 0)
+    {
+      arma::Col<eT> forgetGateDerivative;
+      GateActivationFunction::deriv(forgetGateAct.col(queryOffset),
+          forgetGateDerivative);
+
+      forgetGateError.col(queryOffset) = forgetGateDerivative %
+          stateError.col(queryOffset) % state.col(queryOffset - 1);
+    }
+
+    arma::Col<eT> inGateDerivative;
+    GateActivationFunction::deriv(inGateAct.col(queryOffset), inGateDerivative);
+
+    inGateError.col(queryOffset) = inGateDerivative %
+        stateError.col(queryOffset) % cellAct.col(queryOffset);
+
+    if (peepholes)
+    {
+      peepholeDerivatives.col(2) += outGateError.col(queryOffset) %
+          state.col(queryOffset);
+
+      if (queryOffset > 0)
+      {
+        peepholeDerivatives.col(0) += inGateError.col(queryOffset) %
+            state.col(queryOffset - 1);
+        peepholeDerivatives.col(1) += forgetGateError.col(queryOffset) %
+            state.col(queryOffset - 1);
+      }
+    }
+
+    g = arma::zeros<arma::Mat<eT> >(outSize * 4, 1);
+    g.submat(0, 0, outSize - 1, 0) = inGateError.col(queryOffset);
+    g.submat(outSize, 0, (outSize * 2) - 1, 0) =
+        forgetGateError.col(queryOffset);
+    g.submat(outSize * 2, 0, (outSize * 3) - 1, 0) = cellError;
+    g.submat(outSize * 3, 0, (outSize * 4) - 1, 0) =
+        outGateError.col(queryOffset);
+
+    offset = (offset + 1) % seqLen;
+  }
+
+  /**
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& /* input */,
+                const arma::Mat<eT>& /* gy */,
+                GradientDataType& /* g */)
+  {
+    if (peepholes && offset == 0)
+    {
+      peepholeGradient.col(0) = arma::trans((peepholeWeights.col(0).t() *
+          (inGateError.col(queryOffset) % peepholeDerivatives.col(0))) *
+          inGate.col(queryOffset).t());
+
+      peepholeGradient.col(1) = arma::trans((peepholeWeights.col(1).t() *
+          (forgetGateError.col(queryOffset) % peepholeDerivatives.col(1))) *
+          forgetGate.col(queryOffset).t());
+
+      peepholeGradient.col(2) = arma::trans((peepholeWeights.col(2).t() *
+          (outGateError.col(queryOffset) % peepholeDerivatives.col(2))) *
+          outGate.col(queryOffset).t());
+
+      peepholeDerivatives.zeros();
+    }
+  }
+
+  //! Get the peephole weights.
+  OutputDataType const& Weights() const { return peepholeWeights; }
+  //! Modify the peephole weights.
+  OutputDataType& Weights() { return peepholeWeights; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the peephole gradient.
+  OutputDataType const& Gradient() const { return peepholeGradient; }
+  //! Modify the peephole gradient.
+  OutputDataType& Gradient() { return peepholeGradient; }
+
+  //! Get the sequence length.
+  size_t SeqLen() const { return seqLen; }
+  //! Modify the sequence length.
+  size_t& SeqLen() { return seqLen; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(peepholes, "peepholes");
+
+    if (peepholes)
+    {
+      ar & data::CreateNVP(peepholeWeights, "peepholeWeights");
+
+      if (Archive::is_loading::value)
+      {
+        peepholeDerivatives = arma::zeros<OutputDataType>(
+            peepholeWeights.n_rows, 3);
+      }
+    }
+  }
+
+ private:
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! Locally-stored peephole indication flag.
+  bool peepholes;
+
+  //! Locally-stored length of the input sequence.
+  size_t seqLen;
+
+  //! Locally-stored sequence offset.
+  size_t offset;
+
+  //! Locally-stored query offset.
+  size_t queryOffset;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  OutputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Locally-stored ingate object.
+  InputDataType inGate;
+
+  //! Locally-stored ingate activation object.
+  InputDataType inGateAct;
+
+  //! Locally-stored ingate error object.
+  InputDataType inGateError;
+
+  //! Locally-stored outgate object.
+  InputDataType outGate;
+
+  //! Locally-stored outgate activation object.
+  InputDataType outGateAct;
+
+  //! Locally-stored outgate error object.
+  InputDataType outGateError;
+
+  //! Locally-stored forget object.
+  InputDataType forgetGate;
+
+  //! Locally-stored forget activation object.
+  InputDataType forgetGateAct;
+
+  //! Locally-stored forget error object.
+  InputDataType forgetGateError;
+
+  //! Locally-stored state object.
+  InputDataType state;
+
+  //! Locally-stored state error object.
+  InputDataType stateError;
+
+  //! Locally-stored cell activation object.
+  InputDataType cellAct;
+
+  //! Locally-stored peephole weight object.
+  OutputDataType peepholeWeights;
+
+  //! Locally-stored derivatives object.
+  OutputDataType peepholeDerivatives;
+
+  //! Locally-stored peephole gradient object.
+  OutputDataType peepholeGradient;
+}; // class LSTMLayer
+
+//! Layer traits for the lstm layer.
+template<
+    class GateActivationFunction,
+    class StateActivationFunction,
+    class OutputActivationFunction,
+    typename InputDataType,
+    typename OutputDataType
+>
+class LayerTraits<LSTMLayer<GateActivationFunction,
+                            StateActivationFunction,
+                            OutputActivationFunction,
+                            InputDataType,
+                            OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = true;
+  static const bool IsConnection = false;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
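
A sketch of a single forward step through the LSTM layer above (not part of the
patch). The layer expects the gate pre-activations stacked into one 4 * outSize
column, in the order inGate, forgetGate, cell, outGate, as read back by Forward;
the default gate/state activation types are assumed to come from mlpack's
existing activation function headers.

    #include <mlpack/methods/ann/layer/lstm_layer.hpp>
    // Plus the headers providing LogisticFunction and TanhFunction.

    using namespace mlpack::ann;

    void LSTMSketch()  // Illustrative name only.
    {
      const size_t outSize = 4;
      LSTMLayer<> layer(outSize);  // No peephole connections.

      // Gate pre-activations: [inGate; forgetGate; cell; outGate].
      arma::mat input = arma::randu<arma::mat>(4 * outSize, 1);
      arma::mat output;
      layer.Forward(input, output);  // output has outSize rows.
    }
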
diff --git a/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp b/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp
new file mode 100644
index 0000000..440db78
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/multiclass_classification_layer.hpp
@@ -0,0 +1,98 @@
+/**
+ * @file multiclass_classification_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the MulticlassClassificationLayer class, which implements a
+ * multiclass classification layer that can be used as output layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_MULTICLASS_CLASSIFICATION_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * An implementation of a multiclass classification layer that can be used as
+ * output layer.
+ *
+ * A convenience typedef is given:
+ *
+ *  - ClassificationLayer
+ */
+class MulticlassClassificationLayer
+{
+ public:
+  /**
+   * Create the MulticlassClassificationLayer object.
+   */
+  MulticlassClassificationLayer()
+  {
+    // Nothing to do here.
+  }
+
+  /*
+   * Calculate the error using the specified input activation and the target.
+   * The error is stored into the given error parameter.
+   *
+   * @param inputActivations Input data used for evaluating the network.
+   * @param target Target data used for evaluating the network.
+   * @param error The calculated error with respect to the input activation and
+   * the given target.
+   */
+  template<typename DataType>
+  void CalculateError(const DataType& inputActivations,
+                      const DataType& target,
+                      DataType& error)
+  {
+    error = inputActivations - target;
+  }
+
+  /*
+   * Calculate the output class using the specified input activation.
+   *
+   * @param inputActivations Input data used to calculate the output class.
+   * @param output Output class of the input activation.
+   */
+  template<typename DataType>
+  void OutputClass(const DataType& inputActivations, DataType& output)
+  {
+    output = inputActivations;
+  }
+
+  /**
+   * Serialize the layer
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+  }
+}; // class MulticlassClassificationLayer
+
+//! Layer traits for the multiclass classification layer.
+template <>
+class LayerTraits<MulticlassClassificationLayer>
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = true;
+  static const bool IsBiasLayer = false;
+  static const bool IsConnection = false;
+};
+
+/**
+ * Alias ClassificationLayer.
+ */
+using ClassificationLayer = MulticlassClassificationLayer;
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
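
An illustrative sketch of the output layer's error calculation (not part of the
patch): the error is simply the difference between the network's activations and
the one-hot target.

    #include <mlpack/methods/ann/layer/multiclass_classification_layer.hpp>

    using namespace mlpack::ann;

    void ClassificationSketch()  // Illustrative name only.
    {
      MulticlassClassificationLayer outputLayer;

      arma::mat prediction("0.1; 0.7; 0.2");
      arma::mat target("0; 1; 0");

      arma::mat error;
      outputLayer.CalculateError(prediction, target, error);  // prediction - target.

      arma::mat classOutput;
      outputLayer.OutputClass(prediction, classOutput);       // Pass-through of the activations.
    }
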
diff --git a/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
new file mode 100644
index 0000000..d2f5fe8
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/multiply_constant_layer.hpp
@@ -0,0 +1,113 @@
+/**
+ * @file multiply_constant_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the MultiplyConstantLayer class, which multiplies the input by
+ * a (non-learnable) constant.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_MULTIPLY_CONSTANT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the multiply constant layer. The multiply constant layer
+ * multiplies the input by a (non-learnable) constant.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class MultiplyConstantLayer
+{
+ public:
+  /**
+   * Create the MultiplyConstantLayer object.
+   */
+  MultiplyConstantLayer(const double scalar) : scalar(scalar)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network. Multiply the input by the
+   * specified constant scalar value.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename InputType, typename OutputType>
+  void Forward(const InputType& input, OutputType& output)
+  {
+    output = input * scalar;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network. The backward pass
+   * multiplies the error by the specified constant scalar value.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& /* input */, const DataType& gy, DataType& g)
+  {
+    g = gy * scalar;
+  }
+
+  //! Get the input parameter.
+  InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(scalar, "scalar");
+  }
+
+ private:
+  //! Locally-stored constant scalar value.
+  const double scalar;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class MultiplyConstantLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
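
A minimal usage sketch of the MultiplyConstantLayer defined above, exercised in isolation (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/multiply_constant_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      MultiplyConstantLayer<> layer(2.0);  // Multiply every activation by 2.

      arma::mat input("1.0; -3.0; 0.5");
      arma::mat output;
      layer.Forward(input, output);        // output = 2 * input.

      arma::mat gy("0.1; 0.2; 0.3"), g;
      layer.Backward(input, gy, g);        // g = 2 * gy.

      output.print("output");
      g.print("g");
    }
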
diff --git a/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp b/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
new file mode 100644
index 0000000..1cfaef6
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp
@@ -0,0 +1,127 @@
+/**
+ * @file negative_log_likelihood_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the NegativeLogLikelihoodLayer class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_NEGATIVE_LOG_LIKELIHOOD_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the negative log likelihood layer. The negative log
+ * likelihood layer expects that the input contains log-probabilities for each
+ * class. The layer also expects a class index, in the range between 1 and the
+ * number of classes, as target when calling the Forward function.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class NegativeLogLikelihoodLayer
+{
+ public:
+  /**
+   * Create the NegativeLogLikelihoodLayer object.
+   */
+  NegativeLogLikelihoodLayer() { /* Nothing to do here. */ }
+
+  /**
+   * Ordinary feed forward pass of a neural network. The negative log
+   * likelihood layer expects that the input contains log-probabilities for
+   * each class. The layer also expects a class index, in the range between 1
+   * and the number of classes, as target when calling the Forward function.
+   *
+   * @param input Input data that contains the log-probabilities for each class.
+   * @param target The target vector, that contains the class index in the range
+   *        between 1 and the number of classes.
+   */
+  template<typename eT>
+  double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
+  {
+    double output = 0;
+
+    for (size_t i = 0; i < input.n_cols; ++i)
+    {
+      size_t currentTarget = target(i) - 1;
+      Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows,
+          "Target class out of range.");
+
+      output -= input(currentTarget, i);
+    }
+
+    return output;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network. The negative log
+   * likelihood layer expects that the input contains log-probabilities for
+   * each class. The layer also expects a class index, in the range between 1
+   * and the number of classes, as target when calling the Forward function.
+   *
+   * @param input The propagated input activation.
+   * @param target The target vector, that contains the class index in the range
+   *        between 1 and the number of classes.
+   * @param output The calculated error.
+   */
+  template<typename eT>
+  void Backward(const arma::Mat<eT>& input,
+                const arma::Mat<eT>& target,
+                arma::Mat<eT>& output)
+  {
+    output = arma::zeros<arma::Mat<eT> >(input.n_rows, input.n_cols);
+    for (size_t i = 0; i < input.n_cols; ++i)
+    {
+      size_t currentTarget = target(i) - 1;
+      Log::Assert(currentTarget >= 0 && currentTarget < input.n_rows,
+          "Target class out of range.");
+
+      output(currentTarget, i) = -1;
+    }
+  }
+
+  //! Get the input parameter.
+  InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+ private:
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class NegativeLogLikelihoodLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
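
A minimal sketch of the NegativeLogLikelihoodLayer above: the input columns hold log-probabilities and the target holds 1-based class indices, as documented in the header (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp>
    #include <iostream>

    int main()
    {
      using namespace mlpack::ann;

      NegativeLogLikelihoodLayer<> nll;

      // Two samples (columns), three classes (rows); entries are log-probabilities.
      arma::mat logProbs("-0.2 -1.8; -2.1 -0.3; -2.5 -2.2");
      arma::mat target("1 2");  // 1-based class index for each column.

      const double loss = nll.Forward(logProbs, target);  // 0.2 + 0.3 = 0.5.

      arma::mat delta;
      nll.Backward(logProbs, target, delta);  // -1 at each target entry, 0 elsewhere.

      std::cout << "loss: " << loss << std::endl;
    }
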
diff --git a/src/mlpack/methods/ann/layer/one_hot_layer.hpp b/src/mlpack/methods/ann/layer/one_hot_layer.hpp
new file mode 100644
index 0000000..f39dd3b
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/one_hot_layer.hpp
@@ -0,0 +1,96 @@
+/**
+ * @file one_hot_layer.hpp
+ * @author Shangtong Zhang
+ *
+ * Definition of the OneHotLayer class, which implements a standard network
+ * layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * An implementation of a one hot classification layer that can be used as
+ * output layer.
+ */
+class OneHotLayer
+{
+ public:
+  /**
+   * Create the OneHotLayer object.
+   */
+  OneHotLayer()
+  {
+    // Nothing to do here.
+  }
+
+  /*
+   * Calculate the error using the specified input activation and the target.
+   * The error is stored into the given error parameter.
+   *
+   * @param inputActivations Input data used for evaluating the network.
+   * @param target Target data used for evaluating the network.
+   * @param error The calculated error with respect to the input activation and
+   * the given target.
+   */
+  template<typename DataType>
+  void CalculateError(const DataType& inputActivations,
+                      const DataType& target,
+                      DataType& error)
+  {
+    error = inputActivations - target;
+  }
+
+  /*
+   * Calculate the output class using the specified input activation.
+   *
+   * @param inputActivations Input data used to calculate the output class.
+   * @param output Output class of the input activation.
+   */
+  template<typename DataType>
+  void OutputClass(const DataType& inputActivations, DataType& output)
+  {
+    output = inputActivations;
+    output.zeros();
+
+    arma::uword maxIndex = 0;
+    inputActivations.max(maxIndex);
+    output(maxIndex) = 1;
+  }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& /* ar */, const unsigned int /* version */)
+  {
+    /* Nothing to do here */
+  }
+}; // class OneHotLayer
+
+//! Layer traits for the one-hot class classification layer.
+template <>
+class LayerTraits<OneHotLayer>
+{
+ public:
+  static const bool IsBinary = true;
+  static const bool IsOutputLayer = true;
+  static const bool IsBiasLayer = false;
+  static const bool IsConnection = false;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
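
A minimal sketch of the OneHotLayer above, which turns the network output into a one-hot prediction at the argmax (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/one_hot_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      OneHotLayer outputLayer;

      arma::mat activations("0.1; 0.7; 0.2");
      arma::mat prediction;
      outputLayer.OutputClass(activations, prediction);  // (0, 1, 0): one-hot at argmax.

      arma::mat target("0; 1; 0"), error;
      outputLayer.CalculateError(activations, target, error);  // activations - target.

      prediction.print("prediction");
    }
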
diff --git a/src/mlpack/methods/ann/layer/pooling_layer.hpp b/src/mlpack/methods/ann/layer/pooling_layer.hpp
new file mode 100644
index 0000000..7961e3d
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/pooling_layer.hpp
@@ -0,0 +1,267 @@
+/**
+ * @file pooling_layer.hpp
+ * @author Marcus Edel
+ * @author Nilay Jain
+ *
+ * Definition of the PoolingLayer class, which attaches various pooling
+ * functions to the embedding layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_POOLING_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the pooling layer. The pooling layer works as a metaclass
+ * which attaches various functions to the embedding layer.
+ *
+ * @tparam PoolingRule Pooling function used for the embedding layer.
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename PoolingRule = MeanPooling,
+    typename InputDataType = arma::cube,
+    typename OutputDataType = arma::cube
+>
+class PoolingLayer
+{
+ public:
+  /**
+   * Create the PoolingLayer object using the specified number of units.
+   *
+   * @param kSize Size of the pooling window.
+   * @param stride The stride of the convolution operation.
+   * @param pooling The pooling strategy.
+   */
+  PoolingLayer(const size_t kSize,
+               const size_t stride = 1,
+               PoolingRule pooling = PoolingRule()) :
+      kSize(kSize),
+      stride(stride),
+      pooling(pooling)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    Pooling(input, output);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Cube<eT>& input, arma::Cube<eT>& output)
+  {
+    output = arma::zeros<arma::Cube<eT> >((input.n_rows - kSize) / stride + 1,
+        (input.n_cols - kSize) / stride + 1, input.n_slices);
+
+    for (size_t s = 0; s < input.n_slices; s++)
+      Pooling(input.slice(s), output.slice(s));
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, using 3rd-order tensors as
+   * input, calculating the function f(x) by propagating x backwards through f.
+   * Using the results from the feed forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Cube<eT>& /* unused */,
+                const arma::Cube<eT>& gy,
+                arma::Cube<eT>& g)
+  {
+    g = arma::zeros<arma::Cube<eT> >(inputParameter.n_rows,
+        inputParameter.n_cols, inputParameter.n_slices);
+
+    for (size_t s = 0; s < gy.n_slices; s++)
+    {
+      Unpooling(inputParameter.slice(s), gy.slice(s), g.slice(s));
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, using 3rd-order tensors as
+   * input, calculating the function f(x) by propagating x backwards through f.
+   * Using the results from the feed forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Cube<eT>& /* unused */,
+                const arma::Mat<eT>& gy,
+                arma::Cube<eT>& g)
+  {
+    // Generate a cube from the error matrix.
+    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(outputParameter.n_rows,
+        outputParameter.n_cols, outputParameter.n_slices);
+
+    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= gy.n_cols, j++)
+    {
+      for (size_t i = 0; i < gy.n_cols; i++)
+      {
+        arma::Col<eT> temp = gy.col(i).subvec(
+            j * outputParameter.n_rows * outputParameter.n_cols,
+            (j + 1) * outputParameter.n_rows * outputParameter.n_cols - 1);
+
+        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
+            outputParameter.n_rows, outputParameter.n_cols);
+      }
+    }
+
+    Backward(inputParameter, mappedError, g);
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  InputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  InputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(kSize, "kSize");
+    ar & data::CreateNVP(pooling, "pooling");
+    ar & data::CreateNVP(stride, "stride");
+  }
+
+ private:
+  /**
+   * Apply pooling to the input and store the results.
+   *
+   * @param input The input to which the pooling rule is applied.
+   * @param output The pooled result.
+   */
+  template<typename eT>
+  void Pooling(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    const size_t rStep = kSize;
+    const size_t cStep = kSize;
+
+    for (size_t j = 0, colidx = 0; j < output.n_cols; ++j, colidx += stride)
+    {
+      for (size_t i = 0, rowidx = 0; i < output.n_rows; ++i, rowidx += stride)
+      {
+        output(i, j) += pooling.Pooling(input(
+            arma::span(rowidx, rowidx + rStep - 1),
+            arma::span(colidx, colidx + cStep - 1)));
+      }
+    }
+  }
+
+  /**
+   * Apply unpooling to the input and store the results.
+   *
+   * @param input The input to which the unpooling rule is applied.
+   * @param error The backpropagated error to be unpooled.
+   * @param output The unpooled result.
+   */
+  template<typename eT>
+  void Unpooling(const arma::Mat<eT>& input,
+                 const arma::Mat<eT>& error,
+                 arma::Mat<eT>& output)
+  {
+    const size_t rStep = input.n_rows / error.n_rows;
+    const size_t cStep = input.n_cols / error.n_cols;
+
+    arma::Mat<eT> unpooledError;
+    for (size_t j = 0; j < input.n_cols; j += cStep)
+    {
+      for (size_t i = 0; i < input.n_rows; i += rStep)
+      {
+        const arma::Mat<eT>& inputArea = input(arma::span(i, i + rStep - 1),
+            arma::span(j, j + cStep - 1));
+
+        pooling.Unpooling(inputArea, error(i / rStep, j / cStep),
+            unpooledError);
+
+        output(arma::span(i, i + rStep - 1),
+            arma::span(j, j + cStep - 1)) += unpooledError;
+      }
+    }
+  }
+
+  //! Locally-stored size of the pooling window.
+  size_t kSize;
+
+  //! Locally-stored stride value by which we move filter.
+  size_t stride;
+
+  //! Locally-stored pooling strategy.
+  PoolingRule pooling;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class PoolingLayer
+
+//! Layer traits for the pooling layer.
+template<
+    typename PoolingRule,
+    typename InputDataType,
+    typename OutputDataType
+>
+class LayerTraits<PoolingLayer<PoolingRule, InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
+
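A minimal sketch of the PoolingLayer above with the default MeanPooling rule, applied to a single-slice cube (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/pooling_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      // Mean pooling (the default rule) with a 2x2 window and stride 2.
      PoolingLayer<> pool(2, 2);

      arma::cube input(4, 4, 1, arma::fill::randu);
      arma::cube output;
      pool.Forward(input, output);  // 2x2x1 cube; each entry is one window's mean.

      output.print("pooled");
    }
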
diff --git a/src/mlpack/methods/ann/layer/recurrent_layer.hpp b/src/mlpack/methods/ann/layer/recurrent_layer.hpp
new file mode 100644
index 0000000..00ffbbe
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/recurrent_layer.hpp
@@ -0,0 +1,192 @@
+/**
+ * @file recurrent_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the RecurrentLayer class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_RECURRENT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the RecurrentLayer class. Recurrent layers can be used
+ * similarly to feed-forward layers, except that the input isn't stored in the
+ * inputParameter; instead, it is stored in the recurrentParameter.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class RecurrentLayer
+{
+ public:
+  /**
+   * Create the RecurrentLayer object using the specified number of units.
+   *
+   * @param inSize The number of input units.
+   * @param outSize The number of output units.
+   */
+  RecurrentLayer(const size_t inSize, const size_t outSize) :
+      inSize(inSize),
+      outSize(outSize),
+      recurrentParameter(arma::zeros<InputDataType>(inSize, 1))
+  {
+    weights.set_size(outSize, inSize);
+  }
+
+  /**
+   * Create the RecurrentLayer object using the specified number of units.
+   *
+   * @param outSize The number of output units.
+   */
+  RecurrentLayer(const size_t outSize) :
+      inSize(outSize),
+      outSize(outSize),
+      recurrentParameter(arma::zeros<InputDataType>(outSize, 1))
+  {
+    weights.set_size(outSize, inSize);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    output = input + weights * recurrentParameter;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Backward(const InputType& /* unused */,
+                const arma::Mat<eT>& gy,
+                arma::mat& g)
+  {
+    g = (weights).t() * gy;
+  }
+
+  /*
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The propagated input activation.
+   * @param d The calculated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& /* input */,
+                const arma::Mat<eT>& d,
+                GradientDataType& g)
+  {
+    g = d * recurrentParameter.t();
+  }
+
+  //! Get the weights.
+  OutputDataType const& Weights() const { return weights; }
+  //! Modify the weights.
+  OutputDataType& Weights() { return weights; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the input parameter.
+  InputDataType const& RecurrentParameter() const { return recurrentParameter; }
+  //! Modify the input parameter.
+  InputDataType& RecurrentParameter() { return recurrentParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the gradient.
+  OutputDataType const& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  OutputDataType& Gradient() { return gradient; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(recurrentParameter, "recurrentParameter");
+    ar & data::CreateNVP(weights, "weights");
+  }
+
+ private:
+  //! Locally-stored number of input units.
+  size_t inSize;
+
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! Locally-stored weight object.
+  OutputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  OutputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! Locally-stored recurrent parameter object.
+  InputDataType recurrentParameter;
+}; // class RecurrentLayer
+
+//! Layer traits for the recurrent layer.
+template<typename InputDataType, typename OutputDataType
+>
+class LayerTraits<RecurrentLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
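
A minimal sketch of the RecurrentLayer above, showing the recurrent state being fed back between two forward calls (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/recurrent_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      RecurrentLayer<> recurrent(4);  // Four recurrent units.
      recurrent.Weights().randu();    // set_size() leaves the weights uninitialized.

      arma::mat input(4, 1, arma::fill::randu);
      arma::mat output;
      recurrent.Forward(input, output);  // input + weights * recurrentParameter.

      // Feed the activation back in as the recurrent state for the next step.
      recurrent.RecurrentParameter() = output;
      recurrent.Forward(input, output);

      output.print("output");
    }
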
diff --git a/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
new file mode 100644
index 0000000..c033a51
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/reinforce_normal_layer.hpp
@@ -0,0 +1,139 @@
+/**
+ * @file reinforce_normal_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the ReinforceNormalLayer class, which implements the REINFORCE
+ * algorithm for the normal distribution.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_REINFORCE_NORMAL_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the reinforce normal layer. The reinforce normal layer
+ * implements the REINFORCE algorithm for the normal distribution.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class ReinforceNormalLayer
+{
+ public:
+  /**
+   * Create the ReinforceNormalLayer object.
+   *
+   * @param stdev Standard deviation used during the forward and backward pass.
+   */
+  ReinforceNormalLayer(const double stdev) : stdev(stdev)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    if (!deterministic)
+    {
+      // Scale the samples by the standard deviation and shift them by the
+      // input, which acts as the mean.
+      output = arma::randn<arma::Mat<eT> >(input.n_rows, input.n_cols) *
+          stdev + input;
+    }
+    else
+    {
+      // Use maximum a posteriori.
+      output = input;
+    }
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType>
+  void Backward(const DataType& input,
+                const DataType& /* gy */,
+                DataType& g)
+  {
+    g = (input - inputParameter) / std::pow(stdev, 2.0);
+
+    // Multiply by reward and multiply by -1.
+    g *= -reward;
+  }
+
+
+  //! Get the input parameter.
+  InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the value of the deterministic parameter.
+  bool Deterministic() const { return deterministic; }
+  //! Modify the value of the deterministic parameter.
+  bool& Deterministic() { return deterministic; }
+
+  //! Get the value of the reward parameter.
+  double Reward() const { return reward; }
+  //! Modify the value of the reward parameter.
+  double& Reward() { return reward; }
+
+ private:
+  //! Standard deviation used during the forward and backward pass.
+  const double stdev;
+
+  //! Locally-stored reward parameter.
+  double reward;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! If true use maximum a posteriori during the forward pass.
+  bool deterministic;
+}; // class ReinforceNormalLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
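
A minimal sketch of the ReinforceNormalLayer above, sampling around the input during training and returning the input itself in deterministic mode (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/reinforce_normal_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      ReinforceNormalLayer<> reinforce(0.1);  // Standard deviation 0.1.
      reinforce.Deterministic() = false;      // Sample during training.

      arma::mat mean(3, 1, arma::fill::randu);  // The input acts as the mean.
      arma::mat sample;
      reinforce.Forward(mean, sample);  // Element-wise draw from N(mean, 0.1^2).

      reinforce.Deterministic() = true;  // At test time the mean itself is returned.
      reinforce.Forward(mean, sample);

      sample.print("sample");
    }
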
diff --git a/src/mlpack/methods/ann/layer/softmax_layer.hpp b/src/mlpack/methods/ann/layer/softmax_layer.hpp
new file mode 100644
index 0000000..7b38de9
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/softmax_layer.hpp
@@ -0,0 +1,114 @@
+/**
+ * @file softmax_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the SoftmaxLayer class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_SOFTMAX_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the softmax layer. The softmax loss layer computes the
+ * multinomial logistic loss of the softmax of its inputs.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class SoftmaxLayer
+{
+ public:
+  /**
+   * Create the SoftmaxLayer object.
+   */
+  SoftmaxLayer()
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    output = arma::trunc_exp(input -
+        arma::repmat(arma::max(input), input.n_rows, 1));
+    output /= arma::accu(output);
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  void Backward(const arma::Mat<eT>& /* unused */,
+                const arma::Mat<eT>& gy,
+                arma::Mat<eT>& g)
+  {
+    g = gy;
+  }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& /* ar */, const unsigned int /* version */)
+  {
+    /* Nothing to do here */
+  }
+
+ private:
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class SoftmaxLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
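
A minimal sketch of the SoftmaxLayer above applied to a single column of scores (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/softmax_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      SoftmaxLayer<> softmax;

      arma::mat input("1.0; 2.0; 3.0");
      arma::mat output;
      softmax.Forward(input, output);  // Max-shifted exponentials, normalized to sum to 1.

      output.print("softmax");
    }
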
diff --git a/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
new file mode 100644
index 0000000..8b14ecb
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/sparse_bias_layer.hpp
@@ -0,0 +1,177 @@
+/**
+ * @file sparse_bias_layer.hpp
+ * @author Tham Ngap Wei
+ *
+ * Definition of the SparseBiasLayer class.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_SPARSE_BIAS_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * An implementation of a bias layer designed for the sparse autoencoder.
+ * The SparseBiasLayer class represents a single layer of a neural network.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class SparseBiasLayer
+{
+ public:
+  /**
+   * Create the SparseBiasLayer object using the specified number of units and
+   * batch size.
+   *
+   * @param outSize The number of output units.
+   * @param batchSize The batch size used to train the network.
+   */
+  SparseBiasLayer(const size_t outSize, const size_t batchSize) :
+      outSize(outSize),
+      batchSize(batchSize)
+  {
+    weights.set_size(outSize, 1);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    output = input + arma::repmat(weights, 1, input.n_cols);
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename DataType, typename ErrorType>
+  void Backward(const DataType& /* unused */,
+                const ErrorType& gy,
+                ErrorType& g)
+  {
+    g = gy;
+  }
+
+  /*
+   * Calculate the gradient using the output delta and the bias.
+   *
+   * @param input The propagated input.
+   * @param d The calculated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Gradient(const InputType& /* input */,
+                const arma::Mat<eT>& d,
+                InputDataType& g)
+  {
+    g = arma::sum(d, 1) / static_cast<typename InputDataType::value_type>(
+        batchSize);
+  }
+
+  //! Get the batch size.
+  size_t BatchSize() const { return batchSize; }
+  //! Modify the batch size.
+  size_t& BatchSize() { return batchSize; }
+
+  //! Get the weights.
+  InputDataType const& Weights() const { return weights; }
+  //! Modify the weights.
+  InputDataType& Weights() { return weights; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the gradient.
+  InputDataType const& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  InputDataType& Gradient() { return gradient; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(weights, "weights");
+    ar & data::CreateNVP(batchSize, "batchSize");
+  }
+
+ private:
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! The batch size used to train the network.
+  size_t batchSize;
+
+  //! Locally-stored weight object.
+  InputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  InputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class SparseBiasLayer
+
+//! Layer traits for the bias layer.
+template<typename InputDataType, typename OutputDataType
+>
+class LayerTraits<SparseBiasLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = true;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
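
A minimal sketch of the SparseBiasLayer above: the stored bias is added to every column, and the gradient is the row-wise sum of the error divided by the batch size (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/sparse_bias_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      SparseBiasLayer<> bias(3, 10);  // Three units, batch size 10.
      bias.Weights().fill(0.5);       // set_size() leaves the bias uninitialized.

      arma::mat input(3, 10, arma::fill::randu);
      arma::mat output;
      bias.Forward(input, output);    // Adds 0.5 to every activation.

      arma::mat d(3, 10, arma::fill::ones), g;
      bias.Gradient(input, d, g);     // Row-wise sum of d, divided by the batch size.

      g.print("bias gradient");
    }
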
diff --git a/src/mlpack/methods/ann/layer/sparse_input_layer.hpp b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
new file mode 100644
index 0000000..0e4aa54
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/sparse_input_layer.hpp
@@ -0,0 +1,180 @@
+/**
+ * @file sparse_input_layer.hpp
+ * @author Tham Ngap Wei
+ *
+ * Definition of the SparseInputLayer class, which serves as the first layer
+ * of the sparse autoencoder.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_SPARSE_INPUT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+#include <type_traits>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the SparseInputLayer. The SparseInputLayer class
+ * represents the first layer of the sparse autoencoder.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+    >
+class SparseInputLayer
+{
+ public:
+  /**
+   * Create the SparseInputLayer object using the specified number of units.
+   *
+   * @param inSize The number of input units.
+   * @param outSize The number of output units.
+   * @param lambda L2-regularization parameter.
+   */
+  SparseInputLayer(const size_t inSize,
+                   const size_t outSize,
+                   const double lambda = 0.0001) :
+    inSize(inSize),
+    outSize(outSize),
+    lambda(lambda)
+  {
+    weights.set_size(outSize, inSize);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    output = weights * input;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Backward(const InputType& /* unused */,
+                const arma::Mat<eT>& gy,
+                arma::Mat<eT>& g)
+  {
+    g = gy;
+  }
+
+  /*
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The propagated input.
+   * @param d The calculated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT, typename GradientDataType>
+  void Gradient(const InputType& input,
+                const arma::Mat<eT>& d,
+                GradientDataType& g)
+  {
+    g = d * input.t() / static_cast<typename InputType::value_type>(
+        input.n_cols) + lambda * weights;
+  }
+
+  //! Get the weights.
+  OutputDataType const& Weights() const { return weights; }
+  //! Modify the weights.
+  OutputDataType& Weights() { return weights; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the gradient.
+  OutputDataType const& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  OutputDataType& Gradient() { return gradient; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(weights, "weights");
+    ar & data::CreateNVP(lambda, "lambda");
+  }
+
+ private:
+  //! Locally-stored number of input units.
+  size_t inSize;
+
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! L2-regularization parameter.
+  double lambda;
+
+  //! Locally-stored weight object.
+  OutputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  OutputDataType gradient;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class SparseInputLayer
+
+//! Layer traits for the SparseInputLayer.
+template<typename InputDataType, typename OutputDataType
+>
+class LayerTraits<SparseInputLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
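
A minimal sketch of the SparseInputLayer above: a plain linear transform on the forward pass, with an L2 (weight-decay) term added in the weight gradient (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/sparse_input_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      SparseInputLayer<> inputLayer(5, 3, 0.0001);  // inSize, outSize, lambda.
      inputLayer.Weights().randu();  // set_size() leaves the weights uninitialized.

      arma::mat data(5, 8, arma::fill::randu);  // Eight training points.
      arma::mat activation;
      inputLayer.Forward(data, activation);     // weights * data, a 3x8 matrix.

      arma::mat d(3, 8, arma::fill::randu), g;
      inputLayer.Gradient(data, d, g);  // d * data^T / n_cols + lambda * weights.

      g.print("weight gradient");
    }
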
diff --git a/src/mlpack/methods/ann/layer/sparse_output_layer.hpp b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
new file mode 100644
index 0000000..371c200
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/sparse_output_layer.hpp
@@ -0,0 +1,227 @@
+/**
+ * @file sparse_output_layer.hpp
+ * @author Tham Ngap Wei
+ *
+ * This is the fourth layer of the sparse autoencoder.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_SPARSE_OUTPUT_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the SparseOutputLayer class. The SparseOutputLayer class
+ * represents the fourth layer of the sparse autoencoder.
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
+>
+class SparseOutputLayer
+{
+ public:
+  /**
+   * Create the SparseOutputLayer object using the specified number of units.
+   *
+   * @param inSize The number of input units.
+   * @param outSize The number of output units.
+   * @param lambda L2-regularization parameter.
+   * @param beta KL divergence parameter.
+   * @param rho Sparsity parameter.
+   */
+  SparseOutputLayer(const size_t inSize,
+                    const size_t outSize,
+                    const double lambda = 0.0001,
+                    const double beta = 3,
+                    const double rho = 0.01) :
+    inSize(inSize),
+    outSize(outSize),
+    lambda(lambda),
+    beta(beta),
+    rho(rho)
+  {
+    weights.set_size(outSize, inSize);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
+   */
+  template<typename eT>
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
+  {
+    output = weights * input;
+    // Average activations of the hidden layer.
+    rhoCap = arma::sum(input, 1) / static_cast<double>(input.n_cols);
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Backward(const InputType& input,
+                const arma::Mat<eT>& gy,
+                arma::Mat<eT>& g)
+  {
+    const arma::mat klDivGrad = beta * (-(rho / rhoCap) + (1 - rho) /
+          (1 - rhoCap));
+
+    // NOTE: if the Armadillo version is high enough, find_nonfinite can be
+    // used to zero out non-finite (overflowed) values:
+    // klDivGrad.elem(arma::find_nonfinite(klDivGrad)).zeros();
+    g = weights.t() * gy +
+        arma::repmat(klDivGrad, 1, input.n_cols);
+  }
+
+  /*
+   * Calculate the gradient using the output delta and the input activation.
+   *
+   * @param input The propagated input.
+   * @param d The calculated error.
+   * @param g The calculated gradient.
+   */
+  template<typename InputType, typename eT>
+  void Gradient(const InputType& input,
+                const arma::Mat<eT>& d,
+                arma::Mat<eT>& g)
+  {
+    g = d * input.t() / static_cast<typename InputType::value_type>(
+        input.n_cols) + lambda * weights;
+  }
+
+  //! Sets the KL divergence parameter.
+  void Beta(const double b)
+  {
+    beta = b;
+  }
+
+  //! Gets the KL divergence parameter.
+  double Beta() const
+  {
+    return beta;
+  }
+
+  //! Sets the sparsity parameter.
+  void Rho(const double r)
+  {
+    rho = r;
+  }
+
+  //! Gets the sparsity parameter.
+  double Rho() const
+  {
+    return rho;
+  }
+
+  //! Get the weights.
+  OutputDataType const& Weights() const { return weights; }
+  //! Modify the weights.
+  OutputDataType& Weights() { return weights; }
+
+  //! Get the RhoCap.
+  OutputDataType const& RhoCap() const { return rhoCap; }
+  //! Modify the RhoCap.
+  OutputDataType& RhoCap() { return rhoCap; }
+
+  //! Get the input parameter.
+  InputDataType const& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType const& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType const& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the gradient.
+  OutputDataType const& Gradient() const { return gradient; }
+  //! Modify the gradient.
+  OutputDataType& Gradient() { return gradient; }
+
+  /**
+   * Serialize the layer.
+   */
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */)
+  {
+    ar & data::CreateNVP(weights, "weights");
+    ar & data::CreateNVP(lambda, "lambda");
+    ar & data::CreateNVP(beta, "beta");
+    ar & data::CreateNVP(rho, "rho");
+  }
+
+ private:
+  //! Locally-stored number of input units.
+  size_t inSize;
+
+  //! Locally-stored number of output units.
+  size_t outSize;
+
+  //! L2-regularization parameter.
+  double lambda;
+
+  //! KL divergence parameter.
+  double beta;
+
+  //! Sparsity parameter.
+  double rho;
+
+  //! Locally-stored weight object.
+  OutputDataType weights;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored gradient object.
+  OutputDataType gradient;
+
+  //! Average activations of the hidden layer.
+  OutputDataType rhoCap;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+}; // class SparseOutputLayer
+
+//! Layer traits for the SparseOutputLayer.
+template<typename InputDataType, typename OutputDataType
+    >
+class LayerTraits<SparseOutputLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
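
A minimal sketch of the SparseOutputLayer above: the forward pass records the average hidden activations (rhoCap), and the backward pass adds the KL-divergence sparsity term to the backpropagated error (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/sparse_output_layer.hpp>

    int main()
    {
      using namespace mlpack::ann;

      // Three hidden units feeding five reconstruction units.
      SparseOutputLayer<> outputLayer(3, 5, 0.0001, 3, 0.01);  // lambda, beta, rho.
      outputLayer.Weights().randu();

      arma::mat hidden(3, 8, arma::fill::randu);    // Hidden-layer activations.
      arma::mat reconstruction;
      outputLayer.Forward(hidden, reconstruction);  // Also records rhoCap (row means).

      arma::mat gy(5, 8, arma::fill::randu), g;
      outputLayer.Backward(hidden, gy, g);  // weights^T * gy plus the KL-divergence term.

      g.print("backpropagated error");
    }
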
diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
new file mode 100644
index 0000000..393dbcd
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
@@ -0,0 +1,171 @@
+/**
+ * @file vr_class_reward_layer.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the VRClassRewardLayer class, which implements the variance
+ * reduced classification reinforcement layer.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of the variance reduced classification reinforcement layer.
+ * This layer is meant to be used in combination with the reinforce normal
+ * layer (ReinforceNormalLayer), which expects a reward
+ * (1 for success, 0 otherwise).
+ *
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ */
+template <
+    typename InputDataType = arma::field<arma::mat>,
+    typename OutputDataType = arma::field<arma::mat>
+>
+class VRClassRewardLayer
+{
+ public:
+  /**
+   * Create the VRClassRewardLayer object.
+   *
+   * @param scale Parameter used to scale the reward.
+   * @param sizeAverage Take the average over all batches.
+   */
+  VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) :
+      scale(scale),
+      sizeAverage(sizeAverage)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data that contains the log-probabilities for each class.
+   * @param target The target vector, that contains the class index in the range
+   *        between 1 and the number of classes.
+   */
+  template<typename eT>
+  double Forward(const arma::field<arma::Mat<eT> >& input,
+                 const arma::Mat<eT>& target)
+  {
+    return Forward(input(0, 0), target);
+  }
+
+  /**
+   * Ordinary feed forward pass of a neural network, evaluating the function
+   * f(x) by propagating the activity forward through f.
+   *
+   * @param input Input data that contains the log-probabilities for each class.
+   * @param target The target vector, that contains the class index in the range
+   *        between 1 and the number of classes.
+   */
+  template<typename eT>
+  double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
+  {
+    reward = 0;
+    arma::uword index = 0;
+
+    for (size_t i = 0; i < input.n_cols; i++)
+    {
+      input.unsafe_col(i).max(index);
+      reward = ((index + 1) == target(i)) * scale;
+    }
+
+    if (sizeAverage)
+    {
+      return -reward / input.n_cols;
+    }
+
+    return -reward;
+  }
+
+  /**
+   * Ordinary feed backward pass of a neural network, calculating the function
+   * f(x) by propagating x backwards through f. Using the results from the feed
+   * forward pass.
+   *
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
+   */
+  template<typename eT>
+  double Backward(const arma::field<arma::Mat<eT> >& input,
+                const arma::Mat<eT>& /* gy */,
+                arma::field<arma::Mat<eT> >& g)
+  {
+    g = arma::field<arma::Mat<eT> >(2, 1);
+    g(0, 0) = arma::zeros(input(0, 0).n_rows, input(0, 0).n_cols);
+
+    double vrReward = reward - arma::as_scalar(input(1, 0));
+    if (sizeAverage)
+    {
+      vrReward /= input(0, 0).n_cols;
+    }
+
+    const double norm = sizeAverage ? 2.0 / input.n_cols : 2.0;
+
+    g(1, 0) = norm * (input(1, 0) - reward);
+
+    return vrReward;
+  }
+
+  //! Get the input parameter.
+  InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
+
+  //! Get the output parameter.
+  OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
+
+  //! Get the delta.
+  OutputDataType& Delta() const { return delta; }
+  //! Modify the delta.
+  OutputDataType& Delta() { return delta; }
+
+  //! Get the value of the deterministic parameter.
+  bool Deterministic() const { return deterministic; }
+  //! Modify the value of the deterministic parameter.
+  bool& Deterministic() { return deterministic; }
+
+ private:
+  //! Locally-stored value to scale the reward.
+  const double scale;
+
+  //! If true take the average over all batches.
+  const bool sizeAverage;
+
+  //! Locally stored reward parameter.
+  double reward;
+
+  //! Locally-stored delta object.
+  OutputDataType delta;
+
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
+
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
+
+  //! If true, the layer runs in deterministic (testing) mode.
+  bool deterministic;
+}; // class VRClassRewardLayer
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
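
A minimal sketch of the VRClassRewardLayer above with the plain-matrix Forward overload: the column's argmax is compared against the 1-based target and the (negated, averaged) reward is returned (assumes mlpack with this patch and Armadillo; values are illustrative only):

    #include <mlpack/methods/ann/layer/vr_class_reward_layer.hpp>
    #include <iostream>

    int main()
    {
      using namespace mlpack::ann;

      VRClassRewardLayer<> vrReward(1, true);  // scale = 1, sizeAverage = true.

      // One sample (column), three classes (rows).
      arma::mat scores("0.1; 0.7; 0.2");
      arma::mat target("2");  // 1-based class index; class 2 is the true class.

      // The prediction (argmax = 2) matches the target, so the reward is 1 and
      // the returned value is -1 (negated, averaged over the single column).
      const double value = vrReward.Forward(scores, target);
      std::cout << "negated reward: " << value << std::endl;
    }
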
diff --git a/src/mlpack/methods/ann/network_util.hpp b/src/mlpack/methods/ann/network_util.hpp
new file mode 100644
index 0000000..109e4fe
--- /dev/null
+++ b/src/mlpack/methods/ann/network_util.hpp
@@ -0,0 +1,247 @@
+/**
+ * @file network_util.hpp
+ * @author Marcus Edel
+ *
+ * Neural network utilities.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_NETWORK_UTIL_HPP
+#define MLPACK_METHODS_ANN_NETWORK_UTIL_HPP
+
+#include <mlpack/prereqs.hpp>
+
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+/**
+ * Neural network utility functions.
+ */
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Auxiliary function to get the number of weights of the specified network.
+ *
+ * @param network The network used for specifying the number of weights.
+ * @return The number of weights.
+ */
+template<size_t I = 0, typename... Tp>
+typename std::enable_if<I < sizeof...(Tp), size_t>::type
+NetworkSize(std::tuple<Tp...>& network);
+
+template<size_t I, typename... Tp>
+typename std::enable_if<I == sizeof...(Tp), size_t>::type
+NetworkSize(std::tuple<Tp...>& network);
+
+/**
+ * Auxiliary function to get the number of weights of the specified layer.
+ *
+ * @param layer The layer used for specifying the number of weights.
+ * @param output The layer output parameter.
+ * @return The number of weights.
+ */
+template<typename T, typename P>
+typename std::enable_if<
+    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
+LayerSize(T& layer, P& output);
+
+template<typename T, typename P>
+typename std::enable_if<
+    HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
+LayerSize(T& layer, P& output);
+
+/**
+ * Auxiliary function to set the weights of the specified network.
+ *
+ * @param weights The weights used to set the weights of the network.
+ * @param network The network used to set the weights.
+ * @param offset The memory offset of the weights.
+ */
+template<size_t I = 0, typename... Tp>
+typename std::enable_if<I < sizeof...(Tp), void>::type
+NetworkWeights(arma::mat& weights,
+               std::tuple<Tp...>& network,
+               size_t offset = 0);
+
+template<size_t I, typename... Tp>
+typename std::enable_if<I == sizeof...(Tp), void>::type
+NetworkWeights(arma::mat& weights,
+               std::tuple<Tp...>& network,
+               size_t offset = 0);
+
+/**
+ * Auxiliary function to set the weights of the specified layer.
+ *
+ * @param layer The layer used to set the weights.
+ * @param weights The weights used to set the weights of the layer.
+ * @param offset The memory offset of the weights.
+ * @param output The output parameter of the layer.
+ * @return The number of weights.
+ */
+template<typename T>
+typename std::enable_if<
+    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
+LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::mat& output);
+
+template<typename T>
+typename std::enable_if<
+    HasWeightsCheck<T, arma::cube&(T::*)()>::value, size_t>::type
+LayerWeights(T& layer, arma::mat& weights, size_t offset, arma::cube& output);
+
+template<typename T, typename P>
+typename std::enable_if<
+    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
+LayerWeights(T& layer, arma::mat& weights, size_t offset, P& output);
+
+/**
+ * Auxiliary function to set the gradients of the specified network.
+ *
+ * @param gradients The gradients used to set the gradient of the network.
+ * @param network The network used to set the gradients.
+ * @param offset The memory offset of the gradients.
+ * @return The number of gradients.
+ */
+template<size_t I = 0, typename... Tp>
+typename std::enable_if<I < sizeof...(Tp), void>::type
+NetworkGradients(arma::mat& gradients,
+                 std::tuple<Tp...>& network,
+                 size_t offset = 0);
+
+template<size_t I, typename... Tp>
+typename std::enable_if<I == sizeof...(Tp), void>::type
+NetworkGradients(arma::mat& gradients,
+                 std::tuple<Tp...>& network,
+                 size_t offset = 0);
+
+/**
+ * Auxiliary function to set the gradients of the specified layer.
+ *
+ * @param layer The layer used to set the gradients.
+ * @param gradients The gradients used to set the gradient of the layer.
+ * @param offset The memory offset of the gradients.
+ * @param output The output parameter of the layer.
+ * @return The number of gradients.
+ */
+template<typename T>
+typename std::enable_if<
+    HasGradientCheck<T, arma::mat&(T::*)()>::value, size_t>::type
+LayerGradients(T& layer,
+               arma::mat& gradients,
+               size_t offset,
+               arma::mat& output);
+
+template<typename T>
+typename std::enable_if<
+    HasGradientCheck<T, arma::cube&(T::*)()>::value, size_t>::type
+LayerGradients(T& layer,
+               arma::mat& gradients,
+               size_t offset,
+               arma::cube& output);
+
+template<typename T, typename P>
+typename std::enable_if<
+    !HasGradientCheck<T, P&(T::*)()>::value, size_t>::type
+LayerGradients(T& layer, arma::mat& gradients, size_t offset, P& output);
+
+/**
+ * Auxiliary function to get the input size of the specified network.
+ *
+ * @param network The network used for specifying the input size.
+ * @return The input size.
+ */
+template<size_t I = 0, typename... Tp>
+typename std::enable_if<I < sizeof...(Tp), size_t>::type
+NetworkInputSize(std::tuple<Tp...>& network);
+
+template<size_t I, typename... Tp>
+typename std::enable_if<I == sizeof...(Tp), size_t>::type
+NetworkInputSize(std::tuple<Tp...>& network);
+
+/**
+ * Auxiliary function to get the input size of the specified layer.
+ *
+ * @param layer The layer used for specifying the input size.
+ * @param output The layer output parameter.
+ * @return The input size.
+ */
+template<typename T, typename P>
+typename std::enable_if<
+    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
+LayerInputSize(T& layer, P& output);
+
+template<typename T, typename P>
+typename std::enable_if<
+    HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
+LayerInputSize(T& layer, P& output);
+
+/**
+ * Auxiliary function to set the weights of the specified network using a given
+ * initialize rule.
+ *
+ * @param initializeRule The rule used to initialize the network weights.
+ * @param weights The weights used to set the weights of the network.
+ * @param network The network used to set the weights.
+ * @param offset The memory offset of the weights.
+ */
+template<size_t I = 0, typename InitializationRuleType, typename... Tp>
+typename std::enable_if<I < sizeof...(Tp), void>::type
+NetworkWeights(InitializationRuleType& initializeRule,
+               arma::mat& weights,
+               std::tuple<Tp...>& network,
+               size_t offset = 0);
+
+template<size_t I, typename InitializationRuleType, typename... Tp>
+typename std::enable_if<I == sizeof...(Tp), void>::type
+NetworkWeights(InitializationRuleType& initializeRule,
+               arma::mat& weights,
+               std::tuple<Tp...>& network,
+               size_t offset = 0);
+
+/**
+ * Auxiliary function to set the weights of the specified layer using the given
+ * initialize rule.
+ *
+ * @param initializeRule The rule used to initialize the layer weights.
+ * @param layer The layer used to set the weights.
+ * @param weights The weights used to set the weights of the layer.
+ * @param offset The memory offset of the weights.
+ * @param output The output parameter of the layer.
+ * @return The number of weights.
+ */
+template<typename InitializationRuleType, typename T>
+typename std::enable_if<
+    HasWeightsCheck<T, arma::mat&(T::*)()>::value, size_t>::type
+LayerWeights(InitializationRuleType& initializeRule,
+             T& layer,
+             arma::mat& weights,
+             size_t offset,
+             arma::mat& output);
+
+template<typename InitializationRuleType, typename T>
+typename std::enable_if<
+    HasWeightsCheck<T, arma::cube&(T::*)()>::value, size_t>::type
+LayerWeights(InitializationRuleType& initializeRule,
+             T& layer,
+             arma::mat& weights,
+             size_t offset,
+             arma::cube& output);
+
+template<typename InitializationRuleType, typename T, typename P>
+typename std::enable_if<
+    !HasWeightsCheck<T, P&(T::*)()>::value, size_t>::type
+LayerWeights(InitializationRuleType& initializeRule,
+             T& layer,
+             arma::mat& weights,
+             size_t offset,
+             P& output);
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation.
+#include "network_util_impl.hpp"
+
+#endif
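
All of the declarations above follow the same compile-time recursion idiom: an
overload enabled for I < sizeof...(Tp) handles tuple element I and recurses on
I + 1, and a terminating overload enabled for I == sizeof...(Tp) ends the
recursion. A minimal self-contained sketch of the idiom (the function name
TupleCount is made up for illustration):

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <type_traits>

    // Terminating case: past the last element, contribute nothing.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp), std::size_t>::type
    TupleCount(std::tuple<Tp...>& /* network */) { return 0; }

    // Recursive case: handle element I, then recurse on I + 1.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp), std::size_t>::type
    TupleCount(std::tuple<Tp...>& network)
    {
      return 1 + TupleCount<I + 1, Tp...>(network);
    }

    int main()
    {
      std::tuple<int, double, char> network(1, 2.0, 'c');
      std::cout << TupleCount(network) << std::endl;  // prints 3
      return 0;
    }
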
diff --git a/src/mlpack/methods/ann/performance_functions/cee_function.hpp b/src/mlpack/methods/ann/performance_functions/cee_function.hpp
new file mode 100644
index 0000000..11098e0
--- /dev/null
+++ b/src/mlpack/methods/ann/performance_functions/cee_function.hpp
@@ -0,0 +1,74 @@
+/**
+ * @file cee_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the cross-entropy error performance
+ * function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_CEE_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+#include <mlpack/methods/ann/layer/linear_layer.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The cross-entropy error performance function measures the network's
+ * performance according to the cross-entropy error. The log in the cross-
+ * entropy takes into account the closeness of a prediction and is a more
+ * granular way to calculate the error.
+ *
+ * @tparam Layer The layer that is connected with the output layer.
+ */
+template<
+    class Layer = LinearLayer< >
+>
+class CrossEntropyErrorFunction
+{
+ public:
+  /**
+   * Computes the cross-entropy error function.
+   *
+   * @param network Network of type FFN, CNN or RNN.
+   * @param target Target data.
+   * @param error The calculated error (used as a placeholder).
+   * @return The cross-entropy error.
+   */
+  template<typename DataType, typename... Tp>
+  static double Error(const std::tuple<Tp...>& network,
+                      const DataType& target, const DataType &error)
+  {
+    return Error(std::get<sizeof...(Tp) - 1>(network).OutputParameter(),
+                 target, error);
+  }
+
+  /**
+   * Computes the cross-entropy error function.
+   *
+   * @param input Input data.
+   * @param target Target data.
+   * @return cross-entropy error.
+   */
+  template<typename DataType>
+  static double Error(const DataType& input,
+                      const DataType& target,
+                      const DataType&)
+  {
+    if (LayerTraits<Layer>::IsBinary)
+      return -arma::dot(arma::trunc_log(arma::abs(target - input)), target);
+
+    return -arma::dot(arma::trunc_log(input), target);
+  }
+
+}; // class CrossEntropyErrorFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
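
For a single one-hot target column, the non-binary branch of Error() above
reduces to the negative log of the probability predicted for the true class.
A standalone toy check (not part of the patch; it assumes only Armadillo):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      // One column of softmax-like predictions and a one-hot target.
      arma::vec input  = { 0.7, 0.2, 0.1 };
      arma::vec target = { 1.0, 0.0, 0.0 };

      // Same expression as the non-binary branch: -dot(trunc_log(input), target).
      const double error = -arma::dot(arma::trunc_log(input), target);
      std::cout << "cross-entropy error = " << error << std::endl;  // -log(0.7)
      return 0;
    }
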
diff --git a/src/mlpack/methods/ann/performance_functions/mse_function.hpp b/src/mlpack/methods/ann/performance_functions/mse_function.hpp
new file mode 100644
index 0000000..76322b5
--- /dev/null
+++ b/src/mlpack/methods/ann/performance_functions/mse_function.hpp
@@ -0,0 +1,61 @@
+/**
+ * @file mse_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the mean squared error performance function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_MSE_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The mean squared error performance function measures the network's
+ * performance according to the mean of squared errors.
+ */
+class MeanSquaredErrorFunction
+{
+ public:
+  /**
+   * Computes the mean squared error function.
+   *
+   * @param network Network of type FFN, CNN or RNN.
+   * @param target Target data.
+   * @param error The calculated error (used as a placeholder).
+   * @return The mean of squared errors.
+   */
+  template<typename DataType, typename... Tp>
+  static double Error(const std::tuple<Tp...>& network,
+                      const DataType& target, const DataType &error)
+  {
+    return Error(std::get<sizeof...(Tp) - 1>(network).OutputParameter(),
+                 target, error);
+  }
+
+  /**
+   * Computes the mean squared error function.
+   *
+   * @param input Input data.
+   * @param target Target data.
+   * @return mean of squared errors.
+   */
+  template<typename DataType>
+  static double Error(const DataType& input,
+                      const DataType& target,
+                      const DataType&)
+  {
+    return arma::mean(arma::mean(arma::square(target - input)));
+  }
+
+}; // class MeanSquaredErrorFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
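
The double arma::mean() above first averages each column and then averages the
resulting row vector, so the result is the mean over all matrix elements. A
standalone toy check (not part of the patch; it assumes only Armadillo):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      arma::mat input  = { { 1.0, 2.0 }, { 3.0, 4.0 } };
      arma::mat target = { { 1.5, 2.0 }, { 2.0, 4.0 } };

      // Squared errors are 0.25, 0, 1 and 0; their mean is 0.3125.
      const double mse = arma::mean(arma::mean(arma::square(target - input)));
      std::cout << "mse = " << mse << std::endl;
      return 0;
    }
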
diff --git a/src/mlpack/methods/ann/performance_functions/sparse_function.hpp b/src/mlpack/methods/ann/performance_functions/sparse_function.hpp
new file mode 100644
index 0000000..4586470
--- /dev/null
+++ b/src/mlpack/methods/ann/performance_functions/sparse_function.hpp
@@ -0,0 +1,141 @@
+/**
+ * @file sparse_function.hpp
+ * @author Siddharth Agrawal
+ * @author Tham Ngap Wei
+ *
+ * Definition and implementation of the sparse performance function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+
+#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SPARSE_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The cost function used by the sparse autoencoder.
+ */
+template<typename DataType = arma::mat>
+class SparseErrorFunction
+{
+ public:
+  /**
+   * Construct the sparse error function with the given parameters.
+   *
+   * @param lambda L2-regularization parameter.
+   * @param beta KL divergence parameter.
+   * @param rho Sparsity parameter.
+   */
+  SparseErrorFunction(const double lambda = 0.0001,
+                      const double beta = 3,
+                      const double rho = 0.01) :
+    lambda(lambda), beta(beta), rho(rho)
+  {
+    // Nothing to do here.
+  }
+
+  SparseErrorFunction(SparseErrorFunction &&layer) noexcept
+  {
+    *this = std::move(layer);
+  }
+
+  SparseErrorFunction& operator=(SparseErrorFunction &&layer) noexcept
+  {
+    lambda = layer.lambda;
+    beta = layer.beta;
+    rho = layer.rho;
+
+    return *this;
+  }
+
+  //! Get the KL divergence parameter.
+  double Beta() const { return beta; }
+  //! Modify the KL divergence parameter.
+  void Beta(double value) { beta = value; }
+
+  //! Get the L2-regularization parameter.
+  double Lambda() const { return lambda; }
+  //! Modify the L2-regularization parameter.
+  void Lambda(double value) { lambda = value; }
+
+  //! Get the sparsity parameter.
+  double Rho() const { return rho; }
+  //! Modify the sparsity parameter.
+  void Rho(double value) { rho = value; }
+
+  /**
+   * Computes the cost of the sparse autoencoder.
+   *
+   * @param network Network of type FFN, CNN or RNN.
+   * @param target Target data.
+   * @param error The difference between the output and the input.
+   * @return The sparse autoencoder cost.
+   */
+  template<typename InType, typename Tp>
+  double Error(const Tp& network,
+               const InType& target, const InType &error)
+  {
+    return Error(std::get<0>(network).Weights(), std::get<3>(network).Weights(),
+        std::get<3>(network).RhoCap(), target, error);
+  }
+
+  /**
+   * Computes the cost of the sparse autoencoder.
+   *
+   * @param w1 Weights of the hidden layer.
+   * @param w2 Weights of the output layer.
+   * @param rhoCap Average activations of the hidden layer.
+   * @param target Target data.
+   * @param error The difference between the output and the input.
+   * @return The sparse autoencoder cost.
+   */
+  template<typename InType>
+  double Error(const InType& w1, const InType& w2,
+               const InType& rhoCap, const InType& target,
+               const InType& error)
+  {
+    // Calculate squared L2-norms of w1 and w2.
+    const double wL2SquaredNorm =
+        arma::accu(w1 % w1) + arma::accu(w2 % w2);
+
+    // Calculate the reconstruction error, the regularization cost and the KL
+    // divergence cost terms. 'sumOfSquaresError' is the average squared L2-norm
+    // of the reconstruction error. 'weightDecay' is the L2-regularization cost
+    // of the weights w1 and w2. 'klDivergence' penalizes hidden layer
+    // activations that deviate from the sparsity target rho, via the formula:
+    // KL = sum_over_hSize(rho*log(rho/rhoCap) + (1-rho)*log((1-rho)/(1-rhoCap)))
+    const double sumOfSquaresError =
+        0.5 * arma::accu(error % error) / target.n_cols;
+
+    const double weightDecay = 0.5 * lambda * wL2SquaredNorm;
+    const double klDivergence =
+        beta * arma::accu(rho * arma::trunc_log(rho / rhoCap) + (1 - rho) *
+                          arma::trunc_log((1 - rho) / (1 - rhoCap)));
+
+    // The cost is the sum of the terms calculated above.
+    return sumOfSquaresError + weightDecay + klDivergence;
+  }
+
+ private:
+  //! Locally stored L2-regularization parameter.
+  double lambda;
+
+  //! Locally stored KL divergence parameter.
+  double beta;
+
+  //! Locally stored sparsity parameter.
+  double rho;
+
+}; // class SparseErrorFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
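
The cost returned by Error() above is the sum of three terms: the averaged
reconstruction error, the L2 weight decay and the KL sparsity penalty. A
standalone sketch of that arithmetic on made-up shapes and values (not part of
the patch; it assumes only Armadillo):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      const double lambda = 0.0001, beta = 3.0, rho = 0.01;

      arma::mat w1(4, 3, arma::fill::randu);      // hidden-layer weights
      arma::mat w2(3, 4, arma::fill::randu);      // output-layer weights
      arma::mat rhoCap(4, 1);
      rhoCap.fill(0.05);                          // average hidden activations
      arma::mat target(3, 5, arma::fill::randu);  // 5 data points
      arma::mat error(3, 5, arma::fill::randn);   // output minus input
      error *= 0.1;

      const double sumOfSquaresError =
          0.5 * arma::accu(error % error) / target.n_cols;
      const double weightDecay =
          0.5 * lambda * (arma::accu(w1 % w1) + arma::accu(w2 % w2));
      const double klDivergence =
          beta * arma::accu(rho * arma::trunc_log(rho / rhoCap) + (1 - rho) *
                            arma::trunc_log((1 - rho) / (1 - rhoCap)));

      std::cout << "cost = "
                << sumOfSquaresError + weightDecay + klDivergence << std::endl;
      return 0;
    }
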
diff --git a/src/mlpack/methods/ann/performance_functions/sse_function.hpp b/src/mlpack/methods/ann/performance_functions/sse_function.hpp
new file mode 100644
index 0000000..a8d96f5
--- /dev/null
+++ b/src/mlpack/methods/ann/performance_functions/sse_function.hpp
@@ -0,0 +1,64 @@
+/**
+ * @file sse_function.hpp
+ * @author Marcus Edel
+ *
+ * Definition and implementation of the sum squared error performance function.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP
+#define MLPACK_METHODS_ANN_PERFORMANCE_FUNCTIONS_SSE_FUNCTION_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * The sum squared error performance function measures the network's performance
+ * according to the sum of squared errors.
+ */
+class SumSquaredErrorFunction
+{
+ public:
+  /**
+   * Computes the sum squared error function.
+   *
+   * @param network Network of type FFN, CNN or RNN.
+   * @param target Target data.
+   * @param error The calculated error (used as a placeholder).
+   * @return The sum of squared errors.
+   */
+  template<typename DataType, typename... Tp>
+  static double Error(const std::tuple<Tp...>& network,
+                      const DataType& target,
+                      const DataType &error)
+  {
+    return Error(std::get<sizeof...(Tp) - 1>(network).OutputParameter(),
+                 target, error);
+  }
+
+  /**
+   * Computes the sum squared error function.
+   *
+   * @param input Input data.
+   * @param target Target data.
+   * @return sum of squared errors.
+   */
+  template<typename DataType>
+  static double Error(const DataType& input,
+                      const DataType& target,
+                      const DataType&)
+  {
+    return arma::sum(arma::square(target - input));
+  }
+
+}; // class SumSquaredErrorFunction
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
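
With column-vector data, the arma::sum() above reduces to a single scalar: the
sum of the squared element-wise errors. A standalone toy check (not part of
the patch; it assumes only Armadillo):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      arma::vec input  = { 1.0, 2.0, 3.0 };
      arma::vec target = { 1.5, 2.0, 1.0 };

      // Squared errors are 0.25, 0 and 4; their sum is 4.25.
      const double sse = arma::sum(arma::square(target - input));
      std::cout << "sse = " << sse << std::endl;
      return 0;
    }
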
diff --git a/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp
new file mode 100644
index 0000000..f165f2b
--- /dev/null
+++ b/src/mlpack/methods/ann/pooling_rules/max_pooling.hpp
@@ -0,0 +1,56 @@
+/**
+ * @file max_pooling.hpp
+ * @author Shangtong Zhang
+ *
+ * Definition of the MaxPooling class, which implements max pooling.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP
+#define MLPACK_METHODS_ANN_POOLING_RULES_MAX_POOLING_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/*
+ * The max pooling rule for convolutional neural networks. Takes the maximum
+ * value within the receptive block.
+ */
+class MaxPooling
+{
+ public:
+  /*
+   * Return the maximum value within the receptive block.
+   *
+   * @param input Input used to perform the pooling operation.
+   */
+  template<typename MatType>
+  double Pooling(const MatType& input)
+  {
+    return input.max();
+  }
+
+  /*
+   * Set the maximum value within the receptive block.
+   *
+   * @param input Input used to perform the pooling operation.
+   * @param value The unpooled value.
+   * @param output The unpooled output data.
+   */
+  template<typename MatType>
+  void Unpooling(const MatType& input, const double value, MatType& output)
+  {
+    output = MatType(input.n_rows, input.n_cols);
+    output.fill(value / input.n_elem);
+  }
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp b/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp
new file mode 100644
index 0000000..f921e10
--- /dev/null
+++ b/src/mlpack/methods/ann/pooling_rules/mean_pooling.hpp
@@ -0,0 +1,56 @@
+/**
+ * @file mean_pooling.hpp
+ * @author Shangtong Zhang
+ *
+ * Definition of the MeanPooling class, which implements mean pooling.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP
+#define MLPACK_METHODS_ANN_POOLING_RULES_MEAN_POOLING_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/*
+ * The mean pooling rule for convolutional neural networks. Averages all values
+ * within the receptive block.
+ */
+class MeanPooling
+{
+ public:
+  /*
+   * Return the average value within the receptive block.
+   *
+   * @param input Input used to perform the pooling operation.
+   */
+  template<typename MatType>
+  double Pooling(const MatType& input)
+  {
+    return arma::mean(arma::mean(input));
+  }
+
+  /*
+   * Set the average value within the receptive block.
+   *
+   * @param input Input used to perform the pooling operation.
+   * @param value The unpooled value.
+   * @param output The unpooled output data.
+   */
+  template<typename MatType>
+  void Unpooling(const MatType& input, const double value, MatType& output)
+  {
+    output = MatType(input.n_rows, input.n_cols);
+    output.fill(value / input.n_elem);
+  }
+};
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
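
Both pooling rules above share the same two-method interface: Pooling()
collapses a receptive block to one value, and Unpooling() spreads a value back
over a block of the same shape. A usage sketch on a single 2x2 block (not part
of the patch; it assumes the two headers introduced here, which pull in
Armadillo through mlpack/prereqs.hpp):

    #include <mlpack/methods/ann/pooling_rules/max_pooling.hpp>
    #include <mlpack/methods/ann/pooling_rules/mean_pooling.hpp>
    #include <iostream>

    int main()
    {
      arma::mat block = { { 1.0, 4.0 },
                          { 2.0, 3.0 } };

      mlpack::ann::MaxPooling maxRule;
      mlpack::ann::MeanPooling meanRule;

      std::cout << "max  = " << maxRule.Pooling(block)  << std::endl;  // 4
      std::cout << "mean = " << meanRule.Pooling(block) << std::endl;  // 2.5

      // Unpooling fills the block with value / n_elem (0.625 per entry here).
      arma::mat unpooled;
      meanRule.Unpooling(block, 2.5, unpooled);
      unpooled.print("unpooled block:");
      return 0;
    }
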
diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp
new file mode 100644
index 0000000..d3c4521
--- /dev/null
+++ b/src/mlpack/methods/ann/rnn.hpp
@@ -0,0 +1,799 @@
+/**
+ * @file rnn.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the RNN class, which implements recurrent neural networks.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_ANN_RNN_HPP
+#define MLPACK_METHODS_ANN_RNN_HPP
+
+#include <mlpack/prereqs.hpp>
+
+#include <boost/ptr_container/ptr_vector.hpp>
+
+#include <mlpack/methods/ann/network_util.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+#include <mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp>
+#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
+#include <mlpack/core/optimizers/sgd/sgd.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * Implementation of a standard recurrent neural network.
+ *
+ * @tparam LayerTypes Contains all layer modules used to construct the network.
+ * @tparam OutputLayerType The output layer type used to evaluate the network.
+ * @tparam InitializationRuleType Rule used to initialize the weight matrix.
+ * @tparam PerformanceFunction Performance strategy used to calculate the error.
+ */
+template <
+  typename LayerTypes,
+  typename OutputLayerType,
+  typename InitializationRuleType = NguyenWidrowInitialization,
+  class PerformanceFunction = CrossEntropyErrorFunction<>
+>
+class RNN
+{
+ public:
+  //! Convenience typedef for the internal model construction.
+  using NetworkType = RNN<LayerTypes,
+                          OutputLayerType,
+                          InitializationRuleType,
+                          PerformanceFunction>;
+
+  /**
+   * Create the RNN object with the given predictors and responses set (this is
+   * the set that is used to train the network) and the given optimizer.
+   * Optionally, specify which initialize rule and performance function should
+   * be used.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameter.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType,
+           typename OutputType,
+           template<typename> class OptimizerType>
+  RNN(LayerType &&network,
+      OutputType &&outputLayer,
+      const arma::mat& predictors,
+      const arma::mat& responses,
+      OptimizerType<NetworkType>& optimizer,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Create the RNN object with the given predictors and responses set (this is
+   * the set that is used to train the network). Optionally, specify which
+   * initialize rule and performance function should be used.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameter.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType, typename OutputType>
+  RNN(LayerType &&network,
+      OutputType &&outputLayer,
+      const arma::mat& predictors,
+      const arma::mat& responses,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Create the RNN object with an empty predictors and responses set and
+   * default optimizer. Make sure to call Train(predictors, responses) when
+   * training.
+   *
+   * @param network Network modules used to construct the network.
+   * @param outputLayer Output layer used to evaluate the network.
+   * @param initializeRule Optional instantiated InitializationRule object
+   *        for initializing the network parameter.
+   * @param performanceFunction Optional instantiated PerformanceFunction
+   *        object used to calculate the error.
+   */
+  template<typename LayerType, typename OutputType>
+  RNN(LayerType &&network,
+      OutputType &&outputLayer,
+      InitializationRuleType initializeRule = InitializationRuleType(),
+      PerformanceFunction performanceFunction = PerformanceFunction());
+
+  /**
+   * Train the recurrent neural network on the given input data. By default, the
+   * SGD optimization algorithm is used, but others can be specified
+   * (such as mlpack::optimization::RMSprop).
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::SGD
+  >
+  void Train(const arma::mat& predictors, const arma::mat& responses);
+
+  /**
+   * Train the recurrent neural network with the given instantiated optimizer.
+   * Using this overload allows configuring the instantiated optimizer before
+   * training is performed.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::SGD
+  >
+  void Train(OptimizerType<NetworkType>& optimizer);
+
+  /**
+   * Train the recurrent neural network on the given input data using the given
+   * optimizer.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs resulting from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::SGD
+  >
+  void Train(const arma::mat& predictors,
+             const arma::mat& responses,
+             OptimizerType<NetworkType>& optimizer);
+
+  /**
+   * Predict the responses to a given set of predictors. The responses will
+   * reflect the output of the given output layer as returned by the
+   * OutputClass() function.
+   *
+   * @param predictors Input predictors.
+   * @param responses Matrix to put output predictions of responses into.
+   */
+  void Predict(arma::mat& predictors, arma::mat& responses);
+
+  /**
+   * Evaluate the recurrent neural network with the given parameters. This
+   * function is usually called by the optimizer to train the model.
+   *
+   * @param parameters Matrix model parameters.
+   * @param i Index of point to use for objective function evaluation.
+   * @param deterministic Whether or not to train or test the model. Note that
+   * some layers act differently in training or testing mode.
+   */
+  double Evaluate(const arma::mat& parameters,
+                  const size_t i,
+                  const bool deterministic = true);
+
+  /**
+   * Evaluate the gradient of the recurrent neural network with the given
+   * parameters, and with respect to only one point in the dataset. This is
+   * useful for optimizers such as SGD, which require a separable objective
+   * function.
+   *
+   * @param parameters Matrix of the model parameters to be optimized.
+   * @param i Index of point to use for objective function gradient evaluation.
+   * @param gradient Matrix to output gradient into.
+   */
+  void Gradient(const arma::mat& parameters,
+                const size_t i,
+                arma::mat& gradient);
+
+  //! Return the number of separable functions (the number of predictor points).
+  size_t NumFunctions() const { return numFunctions; }
+
+  //! Return the initial point for the optimization.
+  const arma::mat& Parameters() const { return parameter; }
+  //! Modify the initial point for the optimization.
+  arma::mat& Parameters() { return parameter; }
+
+  //! Serialize the model.
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */);
+
+ private:
+  /*
+   * Predict the response of the given input matrix.
+   */
+  template <typename DataType>
+  void SinglePredict(const DataType& input, DataType& output)
+  {
+    deterministic = true;
+    seqLen = input.n_rows / inputSize;
+    ResetParameter(network);
+
+    // Iterate through the input sequence and perform the feed forward pass.
+    for (seqNum = 0; seqNum < seqLen; seqNum++)
+    {
+      // Perform the forward pass and save the activations.
+      Forward(input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1),
+          network);
+      SaveActivations(network);
+
+      // Retrieve output of the subsequence.
+      if (seqOutput)
+      {
+        DataType seqOutput;
+        OutputPrediction(seqOutput, network);
+        output = arma::join_cols(output, seqOutput);
+      }
+    }
+
+    // Retrieve output of the complete sequence.
+    if (!seqOutput)
+      OutputPrediction(output, network);
+  }
+
+  /**
+   * Reset the network by clearing the layer activations and by setting the
+   * layer status.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& /* unused */)
+  {
+    activations.clear();
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& network)
+  {
+    ResetDeterministic(std::get<I>(network));
+    ResetSeqLen(std::get<I>(network));
+    ResetRecurrent(std::get<I>(network), std::get<I>(network).InputParameter());
+    std::get<I>(network).Delta().zeros();
+
+    ResetParameter<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Reset the layer status by setting the current deterministic parameter
+   * for all layers that implement the Deterministic function.
+   */
+  template<typename T>
+  typename std::enable_if<
+      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& layer)
+  {
+    layer.Deterministic() = deterministic;
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& /* unused */) { /* Nothing to do here */ }
+
+  /**
+   * Reset the layer sequence length by setting the current seqLen parameter
+   * for all layers that implement the SeqLen function.
+   */
+  template<typename T>
+  typename std::enable_if<
+      HasSeqLenCheck<T, size_t&(T::*)(void)>::value, void>::type
+  ResetSeqLen(T& layer)
+  {
+    layer.SeqLen() = seqLen;
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      !HasSeqLenCheck<T, size_t&(T::*)(void)>::value, void>::type
+  ResetSeqLen(T& /* unused */) { /* Nothing to do here */ }
+
+  /**
+   * Distinguish between recurrent and non-recurrent layers when resetting
+   * the recurrent parameter.
+   */
+  template<typename T, typename P>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  ResetRecurrent(T& layer, P& /* unused */)
+  {
+    layer.RecurrentParameter().zeros();
+  }
+
+  template<typename T, typename P>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  ResetRecurrent(T& /* unused */, P& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  /**
+   * Initialize the network by setting the input size and output size.
+   */
+  template<size_t I = 0, typename InputDataType, typename TargetDataType,
+      typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp) - 1, void>::type
+  InitLayer(const InputDataType& /* unused */,
+            const TargetDataType& target,
+            std::tuple<Tp...>& /* unused */)
+  {
+    seqOutput = outputSize < target.n_elem;
+  }
+
+  template<size_t I = 0, typename InputDataType, typename TargetDataType,
+      typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp) - 1, void>::type
+  InitLayer(const InputDataType& input,
+            const TargetDataType& target,
+            std::tuple<Tp...>& network)
+  {
+    Init(std::get<I>(network), std::get<I>(network).OutputParameter(),
+       std::get<I + 1>(network).Delta());
+
+    InitLayer<I + 1, InputDataType, TargetDataType, Tp...>(input, target,
+        network);
+  }
+
+  /**
+   * Retrieve the weight matrix for all layers that implement the Weights
+   * function to extract the input size and output size.
+   */
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      HasGradientCheck<T, P&(T::*)()>::value, void>::type
+  Init(T& layer, P& /* unused */, D& /* unused */)
+  {
+    // Initialize the input size only once.
+    if (!inputSize)
+      inputSize = layer.Weights().n_cols;
+
+    outputSize = layer.Weights().n_rows;
+  }
+
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      !HasGradientCheck<T, P&(T::*)()>::value, void>::type
+  Init(T& /* unused */, P& /* unused */, D& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  /**
+   * Save the network layer activations.
+   */
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I == Max, void>::type
+  SaveActivations(std::tuple<Tp...>& /* unused */)
+  {
+    Save(I, std::get<I>(network), std::get<I>(network).InputParameter());
+    LinkRecurrent(network);
+  }
+
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I < Max, void>::type
+  SaveActivations(std::tuple<Tp...>& network)
+  {
+    Save(I, std::get<I>(network), std::get<I>(network).InputParameter());
+    SaveActivations<I + 1, Max, Tp...>(network);
+  }
+
+  /**
+   * Distinguish between recurrent and non-recurrent layers when storing
+   * the activations.
+   */
+  template<typename T, typename P>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Save(const size_t layerNumber, T& layer, P& /* unused */)
+  {
+    if (activations.size() == layerNumber)
+    {
+      activations.push_back(new arma::mat(layer.RecurrentParameter().n_rows,
+          seqLen));
+    }
+
+    activations[layerNumber].unsafe_col(seqNum) = layer.RecurrentParameter();
+  }
+
+  template<typename T, typename P>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Save(const size_t layerNumber, T& layer, P& /* unused */)
+  {
+    if (activations.size() == layerNumber)
+    {
+      activations.push_back(new arma::mat(layer.OutputParameter().n_rows,
+          seqLen));
+    }
+
+    activations[layerNumber].unsafe_col(seqNum) = layer.OutputParameter();
+  }
+
+  /**
+   * Load the network layer activations.
+   */
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename DataType, typename... Tp
+  >
+  typename std::enable_if<I == Max, void>::type
+  LoadActivations(DataType& input, std::tuple<Tp...>& network)
+  {
+    Load(I, std::get<I>(network), std::get<I>(network).InputParameter());
+    std::get<0>(network).InputParameter() = input;
+  }
+
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename DataType, typename... Tp
+  >
+  typename std::enable_if<I < Max, void>::type
+  LoadActivations(DataType& input, std::tuple<Tp...>& network)
+  {
+    Load(I, std::get<I>(network), std::get<I>(network).InputParameter());
+    LoadActivations<I + 1, Max, DataType, Tp...>(input, network);
+  }
+
+  /**
+   * Distinguish between recurrent and non-recurrent layers when loading
+   * the activations.
+   */
+  template<typename T, typename P>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Load(const size_t layerNumber, T& layer, P& /* unused */)
+  {
+    layer.RecurrentParameter() = activations[layerNumber].unsafe_col(seqNum);
+  }
+
+  template<typename T, typename P>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Load(const size_t layerNumber, T& layer, P& /* unused */)
+  {
+    layer.OutputParameter() = activations[layerNumber].unsafe_col(seqNum);
+  }
+
+  /**
+   * Run a single iteration of the feed forward algorithm, propagating the
+   * given input through the network modules.
+   */
+  template<size_t I = 0, typename DataType, typename... Tp>
+  void Forward(const DataType& input, std::tuple<Tp...>& network)
+  {
+    std::get<I>(network).InputParameter() = input;
+    std::get<I>(network).Forward(std::get<I>(network).InputParameter(),
+        std::get<I>(network).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(network);
+  }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& /* unused */) { /* Nothing to do here */ }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& network)
+  {
+    std::get<I>(network).Forward(std::get<I - 1>(network).OutputParameter(),
+        std::get<I>(network).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Link the calculated activation with the correct layer.
+   */
+  template<
+      size_t I = 1,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I == Max, void>::type
+  LinkParameter(std::tuple<Tp ...>& /* unused */)
+  {
+    if (!LayerTraits<typename std::remove_reference<
+        decltype(std::get<I>(network))>::type>::IsBiasLayer)
+    {
+      std::get<I>(network).InputParameter() = std::get<I - 1>(
+          network).OutputParameter();
+    }
+  }
+
+  template<
+      size_t I = 1,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I < Max, void>::type
+  LinkParameter(std::tuple<Tp...>& network)
+  {
+    if (!LayerTraits<typename std::remove_reference<
+        decltype(std::get<I>(network))>::type>::IsBiasLayer)
+    {
+      std::get<I>(network).InputParameter() = std::get<I - 1>(
+          network).OutputParameter();
+    }
+
+    LinkParameter<I + 1, Max, Tp...>(network);
+  }
+
+  /**
+   * Link the calculated activation with the correct recurrent layer.
+   */
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I == Max, void>::type
+  LinkRecurrent(std::tuple<Tp ...>& /* unused */) { /* Nothing to do here */ }
+
+  template<
+      size_t I = 0,
+      size_t Max = std::tuple_size<LayerTypes>::value - 1,
+      typename... Tp
+  >
+  typename std::enable_if<I < Max, void>::type
+  LinkRecurrent(std::tuple<Tp...>& network)
+  {
+    UpdateRecurrent(std::get<I>(network), std::get<I>(network).InputParameter(),
+        std::get<I + 1>(network).OutputParameter());
+    LinkRecurrent<I + 1, Max, Tp...>(network);
+  }
+
+  /**
+   * Distinguish between recurrent and non-recurrent layers when updating
+   * the recurrent activations.
+   */
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  UpdateRecurrent(T& layer, P& /* unused */, D& output)
+  {
+    layer.RecurrentParameter() = output;
+  }
+
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  UpdateRecurrent(T& /* unused */, P& /* unused */, D& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  /*
+   * Calculate the output error and update the overall error.
+   */
+  template<typename DataType, typename ErrorType, typename... Tp>
+  double OutputError(const DataType& target,
+                     ErrorType& error,
+                     const std::tuple<Tp...>& network)
+  {
+    // Calculate and store the output error.
+    outputLayer.CalculateError(
+        std::get<sizeof...(Tp) - 1>(network).OutputParameter(), target, error);
+
+    // Measures the network's performance with the specified performance
+    // function.
+    return performanceFunc.Error(network, target, error);
+  }
+
+  /**
+   * Run a single iteration of the feed backward algorithm, using the given
+   * error of the output layer. Note that we iterate backward through the
+   * layer modules.
+   */
+  template<size_t I = 1, typename DataType, typename... Tp>
+  void Backward(DataType& error, std::tuple<Tp ...>& network)
+  {
+    std::get<sizeof...(Tp) - I>(network).Backward(
+        std::get<sizeof...(Tp) - I>(network).OutputParameter(), error,
+        std::get<sizeof...(Tp) - I>(network).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, network);
+  }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I == (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& /* unused */, std::tuple<Tp...>& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& error, std::tuple<Tp...>& network)
+  {
+    BackwardRecurrent(std::get<sizeof...(Tp) - I - 1>(network),
+        std::get<sizeof...(Tp) - I - 1>(network).InputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(network).Delta());
+
+    std::get<sizeof...(Tp) - I>(network).Backward(
+        std::get<sizeof...(Tp) - I>(network).OutputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(network).Delta(),
+        std::get<sizeof...(Tp) - I>(network).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, network);
+  }
+
+  /*
+   * Update the delta of the recurrent layer.
+   */
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  BackwardRecurrent(T& layer, P& /* unused */, D& delta)
+  {
+    if (!layer.Delta().is_empty())
+      delta += layer.Delta();
+  }
+
+  template<typename T, typename P, typename D>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  BackwardRecurrent(T& /* unused */, P& /* unused */, D& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  /**
+   * Iterate through all layer modules and update the gradient using the
+   * layer-defined optimizer.
+   */
+  template<size_t I = 0, size_t Max = std::tuple_size<LayerTypes>::value - 2,
+      typename... Tp>
+  typename std::enable_if<I == Max, void>::type
+  UpdateGradients(std::tuple<Tp...>& network)
+  {
+    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
+        std::get<I + 1>(network).Delta(), std::get<I + 1>(network),
+        std::get<I + 1>(network).InputParameter(),
+        std::get<I + 1>(network).Delta());
+  }
+
+  template<size_t I = 0, size_t Max = std::tuple_size<LayerTypes>::value - 2,
+      typename... Tp>
+  typename std::enable_if<I < Max, void>::type
+  UpdateGradients(std::tuple<Tp...>& network)
+  {
+    Update(std::get<I>(network), std::get<I>(network).OutputParameter(),
+        std::get<I + 1>(network).Delta(), std::get<I + 1>(network),
+        std::get<I + 1>(network).InputParameter(),
+        std::get<I + 2>(network).Delta());
+
+    UpdateGradients<I + 1, Max, Tp...>(network);
+  }
+
+  template<typename T1, typename P1, typename D1, typename T2, typename P2,
+      typename D2>
+  typename std::enable_if<
+      HasGradientCheck<T1, P1&(T1::*)()>::value &&
+      HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
+  Update(T1& layer, P1& /* unused */, D1& /* unused */, T2& /* unused */,
+         P2& /* unused */, D2& delta2)
+  {
+    layer.Gradient(layer.InputParameter(), delta2, layer.Gradient());
+  }
+
+  template<typename T1, typename P1, typename D1, typename T2, typename P2,
+      typename D2>
+  typename std::enable_if<
+      (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
+      !HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value) ||
+      (!HasGradientCheck<T1, P1&(T1::*)()>::value &&
+      HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value), void>::type
+  Update(T1& /* unused */, P1& /* unused */, D1& /* unused */, T2& /* unused */,
+         P2& /* unused */, D2& /* unused */)
+  {
+    /* Nothing to do here */
+  }
+
+  template<typename T1, typename P1, typename D1, typename T2, typename P2,
+      typename D2>
+  typename std::enable_if<
+      HasGradientCheck<T1, P1&(T1::*)()>::value &&
+      !HasRecurrentParameterCheck<T2, P2&(T2::*)()>::value, void>::type
+  Update(T1& layer, P1& /* unused */, D1& delta1, T2& /* unused */,
+         P2& /* unused */, D2& /* unused */)
+  {
+    layer.Gradient(layer.InputParameter(), delta1, layer.Gradient());
+  }
+
+  /*
+   * Calculate and store the output activation.
+   */
+  template<typename DataType, typename... Tp>
+  void OutputPrediction(DataType& output, std::tuple<Tp...>& network)
+  {
+    // Calculate and store the output prediction.
+    outputLayer.OutputClass(std::get<sizeof...(Tp) - 1>(
+        network).OutputParameter(), output);
+  }
+
+  //! Instantiated recurrent neural network.
+  LayerTypes network;
+
+  //! The output layer used to evaluate the network.
+  OutputLayerType& outputLayer;
+
+  //! Performance strategy used to calculate the error.
+  PerformanceFunction performanceFunc;
+
+  //! The current evaluation mode (training or testing).
+  bool deterministic;
+
+  //! Matrix of (trained) parameters.
+  arma::mat parameter;
+
+  //! The matrix of data points (predictors).
+  arma::mat predictors;
+
+  //! The matrix of responses to the input data points.
+  arma::mat responses;
+
+  //! Locally stored network input size.
+  size_t inputSize;
+
+  //! Locally stored network output size.
+  size_t outputSize;
+
+  //! The index of the current sequence number.
+  size_t seqNum;
+
+  //! Locally stored number of samples in one input sequence.
+  size_t seqLen;
+
+  //! Locally stored parameter that indicates if the output is a sequence.
+  bool seqOutput;
+
+  //! The activation storage we are using to perform the feed backward pass.
+  boost::ptr_vector<arma::mat> activations;
+
+  //! The number of separable functions (the number of predictor points).
+  size_t numFunctions;
+
+  //! Locally stored backward error.
+  arma::mat error;
+}; // class RNN
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation.
+#include "rnn_impl.hpp"
+
+#endif
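
SinglePredict() above treats each column of the predictor matrix as one
flattened sequence: it derives seqLen = input.n_rows / inputSize and forwards
one inputSize-row slice per time step. A standalone sketch of just that
slicing, with made-up sizes (not part of the patch; it assumes only
Armadillo):

    #include <armadillo>
    #include <iostream>

    int main()
    {
      const arma::uword inputSize = 3;         // rows fed to the network per step
      arma::vec input(12, arma::fill::randu);  // one column = one flattened sequence
      const arma::uword seqLen = input.n_rows / inputSize;  // 4 time steps

      for (arma::uword seqNum = 0; seqNum < seqLen; ++seqNum)
      {
        // The same row range the RNN forwards through the network at this step.
        arma::vec step = input.rows(seqNum * inputSize, (seqNum + 1) * inputSize - 1);
        std::cout << "step " << seqNum << " covers rows "
                  << seqNum * inputSize << " to " << (seqNum + 1) * inputSize - 1
                  << " (" << step.n_elem << " values)" << std::endl;
      }
      return 0;
    }
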
diff --git a/src/mlpack/methods/approx_kfn/approx_kfn_main.cpp b/src/mlpack/methods/approx_kfn/approx_kfn_main.cpp
index 0342495..08ee540 100644
--- a/src/mlpack/methods/approx_kfn/approx_kfn_main.cpp
+++ b/src/mlpack/methods/approx_kfn/approx_kfn_main.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/neighbor_search/neighbor_search.hpp>
 #include "drusilla_select.hpp"
 #include "qdafn.hpp"
diff --git a/src/mlpack/methods/approx_kfn/drusilla_select.hpp b/src/mlpack/methods/approx_kfn/drusilla_select.hpp
index c059c77..b3aab05 100644
--- a/src/mlpack/methods/approx_kfn/drusilla_select.hpp
+++ b/src/mlpack/methods/approx_kfn/drusilla_select.hpp
@@ -30,7 +30,7 @@
 #ifndef MLPACK_METHODS_APPROX_KFN_DRUSILLA_SELECT_HPP
 #define MLPACK_METHODS_APPROX_KFN_DRUSILLA_SELECT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/approx_kfn/qdafn.hpp b/src/mlpack/methods/approx_kfn/qdafn.hpp
index 95f2de2..d54eb83 100644
--- a/src/mlpack/methods/approx_kfn/qdafn.hpp
+++ b/src/mlpack/methods/approx_kfn/qdafn.hpp
@@ -24,7 +24,8 @@
 #ifndef MLPACK_METHODS_APPROX_KFN_QDAFN_HPP
 #define MLPACK_METHODS_APPROX_KFN_QDAFN_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/dists/gaussian_distribution.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 9f0f772..0e30cef 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -16,7 +16,7 @@
 #ifndef MLPACK_METHODS_CF_CF_HPP
 #define MLPACK_METHODS_CF_CF_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/neighbor_search/neighbor_search.hpp>
 #include <mlpack/methods/amf/amf.hpp>
 #include <mlpack/methods/amf/update_rules/nmf_als.hpp>
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index a530a2a..df86b06 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -10,8 +10,8 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 
-#include <mlpack/core.hpp>
-
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/random.hpp>
 #include <mlpack/methods/amf/amf.hpp>
 #include <mlpack/methods/regularized_svd/regularized_svd.hpp>
 #include <mlpack/methods/amf/termination_policies/max_iteration_termination.hpp>
diff --git a/src/mlpack/methods/cf/svd_wrapper.hpp b/src/mlpack/methods/cf/svd_wrapper.hpp
index 27b225e..3ae381c 100644
--- a/src/mlpack/methods/cf/svd_wrapper.hpp
+++ b/src/mlpack/methods/cf/svd_wrapper.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_SVDWRAPPER_HPP
 #define MLPACK_METHODS_SVDWRAPPER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack
 {
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index b58ff4c..5918aaa 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_HPP
 #define MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace decision_stump {
diff --git a/src/mlpack/methods/decision_stump/decision_stump_main.cpp b/src/mlpack/methods/decision_stump/decision_stump_main.cpp
index 8f7e371..5e832f5 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_main.cpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_main.cpp
@@ -9,7 +9,9 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/data/normalize_labels.hpp>
 #include "decision_stump.hpp"
 
 using namespace mlpack;
diff --git a/src/mlpack/methods/det/det_main.cpp b/src/mlpack/methods/det/det_main.cpp
index 024c702..1ee7a7b 100644
--- a/src/mlpack/methods/det/det_main.cpp
+++ b/src/mlpack/methods/det/det_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include "dt_utils.hpp"
 
 using namespace mlpack;
diff --git a/src/mlpack/methods/det/dt_utils.hpp b/src/mlpack/methods/det/dt_utils.hpp
index 7e536e1..d807ac3 100644
--- a/src/mlpack/methods/det/dt_utils.hpp
+++ b/src/mlpack/methods/det/dt_utils.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_DET_DT_UTILS_HPP
 #define MLPACK_METHODS_DET_DT_UTILS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "dtree.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/methods/det/dtree.hpp b/src/mlpack/methods/det/dtree.hpp
index 3c14b29..5d8a4d0 100644
--- a/src/mlpack/methods/det/dtree.hpp
+++ b/src/mlpack/methods/det/dtree.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_DET_DTREE_HPP
 #define MLPACK_METHODS_DET_DTREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace det /** Density Estimation Trees */ {
diff --git a/src/mlpack/methods/emst/dtb.hpp b/src/mlpack/methods/emst/dtb.hpp
index 408b1a5..c682b4a 100644
--- a/src/mlpack/methods/emst/dtb.hpp
+++ b/src/mlpack/methods/emst/dtb.hpp
@@ -28,7 +28,7 @@
 #include "dtb_stat.hpp"
 #include "edge_pair.hpp"
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 
 #include <mlpack/core/tree/binary_space_tree.hpp>
diff --git a/src/mlpack/methods/emst/dtb_rules.hpp b/src/mlpack/methods/emst/dtb_rules.hpp
index 00e32d2..ee9c319 100644
--- a/src/mlpack/methods/emst/dtb_rules.hpp
+++ b/src/mlpack/methods/emst/dtb_rules.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_EMST_DTB_RULES_HPP
 #define MLPACK_METHODS_EMST_DTB_RULES_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <mlpack/core/tree/traversal_info.hpp>
 
diff --git a/src/mlpack/methods/emst/dtb_stat.hpp b/src/mlpack/methods/emst/dtb_stat.hpp
index 01ec10f..ffa5f2b 100644
--- a/src/mlpack/methods/emst/dtb_stat.hpp
+++ b/src/mlpack/methods/emst/dtb_stat.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_EMST_DTB_STAT_HPP
 #define MLPACK_METHODS_EMST_DTB_STAT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace emst {
diff --git a/src/mlpack/methods/emst/edge_pair.hpp b/src/mlpack/methods/emst/edge_pair.hpp
index 521ad8d..b2a4ebb 100644
--- a/src/mlpack/methods/emst/edge_pair.hpp
+++ b/src/mlpack/methods/emst/edge_pair.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_EMST_EDGE_PAIR_HPP
 #define MLPACK_METHODS_EMST_EDGE_PAIR_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "union_find.hpp"
 
diff --git a/src/mlpack/methods/emst/emst_main.cpp b/src/mlpack/methods/emst/emst_main.cpp
index 96acb72..4a2da05 100644
--- a/src/mlpack/methods/emst/emst_main.cpp
+++ b/src/mlpack/methods/emst/emst_main.cpp
@@ -26,7 +26,7 @@
  */
 #include "dtb.hpp"
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 PROGRAM_INFO("Fast Euclidean Minimum Spanning Tree", "This program can compute "
     "the Euclidean minimum spanning tree of a set of input points using the "
diff --git a/src/mlpack/methods/emst/union_find.hpp b/src/mlpack/methods/emst/union_find.hpp
index 912252c..6c9fba2 100644
--- a/src/mlpack/methods/emst/union_find.hpp
+++ b/src/mlpack/methods/emst/union_find.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_METHODS_EMST_UNION_FIND_HPP
 #define MLPACK_METHODS_EMST_UNION_FIND_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace emst {
diff --git a/src/mlpack/methods/fastmks/fastmks.hpp b/src/mlpack/methods/fastmks/fastmks.hpp
index 866be5d..94b6b06 100644
--- a/src/mlpack/methods/fastmks/fastmks.hpp
+++ b/src/mlpack/methods/fastmks/fastmks.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_FASTMKS_FASTMKS_HPP
 #define MLPACK_METHODS_FASTMKS_FASTMKS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/ip_metric.hpp>
 #include "fastmks_stat.hpp"
 #include <mlpack/core/tree/cover_tree.hpp>
diff --git a/src/mlpack/methods/fastmks/fastmks_main.cpp b/src/mlpack/methods/fastmks/fastmks_main.cpp
index d5b7311..989b8c1 100644
--- a/src/mlpack/methods/fastmks/fastmks_main.cpp
+++ b/src/mlpack/methods/fastmks/fastmks_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 
 #include "fastmks.hpp"
 #include "fastmks_model.hpp"
diff --git a/src/mlpack/methods/fastmks/fastmks_model.hpp b/src/mlpack/methods/fastmks/fastmks_model.hpp
index fce28fd..44fa102 100644
--- a/src/mlpack/methods/fastmks/fastmks_model.hpp
+++ b/src/mlpack/methods/fastmks/fastmks_model.hpp
@@ -12,8 +12,19 @@
 #ifndef MLPACK_METHODS_FASTMKS_FASTMKS_MODEL_HPP
 #define MLPACK_METHODS_FASTMKS_FASTMKS_MODEL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "fastmks.hpp"
+#include <mlpack/core/kernels/kernel_traits.hpp>
+#include <mlpack/core/kernels/linear_kernel.hpp>
+#include <mlpack/core/kernels/polynomial_kernel.hpp>
+#include <mlpack/core/kernels/cosine_distance.hpp>
+#include <mlpack/core/kernels/gaussian_kernel.hpp>
+#include <mlpack/core/kernels/epanechnikov_kernel.hpp>
+#include <mlpack/core/kernels/hyperbolic_tangent_kernel.hpp>
+#include <mlpack/core/kernels/laplacian_kernel.hpp>
+#include <mlpack/core/kernels/pspectrum_string_kernel.hpp>
+#include <mlpack/core/kernels/spherical_kernel.hpp>
+#include <mlpack/core/kernels/triangular_kernel.hpp>
 
 namespace mlpack {
 namespace fastmks {
diff --git a/src/mlpack/methods/fastmks/fastmks_rules.hpp b/src/mlpack/methods/fastmks/fastmks_rules.hpp
index 1388812..2002090 100644
--- a/src/mlpack/methods/fastmks/fastmks_rules.hpp
+++ b/src/mlpack/methods/fastmks/fastmks_rules.hpp
@@ -12,7 +12,8 @@
 #ifndef MLPACK_METHODS_FASTMKS_FASTMKS_RULES_HPP
 #define MLPACK_METHODS_FASTMKS_FASTMKS_RULES_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/kernels/kernel_traits.hpp>
 #include <mlpack/core/tree/cover_tree/cover_tree.hpp>
 #include <mlpack/core/tree/traversal_info.hpp>
 #include <boost/heap/priority_queue.hpp>
diff --git a/src/mlpack/methods/fastmks/fastmks_stat.hpp b/src/mlpack/methods/fastmks/fastmks_stat.hpp
index 4ce12f3..93cd6fd 100644
--- a/src/mlpack/methods/fastmks/fastmks_stat.hpp
+++ b/src/mlpack/methods/fastmks/fastmks_stat.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_FASTMKS_FASTMKS_STAT_HPP
 #define MLPACK_METHODS_FASTMKS_FASTMKS_STAT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/tree/tree_traits.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/methods/gmm/diagonal_constraint.hpp b/src/mlpack/methods/gmm/diagonal_constraint.hpp
index 0d15876..e0eafc0 100644
--- a/src/mlpack/methods/gmm/diagonal_constraint.hpp
+++ b/src/mlpack/methods/gmm/diagonal_constraint.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_GMM_DIAGONAL_CONSTRAINT_HPP
 #define MLPACK_METHODS_GMM_DIAGONAL_CONSTRAINT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace gmm {
diff --git a/src/mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp b/src/mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp
index 8a2c07b..34c9072 100644
--- a/src/mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp
+++ b/src/mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_GMM_EIGENVALUE_RATIO_CONSTRAINT_HPP
 #define MLPACK_METHODS_GMM_EIGENVALUE_RATIO_CONSTRAINT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace gmm {
diff --git a/src/mlpack/methods/gmm/em_fit.hpp b/src/mlpack/methods/gmm/em_fit.hpp
index 768c1ec..71817f8 100644
--- a/src/mlpack/methods/gmm/em_fit.hpp
+++ b/src/mlpack/methods/gmm/em_fit.hpp
@@ -14,7 +14,8 @@
 #ifndef MLPACK_METHODS_GMM_EM_FIT_HPP
 #define MLPACK_METHODS_GMM_EM_FIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/dists/gaussian_distribution.hpp>
 
 // Default clustering mechanism.
 #include <mlpack/methods/kmeans/kmeans.hpp>
diff --git a/src/mlpack/methods/gmm/gmm.hpp b/src/mlpack/methods/gmm/gmm.hpp
index 1ba2540..6209804 100644
--- a/src/mlpack/methods/gmm/gmm.hpp
+++ b/src/mlpack/methods/gmm/gmm.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_MOG_MOG_EM_HPP
 #define MLPACK_METHODS_MOG_MOG_EM_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 // This is the default fitting method class.
 #include "em_fit.hpp"
diff --git a/src/mlpack/methods/gmm/gmm_generate_main.cpp b/src/mlpack/methods/gmm/gmm_generate_main.cpp
index d56b5a6..01d594a 100644
--- a/src/mlpack/methods/gmm/gmm_generate_main.cpp
+++ b/src/mlpack/methods/gmm/gmm_generate_main.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "gmm.hpp"
 
 using namespace std;
diff --git a/src/mlpack/methods/gmm/gmm_probability_main.cpp b/src/mlpack/methods/gmm/gmm_probability_main.cpp
index a7deb08..e9ed34f 100644
--- a/src/mlpack/methods/gmm/gmm_probability_main.cpp
+++ b/src/mlpack/methods/gmm/gmm_probability_main.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "gmm.hpp"
 
 using namespace std;
diff --git a/src/mlpack/methods/gmm/gmm_train_main.cpp b/src/mlpack/methods/gmm/gmm_train_main.cpp
index 2eba886..7ae2890 100644
--- a/src/mlpack/methods/gmm/gmm_train_main.cpp
+++ b/src/mlpack/methods/gmm/gmm_train_main.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "gmm.hpp"
 #include "no_constraint.hpp"
diff --git a/src/mlpack/methods/gmm/no_constraint.hpp b/src/mlpack/methods/gmm/no_constraint.hpp
index 35d0cce..675964e 100644
--- a/src/mlpack/methods/gmm/no_constraint.hpp
+++ b/src/mlpack/methods/gmm/no_constraint.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_GMM_NO_CONSTRAINT_HPP
 #define MLPACK_METHODS_GMM_NO_CONSTRAINT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace gmm {
diff --git a/src/mlpack/methods/gmm/positive_definite_constraint.hpp b/src/mlpack/methods/gmm/positive_definite_constraint.hpp
index 5b287fe..07a5289 100644
--- a/src/mlpack/methods/gmm/positive_definite_constraint.hpp
+++ b/src/mlpack/methods/gmm/positive_definite_constraint.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_GMM_POSITIVE_DEFINITE_CONSTRAINT_HPP
 #define MLPACK_METHODS_GMM_POSITIVE_DEFINITE_CONSTRAINT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace gmm {
diff --git a/src/mlpack/methods/hmm/hmm.hpp b/src/mlpack/methods/hmm/hmm.hpp
index e9b3541..a6d954b 100644
--- a/src/mlpack/methods/hmm/hmm.hpp
+++ b/src/mlpack/methods/hmm/hmm.hpp
@@ -14,7 +14,8 @@
 #ifndef MLPACK_METHODS_HMM_HMM_HPP
 #define MLPACK_METHODS_HMM_HMM_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/dists/discrete_distribution.hpp>
 
 namespace mlpack {
 namespace hmm /** Hidden Markov Models. */ {
diff --git a/src/mlpack/methods/hmm/hmm_generate_main.cpp b/src/mlpack/methods/hmm/hmm_generate_main.cpp
index 99cf66a..b7a6b84 100644
--- a/src/mlpack/methods/hmm/hmm_generate_main.cpp
+++ b/src/mlpack/methods/hmm/hmm_generate_main.cpp
@@ -11,7 +11,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "hmm.hpp"
 #include "hmm_util.hpp"
diff --git a/src/mlpack/methods/hmm/hmm_loglik_main.cpp b/src/mlpack/methods/hmm/hmm_loglik_main.cpp
index accaa67..88ea511 100644
--- a/src/mlpack/methods/hmm/hmm_loglik_main.cpp
+++ b/src/mlpack/methods/hmm/hmm_loglik_main.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "hmm.hpp"
 #include "hmm_util.hpp"
diff --git a/src/mlpack/methods/hmm/hmm_regression.hpp b/src/mlpack/methods/hmm/hmm_regression.hpp
index 1319c75..bfde35a 100644
--- a/src/mlpack/methods/hmm/hmm_regression.hpp
+++ b/src/mlpack/methods/hmm/hmm_regression.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_HMM_HMM_REGRESSION_HPP
 #define MLPACK_METHODS_HMM_HMM_REGRESSION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/dists/regression_distribution.hpp>
 #include "hmm.hpp"
 
diff --git a/src/mlpack/methods/hmm/hmm_train_main.cpp b/src/mlpack/methods/hmm/hmm_train_main.cpp
index 1f1ab76..20722b0 100644
--- a/src/mlpack/methods/hmm/hmm_train_main.cpp
+++ b/src/mlpack/methods/hmm/hmm_train_main.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "hmm.hpp"
 #include "hmm_util.hpp"
diff --git a/src/mlpack/methods/hmm/hmm_util.hpp b/src/mlpack/methods/hmm/hmm_util.hpp
index b64af48..5b56d16 100644
--- a/src/mlpack/methods/hmm/hmm_util.hpp
+++ b/src/mlpack/methods/hmm/hmm_util.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_HMM_HMM_UTIL_HPP
 #define MLPACK_METHODS_HMM_HMM_UTIL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace hmm {
diff --git a/src/mlpack/methods/hmm/hmm_util_impl.hpp b/src/mlpack/methods/hmm/hmm_util_impl.hpp
index 856e47f..f68f300 100644
--- a/src/mlpack/methods/hmm/hmm_util_impl.hpp
+++ b/src/mlpack/methods/hmm/hmm_util_impl.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_HMM_HMM_UTIL_IMPL_HPP
 #define MLPACK_METHODS_HMM_HMM_UTIL_IMPL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <mlpack/methods/hmm/hmm.hpp>
 #include <mlpack/methods/gmm/gmm.hpp>
diff --git a/src/mlpack/methods/hmm/hmm_viterbi_main.cpp b/src/mlpack/methods/hmm/hmm_viterbi_main.cpp
index 38bef6f..9ad21ed 100644
--- a/src/mlpack/methods/hmm/hmm_viterbi_main.cpp
+++ b/src/mlpack/methods/hmm/hmm_viterbi_main.cpp
@@ -10,7 +10,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "hmm.hpp"
 #include "hmm_util.hpp"
diff --git a/src/mlpack/methods/hoeffding_trees/binary_numeric_split_info.hpp b/src/mlpack/methods/hoeffding_trees/binary_numeric_split_info.hpp
index a8ba23e..d605a25 100644
--- a/src/mlpack/methods/hoeffding_trees/binary_numeric_split_info.hpp
+++ b/src/mlpack/methods/hoeffding_trees/binary_numeric_split_info.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_HOEFFDING_TREES_BINARY_NUMERIC_SPLIT_INFO_HPP
 #define MLPACK_METHODS_HOEFFDING_TREES_BINARY_NUMERIC_SPLIT_INFO_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
index 24b7a68..41625dd 100644
--- a/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
+++ b/src/mlpack/methods/hoeffding_trees/categorical_split_info.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_HOEFFDING_TREES_CATEGORICAL_SPLIT_INFO_HPP
 #define MLPACK_METHODS_HOEFFDING_TREES_CATEGORICAL_SPLIT_INFO_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/methods/hoeffding_trees/gini_impurity.hpp b/src/mlpack/methods/hoeffding_trees/gini_impurity.hpp
index fda66f3..8952bb7 100644
--- a/src/mlpack/methods/hoeffding_trees/gini_impurity.hpp
+++ b/src/mlpack/methods/hoeffding_trees/gini_impurity.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_HOEFFDING_TREES_GINI_INDEX_HPP
 #define MLPACK_METHODS_HOEFFDING_TREES_GINI_INDEX_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
index e9b3f87..21a605b 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_categorical_split.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_CATEGORICAL_SPLIT_HPP
 #define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_CATEGORICAL_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "categorical_split_info.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
index feff1cb..60c6b3c 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_numeric_split.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
 #define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "numeric_split_info.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
index 1f6c683..cad54aa 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp
@@ -13,7 +13,8 @@
 #ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
 #define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/data/dataset_mapper.hpp>
 #include "gini_impurity.hpp"
 #include "hoeffding_numeric_split.hpp"
 #include "hoeffding_categorical_split.hpp"
diff --git a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
index a4a618f..f268d3c 100644
--- a/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
+++ b/src/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include <mlpack/methods/hoeffding_trees/hoeffding_tree.hpp>
 #include <mlpack/methods/hoeffding_trees/binary_numeric_split.hpp>
 #include <mlpack/methods/hoeffding_trees/information_gain.hpp>
diff --git a/src/mlpack/methods/hoeffding_trees/numeric_split_info.hpp b/src/mlpack/methods/hoeffding_trees/numeric_split_info.hpp
index 1e947c2..b09e1b1 100644
--- a/src/mlpack/methods/hoeffding_trees/numeric_split_info.hpp
+++ b/src/mlpack/methods/hoeffding_trees/numeric_split_info.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_HOEFFDING_TREES_NUMERIC_SPLIT_INFO_HPP
 #define MLPACK_METHODS_HOEFFDING_TREES_NUMERIC_SPLIT_INFO_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace tree {
diff --git a/src/mlpack/methods/kernel_pca/kernel_pca.hpp b/src/mlpack/methods/kernel_pca/kernel_pca.hpp
index 3c54c23..bbe8b5d 100644
--- a/src/mlpack/methods/kernel_pca/kernel_pca.hpp
+++ b/src/mlpack/methods/kernel_pca/kernel_pca.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_KERNEL_PCA_KERNEL_PCA_HPP
 #define MLPACK_METHODS_KERNEL_PCA_KERNEL_PCA_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/kernel_pca/kernel_rules/naive_method.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp b/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
index 0a36a8a..357bab8 100644
--- a/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
+++ b/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
@@ -9,7 +9,21 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/math/random.hpp>
+#include <mlpack/core/kernels/kernel_traits.hpp>
+#include <mlpack/core/kernels/linear_kernel.hpp>
+#include <mlpack/core/kernels/polynomial_kernel.hpp>
+#include <mlpack/core/kernels/cosine_distance.hpp>
+#include <mlpack/core/kernels/gaussian_kernel.hpp>
+#include <mlpack/core/kernels/epanechnikov_kernel.hpp>
+#include <mlpack/core/kernels/hyperbolic_tangent_kernel.hpp>
+#include <mlpack/core/kernels/laplacian_kernel.hpp>
+#include <mlpack/core/kernels/pspectrum_string_kernel.hpp>
+#include <mlpack/core/kernels/spherical_kernel.hpp>
+#include <mlpack/core/kernels/triangular_kernel.hpp>
+#include <mlpack/methods/hoeffding_trees/hoeffding_tree.hpp>
 #include <mlpack/methods/nystroem_method/ordered_selection.hpp>
 #include <mlpack/methods/nystroem_method/random_selection.hpp>
 #include <mlpack/methods/nystroem_method/kmeans_selection.hpp>
diff --git a/src/mlpack/methods/kernel_pca/kernel_rules/naive_method.hpp b/src/mlpack/methods/kernel_pca/kernel_rules/naive_method.hpp
index 80b76c8..cb45b58 100644
--- a/src/mlpack/methods/kernel_pca/kernel_rules/naive_method.hpp
+++ b/src/mlpack/methods/kernel_pca/kernel_rules/naive_method.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_KERNEL_PCA_NAIVE_METHOD_HPP
 #define MLPACK_METHODS_KERNEL_PCA_NAIVE_METHOD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kpca {
diff --git a/src/mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp b/src/mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp
index 9e2eff6..a8f8241 100644
--- a/src/mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp
+++ b/src/mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_KERNEL_PCA_NYSTROEM_METHOD_HPP
 #define MLPACK_METHODS_KERNEL_PCA_NYSTROEM_METHOD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/nystroem_method/kmeans_selection.hpp>
 #include <mlpack/methods/nystroem_method/nystroem_method.hpp>
 
diff --git a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
index 6bcfff2..24c3c61 100644
--- a/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/allow_empty_clusters.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_KMEANS_ALLOW_EMPTY_CLUSTERS_HPP
 #define MLPACK_METHODS_KMEANS_ALLOW_EMPTY_CLUSTERS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kmeans {
diff --git a/src/mlpack/methods/kmeans/kill_empty_clusters.hpp b/src/mlpack/methods/kmeans/kill_empty_clusters.hpp
index 648cb5e..d4a0bb4 100644
--- a/src/mlpack/methods/kmeans/kill_empty_clusters.hpp
+++ b/src/mlpack/methods/kmeans/kill_empty_clusters.hpp
@@ -13,7 +13,7 @@
 #ifndef __MLPACK_METHODS_KMEANS_KILL_EMPTY_CLUSTERS_HPP
 #define __MLPACK_METHODS_KMEANS_KILL_EMPTY_CLUSTERS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kmeans {
diff --git a/src/mlpack/methods/kmeans/kmeans.hpp b/src/mlpack/methods/kmeans/kmeans.hpp
index 6d2a6f9..af51cea 100644
--- a/src/mlpack/methods/kmeans/kmeans.hpp
+++ b/src/mlpack/methods/kmeans/kmeans.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_KMEANS_KMEANS_HPP
 #define MLPACK_METHODS_KMEANS_KMEANS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <mlpack/core/metrics/lmetric.hpp>
 #include "sample_initialization.hpp"
diff --git a/src/mlpack/methods/kmeans/kmeans_main.cpp b/src/mlpack/methods/kmeans/kmeans_main.cpp
index 9e71c11..aea52c5 100644
--- a/src/mlpack/methods/kmeans/kmeans_main.cpp
+++ b/src/mlpack/methods/kmeans/kmeans_main.cpp
@@ -9,7 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "kmeans.hpp"
 #include "allow_empty_clusters.hpp"
diff --git a/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp b/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
index 220c7f6..c65bbaf 100644
--- a/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
+++ b/src/mlpack/methods/kmeans/max_variance_new_cluster.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_KMEANS_MAX_VARIANCE_NEW_CLUSTER_HPP
 #define MLPACK_METHODS_KMEANS_MAX_VARIANCE_NEW_CLUSTER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kmeans {
diff --git a/src/mlpack/methods/kmeans/random_partition.hpp b/src/mlpack/methods/kmeans/random_partition.hpp
index 29de068..2d1b995 100644
--- a/src/mlpack/methods/kmeans/random_partition.hpp
+++ b/src/mlpack/methods/kmeans/random_partition.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_KMEANS_RANDOM_PARTITION_HPP
 #define MLPACK_METHODS_KMEANS_RANDOM_PARTITION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kmeans {
diff --git a/src/mlpack/methods/kmeans/refined_start.hpp b/src/mlpack/methods/kmeans/refined_start.hpp
index c0bf6a2..719334b 100644
--- a/src/mlpack/methods/kmeans/refined_start.hpp
+++ b/src/mlpack/methods/kmeans/refined_start.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_KMEANS_REFINED_START_HPP
 #define MLPACK_METHODS_KMEANS_REFINED_START_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kmeans {
diff --git a/src/mlpack/methods/kmeans/sample_initialization.hpp b/src/mlpack/methods/kmeans/sample_initialization.hpp
index 48a2244..9c8d47a 100644
--- a/src/mlpack/methods/kmeans/sample_initialization.hpp
+++ b/src/mlpack/methods/kmeans/sample_initialization.hpp
@@ -14,7 +14,8 @@
 #ifndef __MLPACK_METHODS_KMEANS_SAMPLE_INITIALIZATION_HPP
 #define __MLPACK_METHODS_KMEANS_SAMPLE_INITIALIZATION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/random.hpp>
 
 namespace mlpack {
 namespace kmeans {
diff --git a/src/mlpack/methods/lars/lars.cpp b/src/mlpack/methods/lars/lars.cpp
index 661fc80..8f86070 100644
--- a/src/mlpack/methods/lars/lars.cpp
+++ b/src/mlpack/methods/lars/lars.cpp
@@ -10,6 +10,8 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 #include "lars.hpp"
+#include <mlpack/core/util/log.hpp>
+#include <mlpack/core/util/timers.hpp>
 
 using namespace mlpack;
 using namespace mlpack::regression;
diff --git a/src/mlpack/methods/lars/lars.hpp b/src/mlpack/methods/lars/lars.hpp
index fb51b1f..d6c5436 100644
--- a/src/mlpack/methods/lars/lars.hpp
+++ b/src/mlpack/methods/lars/lars.hpp
@@ -24,7 +24,7 @@
 #ifndef MLPACK_METHODS_LARS_LARS_HPP
 #define MLPACK_METHODS_LARS_LARS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace regression {
diff --git a/src/mlpack/methods/lars/lars_main.cpp b/src/mlpack/methods/lars/lars_main.cpp
index 73075d3..37d6ffe 100644
--- a/src/mlpack/methods/lars/lars_main.cpp
+++ b/src/mlpack/methods/lars/lars_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 
 #include "lars.hpp"
 
diff --git a/src/mlpack/methods/linear_regression/linear_regression.cpp b/src/mlpack/methods/linear_regression/linear_regression.cpp
index 25167e9..cf38e43 100644
--- a/src/mlpack/methods/linear_regression/linear_regression.cpp
+++ b/src/mlpack/methods/linear_regression/linear_regression.cpp
@@ -11,6 +11,7 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 #include "linear_regression.hpp"
+#include <mlpack/core/util/log.hpp>
 
 using namespace mlpack;
 using namespace mlpack::regression;
diff --git a/src/mlpack/methods/linear_regression/linear_regression.hpp b/src/mlpack/methods/linear_regression/linear_regression.hpp
index 79993bd..7fc9c0b 100644
--- a/src/mlpack/methods/linear_regression/linear_regression.hpp
+++ b/src/mlpack/methods/linear_regression/linear_regression.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_LINEAR_REGRESSION_LINEAR_REGRESSION_HPP
 #define MLPACK_METHODS_LINEAR_REGRESSION_LINEAR_REGRESSION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace regression /** Regression methods. */ {
diff --git a/src/mlpack/methods/linear_regression/linear_regression_main.cpp b/src/mlpack/methods/linear_regression/linear_regression_main.cpp
index b3cf6dd..a9a76f2 100644
--- a/src/mlpack/methods/linear_regression/linear_regression_main.cpp
+++ b/src/mlpack/methods/linear_regression/linear_regression_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include "linear_regression.hpp"
 
 PROGRAM_INFO("Simple Linear Regression and Prediction",
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc.cpp b/src/mlpack/methods/local_coordinate_coding/lcc.cpp
index 31a6eaa..8c420d8 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc.cpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc.cpp
@@ -10,6 +10,7 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 #include "lcc.hpp"
+#include <mlpack/core/math/lin_alg.hpp>
 
 namespace mlpack {
 namespace lcc {
diff --git a/src/mlpack/methods/local_coordinate_coding/lcc.hpp b/src/mlpack/methods/local_coordinate_coding/lcc.hpp
index 28fdd63..6a37fbd 100644
--- a/src/mlpack/methods/local_coordinate_coding/lcc.hpp
+++ b/src/mlpack/methods/local_coordinate_coding/lcc.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_LOCAL_COORDINATE_CODING_LCC_HPP
 #define MLPACK_METHODS_LOCAL_COORDINATE_CODING_LCC_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/lars/lars.hpp>
 
 // Include three simple dictionary initializers from sparse coding.
diff --git a/src/mlpack/methods/local_coordinate_coding/local_coordinate_coding_main.cpp b/src/mlpack/methods/local_coordinate_coding/local_coordinate_coding_main.cpp
index e6967fb..536058c 100644
--- a/src/mlpack/methods/local_coordinate_coding/local_coordinate_coding_main.cpp
+++ b/src/mlpack/methods/local_coordinate_coding/local_coordinate_coding_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include "lcc.hpp"
 
 PROGRAM_INFO("Local Coordinate Coding",
diff --git a/src/mlpack/methods/logistic_regression/logistic_regression.hpp b/src/mlpack/methods/logistic_regression/logistic_regression.hpp
index 8973339..e249ad7 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression.hpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_LOGISTIC_REGRESSION_LOGISTIC_REGRESSION_HPP
 #define MLPACK_METHODS_LOGISTIC_REGRESSION_LOGISTIC_REGRESSION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>
 
 #include "logistic_regression_function.hpp"
diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_function.hpp b/src/mlpack/methods/logistic_regression/logistic_regression_function.hpp
index 4ef2ddf..586d69e 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression_function.hpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression_function.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_LOGISTIC_REGRESSION_LOGISTIC_REGRESSION_FUNCTION_HPP
 #define MLPACK_METHODS_LOGISTIC_REGRESSION_LOGISTIC_REGRESSION_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace regression {
diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp b/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
index 17557fa..28a61c4 100644
--- a/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
+++ b/src/mlpack/methods/logistic_regression/logistic_regression_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include "logistic_regression.hpp"
 
 #include <mlpack/core/optimizers/sgd/sgd.hpp>
diff --git a/src/mlpack/methods/lsh/lsh_main.cpp b/src/mlpack/methods/lsh/lsh_main.cpp
index 470c248..8618bd7 100644
--- a/src/mlpack/methods/lsh/lsh_main.cpp
+++ b/src/mlpack/methods/lsh/lsh_main.cpp
@@ -10,7 +10,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 
 #include "lsh_search.hpp"
diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp
index ec0b9d4..aa6c56e 100644
--- a/src/mlpack/methods/lsh/lsh_search.hpp
+++ b/src/mlpack/methods/lsh/lsh_search.hpp
@@ -43,7 +43,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/methods/neighbor_search/sort_policies/nearest_neighbor_sort.hpp>
diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp
index b09e433..ce12411 100644
--- a/src/mlpack/methods/lsh/lsh_search_impl.hpp
+++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp
@@ -12,7 +12,8 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_IMPL_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_IMPL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/random.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/mean_shift/mean_shift.hpp b/src/mlpack/methods/mean_shift/mean_shift.hpp
index 2d6da5a..34f12d6 100644
--- a/src/mlpack/methods/mean_shift/mean_shift.hpp
+++ b/src/mlpack/methods/mean_shift/mean_shift.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_MEAN_SHIFT_MEAN_SHIFT_HPP
 #define MLPACK_METHODS_MEAN_SHIFT_MEAN_SHIFT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/kernels/gaussian_kernel.hpp>
 #include <mlpack/core/kernels/kernel_traits.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
diff --git a/src/mlpack/methods/mean_shift/mean_shift_main.cpp b/src/mlpack/methods/mean_shift/mean_shift_main.cpp
index 270b68d..cea6559 100644
--- a/src/mlpack/methods/mean_shift/mean_shift_main.cpp
+++ b/src/mlpack/methods/mean_shift/mean_shift_main.cpp
@@ -9,8 +9,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/kernels/gaussian_kernel.hpp>
 #include "mean_shift.hpp"
 
diff --git a/src/mlpack/methods/mvu/mvu.hpp b/src/mlpack/methods/mvu/mvu.hpp
new file mode 100644
index 0000000..c1cea30
--- /dev/null
+++ b/src/mlpack/methods/mvu/mvu.hpp
@@ -0,0 +1,48 @@
+/**
+ * @file mvu.hpp
+ * @author Ryan Curtin
+ *
+ * An implementation of Maximum Variance Unfolding.  This file defines an MVU
+ * class as well as a class representing the objective function (a semidefinite
+ * program) which MVU seeks to minimize.  Minimization is performed by the
+ * Augmented Lagrangian optimizer (which in turn uses the L-BFGS optimizer).
+ *
+ * Note: this implementation of MVU does not work.  See #189.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef MLPACK_METHODS_MVU_MVU_HPP
+#define MLPACK_METHODS_MVU_MVU_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace mvu {
+
+/**
+ * The MVU class is meant to provide a good abstraction for users.  The dataset
+ * needs to be provided, as well as several parameters.
+ *
+ * - dataset
+ * - new dimensionality
+ */
+class MVU
+{
+ public:
+  MVU(const arma::mat& dataIn);
+
+  void Unfold(const size_t newDim,
+              const size_t numNeighbors,
+              arma::mat& outputCoordinates);
+
+ private:
+  const arma::mat& data;
+};
+
+} // namespace mvu
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/mvu/mvu_main.cpp b/src/mlpack/methods/mvu/mvu_main.cpp
new file mode 100644
index 0000000..975a8bf
--- /dev/null
+++ b/src/mlpack/methods/mvu/mvu_main.cpp
@@ -0,0 +1,79 @@
+/**
+ * @file mvu_main.cpp
+ * @author Ryan Curtin
+ *
+ * Executable for MVU.
+ *
+ * Note: this implementation of MVU does not work.  See #189.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#include <mlpack/prereqs.hpp>
+#include "mvu.hpp"
+
+PROGRAM_INFO("Maximum Variance Unfolding (MVU)", "This program implements "
+    "Maximum Variance Unfolding, a nonlinear dimensionality reduction "
+    "technique.  The method minimizes dimensionality by unfolding a manifold "
+    "such that the distances to the nearest neighbors of each point are held "
+    "constant.");
+
+PARAM_MATRIX_IN_REQ("input", "Input dataset.", "i");
+PARAM_INT_IN_REQ("new_dim", "New dimensionality of dataset.", "d");
+
+PARAM_MATRIX_OUT("output", "Matrix to save unfolded dataset to.", "o");
+PARAM_INT_IN("num_neighbors", "Number of nearest neighbors to consider while "
+    "unfolding.", "k", 5);
+
+using namespace mlpack;
+using namespace mlpack::mvu;
+using namespace mlpack::math;
+using namespace arma;
+using namespace std;
+
+int main(int argc, char **argv)
+{
+  // Read from command line.
+  CLI::ParseCommandLine(argc, argv);
+  const string inputFile = CLI::GetParam<string>("input_file");
+  const string outputFile = CLI::GetParam<string>("output_file");
+  const int newDim = CLI::GetParam<int>("new_dim");
+  const int numNeighbors = CLI::GetParam<int>("num_neighbors");
+
+  if (!CLI::HasParam("output"))
+    Log::Warn << "--output_file (-o) is not specified; no results will be "
+        << "saved!" << endl;
+
+  RandomSeed(time(NULL));
+
+  // Load input dataset.
+  mat data = std::move(CLI::GetParam<arma::mat>("input"));
+
+  // Verify that the requested dimensionality is valid.
+  if (newDim <= 0 || newDim > (int) data.n_rows)
+  {
+    Log::Fatal << "Invalid new dimensionality (" << newDim << ").  Must be "
+      << "between 1 and the input dataset dimensionality (" << data.n_rows
+      << ")." << std::endl;
+  }
+
+  // Verify that the number of neighbors is valid.
+  if (numNeighbors <= 0 || numNeighbors > (int) data.n_cols)
+  {
+    Log::Fatal << "Invalid number of neighbors (" << numNeighbors << ").  Must "
+        << "be between 1 and the number of points in the input dataset ("
+        << data.n_cols << ")." << std::endl;
+  }
+
+  // Now run MVU.
+  MVU mvu(data);
+
+  mat output;
+  mvu.Unfold(newDim, numNeighbors, output);
+
+  // Save results to file.
+  if (CLI::HasParam("output"))
+    CLI::GetParam<arma::mat>("output") = std::move(output);
+}
diff --git a/src/mlpack/methods/naive_bayes/naive_bayes_classifier.hpp b/src/mlpack/methods/naive_bayes/naive_bayes_classifier.hpp
index 48b7701..efd62bd 100644
--- a/src/mlpack/methods/naive_bayes/naive_bayes_classifier.hpp
+++ b/src/mlpack/methods/naive_bayes/naive_bayes_classifier.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_NAIVE_BAYES_NAIVE_BAYES_CLASSIFIER_HPP
 #define MLPACK_METHODS_NAIVE_BAYES_NAIVE_BAYES_CLASSIFIER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace naive_bayes /** The Naive Bayes Classifier. */ {
diff --git a/src/mlpack/methods/naive_bayes/naive_bayes_classifier_impl.hpp b/src/mlpack/methods/naive_bayes/naive_bayes_classifier_impl.hpp
index b701609..b2d9bfe 100644
--- a/src/mlpack/methods/naive_bayes/naive_bayes_classifier_impl.hpp
+++ b/src/mlpack/methods/naive_bayes/naive_bayes_classifier_impl.hpp
@@ -16,7 +16,7 @@
 #ifndef MLPACK_METHODS_NAIVE_BAYES_NAIVE_BAYES_CLASSIFIER_IMPL_HPP
 #define MLPACK_METHODS_NAIVE_BAYES_NAIVE_BAYES_CLASSIFIER_IMPL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 // In case it hasn't been included already.
 #include "naive_bayes_classifier.hpp"
diff --git a/src/mlpack/methods/naive_bayes/nbc_main.cpp b/src/mlpack/methods/naive_bayes/nbc_main.cpp
index 0f0d9f4..254d5eb 100644
--- a/src/mlpack/methods/naive_bayes/nbc_main.cpp
+++ b/src/mlpack/methods/naive_bayes/nbc_main.cpp
@@ -12,7 +12,9 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/data/normalize_labels.hpp>
 
 #include "naive_bayes_classifier.hpp"
 
diff --git a/src/mlpack/methods/nca/nca.hpp b/src/mlpack/methods/nca/nca.hpp
index a02a85f..6f74755 100644
--- a/src/mlpack/methods/nca/nca.hpp
+++ b/src/mlpack/methods/nca/nca.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_NCA_NCA_HPP
 #define MLPACK_METHODS_NCA_NCA_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/core/optimizers/sgd/sgd.hpp>
 
diff --git a/src/mlpack/methods/nca/nca_main.cpp b/src/mlpack/methods/nca/nca_main.cpp
index 2d1221e..93dba19 100644
--- a/src/mlpack/methods/nca/nca_main.cpp
+++ b/src/mlpack/methods/nca/nca_main.cpp
@@ -9,7 +9,10 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/data/normalize_labels.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/math/random.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 
 #include "nca.hpp"
diff --git a/src/mlpack/methods/nca/nca_softmax_error_function.hpp b/src/mlpack/methods/nca/nca_softmax_error_function.hpp
index 8054e10..1b3f9bb 100644
--- a/src/mlpack/methods/nca/nca_softmax_error_function.hpp
+++ b/src/mlpack/methods/nca/nca_softmax_error_function.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NCA_NCA_SOFTMAX_ERROR_FUNCTION_HPP
 #define MLPACK_METHODS_NCA_NCA_SOFTMAX_ERROR_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace nca {
diff --git a/src/mlpack/methods/neighbor_search/kfn_main.cpp b/src/mlpack/methods/neighbor_search/kfn_main.cpp
index 9157891..95faefa 100644
--- a/src/mlpack/methods/neighbor_search/kfn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/kfn_main.cpp
@@ -10,7 +10,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <string>
 #include <fstream>
diff --git a/src/mlpack/methods/neighbor_search/knn_main.cpp b/src/mlpack/methods/neighbor_search/knn_main.cpp
index 2e8d878..faa26f0 100644
--- a/src/mlpack/methods/neighbor_search/knn_main.cpp
+++ b/src/mlpack/methods/neighbor_search/knn_main.cpp
@@ -10,7 +10,9 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/core/tree/cover_tree.hpp>
 
 #include <string>
diff --git a/src/mlpack/methods/neighbor_search/neighbor_search.hpp b/src/mlpack/methods/neighbor_search/neighbor_search.hpp
index e0506ac..064a6af 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <vector>
 #include <string>
 
@@ -21,7 +21,6 @@
 #include <mlpack/core/tree/rectangle_tree.hpp>
 #include <mlpack/core/tree/binary_space_tree/binary_space_tree.hpp>
 
-#include <mlpack/core/metrics/lmetric.hpp>
 #include "neighbor_search_stat.hpp"
 #include "sort_policies/nearest_neighbor_sort.hpp"
 #include "neighbor_search_rules.hpp"
diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
index f25ef3f..8aea761 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_impl.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_IMPL_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_IMPL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/tree/greedy_single_tree_traverser.hpp>
 #include "neighbor_search_rules.hpp"
 #include <mlpack/core/tree/spill_tree/is_spill_tree.hpp>
diff --git a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
index 1ce0262..d23f998 100644
--- a/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
+++ b/src/mlpack/methods/neighbor_search/neighbor_search_stat.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_STAT_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_NEIGHBOR_SEARCH_STAT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/neighbor_search/sort_policies/furthest_neighbor_sort.hpp b/src/mlpack/methods/neighbor_search/sort_policies/furthest_neighbor_sort.hpp
index fc40f50..d7c0125 100644
--- a/src/mlpack/methods/neighbor_search/sort_policies/furthest_neighbor_sort.hpp
+++ b/src/mlpack/methods/neighbor_search/sort_policies/furthest_neighbor_sort.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_FURTHEST_NEIGHBOR_SORT_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_FURTHEST_NEIGHBOR_SORT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/neighbor_search/sort_policies/nearest_neighbor_sort.hpp b/src/mlpack/methods/neighbor_search/sort_policies/nearest_neighbor_sort.hpp
index 3a623c6..9f97cb8 100644
--- a/src/mlpack/methods/neighbor_search/sort_policies/nearest_neighbor_sort.hpp
+++ b/src/mlpack/methods/neighbor_search/sort_policies/nearest_neighbor_sort.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_NEAREST_NEIGHBOR_SORT_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_NEAREST_NEIGHBOR_SORT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/neighbor_search/unmap.hpp b/src/mlpack/methods/neighbor_search/unmap.hpp
index febc46a..884df85 100644
--- a/src/mlpack/methods/neighbor_search/unmap.hpp
+++ b/src/mlpack/methods/neighbor_search/unmap.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_NEIGHBOR_SEARCH_UNMAP_HPP
 #define MLPACK_METHODS_NEIGHBOR_SEARCH_UNMAP_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/nmf/nmf_main.cpp b/src/mlpack/methods/nmf/nmf_main.cpp
index a9d0645..e65a6fa 100644
--- a/src/mlpack/methods/nmf/nmf_main.cpp
+++ b/src/mlpack/methods/nmf/nmf_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 
 #include <mlpack/methods/amf/amf.hpp>
 
diff --git a/src/mlpack/methods/nystroem_method/kmeans_selection.hpp b/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
index eb73fd3..5acc79a 100644
--- a/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
+++ b/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
 #define MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/kmeans/kmeans.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/methods/nystroem_method/nystroem_method.hpp b/src/mlpack/methods/nystroem_method/nystroem_method.hpp
index 31d4a36..e2e0bca 100644
--- a/src/mlpack/methods/nystroem_method/nystroem_method.hpp
+++ b/src/mlpack/methods/nystroem_method/nystroem_method.hpp
@@ -15,7 +15,7 @@
 #ifndef MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
 #define MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include "kmeans_selection.hpp"
 
 namespace mlpack {
diff --git a/src/mlpack/methods/nystroem_method/ordered_selection.hpp b/src/mlpack/methods/nystroem_method/ordered_selection.hpp
index 88aaa69..008d484 100644
--- a/src/mlpack/methods/nystroem_method/ordered_selection.hpp
+++ b/src/mlpack/methods/nystroem_method/ordered_selection.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
 #define MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/methods/nystroem_method/random_selection.hpp b/src/mlpack/methods/nystroem_method/random_selection.hpp
index 425fb39..c670cc7 100644
--- a/src/mlpack/methods/nystroem_method/random_selection.hpp
+++ b/src/mlpack/methods/nystroem_method/random_selection.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
 #define MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace kernel {
diff --git a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp
index 1291f89..fe0fb0c 100644
--- a/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp
+++ b/src/mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp
@@ -16,7 +16,7 @@
 #ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_EXACT_SVD_METHOD_HPP
 #define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_EXACT_SVD_METHOD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace pca {
diff --git a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp
index 68affac..df18f0b 100644
--- a/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp
+++ b/src/mlpack/methods/pca/decomposition_policies/quic_svd_method.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_QUIC_SVD_METHOD_HPP
 #define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_QUIC_SVD_METHOD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/quic_svd/quic_svd.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp
index 216e869..f7f9089 100644
--- a/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp
+++ b/src/mlpack/methods/pca/decomposition_policies/randomized_svd_method.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
 #define MLPACK_METHODS_PCA_DECOMPOSITION_POLICIES_RANDOMIZED_SVD_METHOD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/randomized_svd/randomized_svd.hpp>
 #include <mlpack/methods/ann/init_rules/random_init.hpp>
 
diff --git a/src/mlpack/methods/pca/pca.hpp b/src/mlpack/methods/pca/pca.hpp
index 8bf578f..271f562 100644
--- a/src/mlpack/methods/pca/pca.hpp
+++ b/src/mlpack/methods/pca/pca.hpp
@@ -16,7 +16,7 @@
 #ifndef MLPACK_METHODS_PCA_PCA_HPP
 #define MLPACK_METHODS_PCA_PCA_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/methods/pca/pca_impl.hpp b/src/mlpack/methods/pca/pca_impl.hpp
index 89499ad..87467f5 100644
--- a/src/mlpack/methods/pca/pca_impl.hpp
+++ b/src/mlpack/methods/pca/pca_impl.hpp
@@ -16,7 +16,8 @@
 #ifndef MLPACK_METHODS_PCA_PCA_IMPL_HPP
 #define MLPACK_METHODS_PCA_PCA_IMPL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/lin_alg.hpp>
 #include "pca.hpp"
 
 using namespace std;
diff --git a/src/mlpack/methods/pca/pca_main.cpp b/src/mlpack/methods/pca/pca_main.cpp
index 248a1ea..26b10d9 100644
--- a/src/mlpack/methods/pca/pca_main.cpp
+++ b/src/mlpack/methods/pca/pca_main.cpp
@@ -10,7 +10,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 
 #include "pca.hpp"
 #include <mlpack/methods/pca/decomposition_policies/exact_svd_method.hpp>
diff --git a/src/mlpack/methods/perceptron/initialization_methods/random_init.hpp b/src/mlpack/methods/perceptron/initialization_methods/random_init.hpp
index 5c28e1c..c1a2c32 100644
--- a/src/mlpack/methods/perceptron/initialization_methods/random_init.hpp
+++ b/src/mlpack/methods/perceptron/initialization_methods/random_init.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_PERCEPTRON_INITIALIZATION_METHODS_RANDOM_INIT_HPP
 #define MLPACK_METHODS_PERCEPTRON_INITIALIZATION_METHODS_RANDOM_INIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace perceptron {
diff --git a/src/mlpack/methods/perceptron/initialization_methods/zero_init.hpp b/src/mlpack/methods/perceptron/initialization_methods/zero_init.hpp
index af13601..76a5fd6 100644
--- a/src/mlpack/methods/perceptron/initialization_methods/zero_init.hpp
+++ b/src/mlpack/methods/perceptron/initialization_methods/zero_init.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_PERCEPTRON_INITIALIZATION_METHODS_ZERO_INIT_HPP
 #define MLPACK_METHODS_PERCEPTRON_INITIALIZATION_METHODS_ZERO_INIT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace perceptron {
diff --git a/src/mlpack/methods/perceptron/learning_policies/simple_weight_update.hpp b/src/mlpack/methods/perceptron/learning_policies/simple_weight_update.hpp
index 81a8a4f..71a2a75 100644
--- a/src/mlpack/methods/perceptron/learning_policies/simple_weight_update.hpp
+++ b/src/mlpack/methods/perceptron/learning_policies/simple_weight_update.hpp
@@ -12,7 +12,7 @@
 #ifndef _MLPACK_METHODS_PERCEPTRON_LEARNING_POLICIES_SIMPLE_WEIGHT_UPDATE_HPP
 #define _MLPACK_METHODS_PERCEPTRON_LEARNING_POLICIES_SIMPLE_WEIGHT_UPDATE_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 /**
  * This class is used to update the weightVectors matrix according to the simple
diff --git a/src/mlpack/methods/perceptron/perceptron.hpp b/src/mlpack/methods/perceptron/perceptron.hpp
index 851248a..7f8e8ab 100644
--- a/src/mlpack/methods/perceptron/perceptron.hpp
+++ b/src/mlpack/methods/perceptron/perceptron.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_PERCEPTRON_PERCEPTRON_HPP
 #define MLPACK_METHODS_PERCEPTRON_PERCEPTRON_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "initialization_methods/zero_init.hpp"
 #include "initialization_methods/random_init.hpp"
diff --git a/src/mlpack/methods/perceptron/perceptron_main.cpp b/src/mlpack/methods/perceptron/perceptron_main.cpp
index 3093191..38dbef0 100644
--- a/src/mlpack/methods/perceptron/perceptron_main.cpp
+++ b/src/mlpack/methods/perceptron/perceptron_main.cpp
@@ -13,7 +13,9 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/data/normalize_labels.hpp>
 #include "perceptron.hpp"
 
 using namespace mlpack;
diff --git a/src/mlpack/methods/preprocess/preprocess_binarize_main.cpp b/src/mlpack/methods/preprocess/preprocess_binarize_main.cpp
index 853fa7b..4d20974 100644
--- a/src/mlpack/methods/preprocess/preprocess_binarize_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_binarize_main.cpp
@@ -9,8 +9,9 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/data/binarize.hpp>
+#include <mlpack/core/util/param.hpp>
 
 PROGRAM_INFO("Binarize Data", "This utility takes a dataset and binarizes the "
     "variables into either 0 or 1 given threshold. User can apply binarization "
diff --git a/src/mlpack/methods/preprocess/preprocess_describe_main.cpp b/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
index e654401..37db25b 100644
--- a/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_describe_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include <boost/format.hpp>
 #include <boost/lexical_cast.hpp>
 
diff --git a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
index 117563d..fb1e1f1 100644
--- a/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_imputer_main.cpp
@@ -10,7 +10,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include <mlpack/core/data/imputer.hpp>
 #include <mlpack/core/data/dataset_mapper.hpp>
 #include <mlpack/core/data/map_policies/increment_policy.hpp>
diff --git a/src/mlpack/methods/preprocess/preprocess_split_main.cpp b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
index 0a458a8..8796871 100644
--- a/src/mlpack/methods/preprocess/preprocess_split_main.cpp
+++ b/src/mlpack/methods/preprocess/preprocess_split_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include <mlpack/core/data/split_data.hpp>
 
 PROGRAM_INFO("Split Data", "This utility takes a dataset and optionally labels "
diff --git a/src/mlpack/methods/quic_svd/quic_svd.hpp b/src/mlpack/methods/quic_svd/quic_svd.hpp
index 3e9986f..a2b0a20 100644
--- a/src/mlpack/methods/quic_svd/quic_svd.hpp
+++ b/src/mlpack/methods/quic_svd/quic_svd.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_QUIC_SVD_QUIC_SVD_HPP
 #define MLPACK_METHODS_QUIC_SVD_QUIC_SVD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/tree/cosine_tree/cosine_tree.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/methods/radical/radical.cpp b/src/mlpack/methods/radical/radical.cpp
index 85e06bd..93cf392 100644
--- a/src/mlpack/methods/radical/radical.cpp
+++ b/src/mlpack/methods/radical/radical.cpp
@@ -11,6 +11,8 @@
  */
 
 #include "radical.hpp"
+#include <mlpack/core/util/log.hpp>
+#include <mlpack/core/util/timers.hpp>
 
 using namespace std;
 using namespace arma;
diff --git a/src/mlpack/methods/radical/radical.hpp b/src/mlpack/methods/radical/radical.hpp
index 5a8b966..cf9f6c1 100644
--- a/src/mlpack/methods/radical/radical.hpp
+++ b/src/mlpack/methods/radical/radical.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_RADICAL_RADICAL_HPP
 #define MLPACK_METHODS_RADICAL_RADICAL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace radical {
diff --git a/src/mlpack/methods/radical/radical_main.cpp b/src/mlpack/methods/radical/radical_main.cpp
index ecdac0d..07f40e8 100644
--- a/src/mlpack/methods/radical/radical_main.cpp
+++ b/src/mlpack/methods/radical/radical_main.cpp
@@ -10,7 +10,9 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+#include <mlpack/core/math/random.hpp>
 #include "radical.hpp"
 
 PROGRAM_INFO("RADICAL", "An implementation of RADICAL, a method for independent"
diff --git a/src/mlpack/methods/randomized_svd/randomized_svd.hpp b/src/mlpack/methods/randomized_svd/randomized_svd.hpp
index d1bba75..3a84f01 100644
--- a/src/mlpack/methods/randomized_svd/randomized_svd.hpp
+++ b/src/mlpack/methods/randomized_svd/randomized_svd.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_RANDOMIZED_SVD_RANDOMIZED_SVD_HPP
 #define MLPACK_METHODS_RANDOMIZED_SVD_RANDOMIZED_SVD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace svd {
diff --git a/src/mlpack/methods/range_search/range_search.hpp b/src/mlpack/methods/range_search/range_search.hpp
index e41f7f6..2182795 100644
--- a/src/mlpack/methods/range_search/range_search.hpp
+++ b/src/mlpack/methods/range_search/range_search.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_RANGE_SEARCH_RANGE_SEARCH_HPP
 #define MLPACK_METHODS_RANGE_SEARCH_RANGE_SEARCH_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/core/tree/binary_space_tree.hpp>
 #include "range_search_stat.hpp"
diff --git a/src/mlpack/methods/range_search/range_search_main.cpp b/src/mlpack/methods/range_search/range_search_main.cpp
index 698d6e3..64c8655 100644
--- a/src/mlpack/methods/range_search/range_search_main.cpp
+++ b/src/mlpack/methods/range_search/range_search_main.cpp
@@ -11,7 +11,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/metrics/lmetric.hpp>
 #include <mlpack/core/tree/cover_tree.hpp>
 
diff --git a/src/mlpack/methods/range_search/range_search_stat.hpp b/src/mlpack/methods/range_search/range_search_stat.hpp
index 0a8e243..8df48d7 100644
--- a/src/mlpack/methods/range_search/range_search_stat.hpp
+++ b/src/mlpack/methods/range_search/range_search_stat.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_RANGE_SEARCH_RANGE_SEARCH_STAT_HPP
 #define MLPACK_METHODS_RANGE_SEARCH_RANGE_SEARCH_STAT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace range {
diff --git a/src/mlpack/methods/range_search/rs_model.cpp b/src/mlpack/methods/range_search/rs_model.cpp
index aac504a..6857f34 100644
--- a/src/mlpack/methods/range_search/rs_model.cpp
+++ b/src/mlpack/methods/range_search/rs_model.cpp
@@ -10,6 +10,7 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 #include "rs_model.hpp"
+#include <mlpack/core/math/random_basis.hpp>
 
 using namespace std;
 using namespace mlpack;
diff --git a/src/mlpack/methods/rann/krann_main.cpp b/src/mlpack/methods/rann/krann_main.cpp
index b416121..9675cc4 100644
--- a/src/mlpack/methods/rann/krann_main.cpp
+++ b/src/mlpack/methods/rann/krann_main.cpp
@@ -10,7 +10,7 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "ra_search.hpp"
 #include "ra_model.hpp"
diff --git a/src/mlpack/methods/rann/ra_model_impl.hpp b/src/mlpack/methods/rann/ra_model_impl.hpp
index 0917282..62b7e9d 100644
--- a/src/mlpack/methods/rann/ra_model_impl.hpp
+++ b/src/mlpack/methods/rann/ra_model_impl.hpp
@@ -14,6 +14,7 @@
 
 // In case it hasn't been included yet.
 #include "ra_model.hpp"
+#include <mlpack/core/math/random_basis.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/rann/ra_query_stat.hpp b/src/mlpack/methods/rann/ra_query_stat.hpp
index 4a9cfd0..003da81 100644
--- a/src/mlpack/methods/rann/ra_query_stat.hpp
+++ b/src/mlpack/methods/rann/ra_query_stat.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_RANN_RA_QUERY_STAT_HPP
 #define MLPACK_METHODS_RANN_RA_QUERY_STAT_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <mlpack/core/tree/binary_space_tree.hpp>
 
diff --git a/src/mlpack/methods/rann/ra_search.hpp b/src/mlpack/methods/rann/ra_search.hpp
index fe87e2c..c88e189 100644
--- a/src/mlpack/methods/rann/ra_search.hpp
+++ b/src/mlpack/methods/rann/ra_search.hpp
@@ -23,7 +23,7 @@
 #ifndef MLPACK_METHODS_RANN_RA_SEARCH_HPP
 #define MLPACK_METHODS_RANN_RA_SEARCH_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include <mlpack/core/tree/binary_space_tree.hpp>
 
diff --git a/src/mlpack/methods/rann/ra_search_impl.hpp b/src/mlpack/methods/rann/ra_search_impl.hpp
index 699bac7..2a292f4 100644
--- a/src/mlpack/methods/rann/ra_search_impl.hpp
+++ b/src/mlpack/methods/rann/ra_search_impl.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_RANN_RA_SEARCH_IMPL_HPP
 #define MLPACK_METHODS_RANN_RA_SEARCH_IMPL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 #include "ra_search_rules.hpp"
 
diff --git a/src/mlpack/methods/rann/ra_util.hpp b/src/mlpack/methods/rann/ra_util.hpp
index 7139421..a7f3f2f 100644
--- a/src/mlpack/methods/rann/ra_util.hpp
+++ b/src/mlpack/methods/rann/ra_util.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_RANN_RA_UTIL_HPP
 #define MLPACK_METHODS_RANN_RA_UTIL_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace neighbor {
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd.hpp b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
index 9dfa645..11114ed 100644
--- a/src/mlpack/methods/regularized_svd/regularized_svd.hpp
+++ b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_HPP
 #define MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/sgd/sgd.hpp>
 #include <mlpack/methods/cf/cf.hpp>
 
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd_function.hpp b/src/mlpack/methods/regularized_svd/regularized_svd_function.hpp
index 1ff2818..e4cd620 100644
--- a/src/mlpack/methods/regularized_svd/regularized_svd_function.hpp
+++ b/src/mlpack/methods/regularized_svd/regularized_svd_function.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_FUNCTION_SVD_HPP
 #define MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_FUNCTION_SVD_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/sgd/sgd.hpp>
 
 namespace mlpack {
diff --git a/src/mlpack/methods/rmva/rmva.hpp b/src/mlpack/methods/rmva/rmva.hpp
new file mode 100644
index 0000000..a469296
--- /dev/null
+++ b/src/mlpack/methods/rmva/rmva.hpp
@@ -0,0 +1,963 @@
+/**
+ * @file rmva.hpp
+ * @author Marcus Edel
+ *
+ * Definition of the RecurrentNeuralAttention class, which implements the
+ * Recurrent Model for Visual Attention.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef __MLPACK_METHODS_RMVA_RMVA_HPP
+#define __MLPACK_METHODS_RMVA_RMVA_HPP
+
+#include <mlpack/prereqs.hpp>
+
+#include <mlpack/methods/ann/network_util.hpp>
+#include <mlpack/methods/ann/layer/layer_traits.hpp>
+#include <mlpack/methods/ann/init_rules/random_init.hpp>
+#include <mlpack/methods/ann/performance_functions/cee_function.hpp>
+#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
+#include <mlpack/methods/ann/layer/negative_log_likelihood_layer.hpp>
+#include <mlpack/methods/ann/layer/vr_class_reward_layer.hpp>
+
+#include <boost/ptr_container/ptr_vector.hpp>
+
+namespace mlpack {
+namespace ann /** Artificial Neural Network. */ {
+
+/**
+ * This class implements the Recurrent Model for Visual Attention, using a
+ * variety of possible layer implementations.
+ *
+ * For more information, see the following paper.
+ *
+ * @code
+ * @article{MnihHGK14,
+ *   title={Recurrent Models of Visual Attention},
+ *   author={Volodymyr Mnih and Nicolas Heess and Alex Graves and
+ *           Koray Kavukcuoglu},
+ *   journal={CoRR},
+ *   volume={abs/1406.6247},
+ *   year={2014}
+ * }
+ * @endcode
+ *
+ * @tparam LocatorType Type of locator network.
+ * @tparam LocationSensorType Type of location sensor network.
+ * @tparam GlimpseSensorType Type of glimpse sensor network.
+ * @tparam GlimpseType Type of glimpse network.
+ * @tparam StartType Type of start network.
+ * @tparam FeedbackType Type of feedback network.
+ * @tparam TransferType Type of transfer network.
+ * @tparam ClassifierType Type of classifier network.
+ * @tparam RewardPredictorType Type of reward predictor network.
+ * @tparam InitializationRuleType Rule used to initialize the weight matrix.
+ * @tparam MatType Matrix type (arma::mat or arma::sp_mat).
+ */
+template<
+  typename LocatorType,
+  typename LocationSensorType,
+  typename GlimpseSensorType,
+  typename GlimpseType,
+  typename StartType,
+  typename FeedbackType,
+  typename TransferType,
+  typename ClassifierType,
+  typename RewardPredictorType,
+  typename InitializationRuleType = RandomInitialization,
+  typename MatType = arma::mat
+>
+class RecurrentNeuralAttention
+{
+ public:
+  //! Convenience typedef for the internal model construction.
+  using NetworkType = RecurrentNeuralAttention<
+      LocatorType,
+      LocationSensorType,
+      GlimpseSensorType,
+      GlimpseType,
+      StartType,
+      FeedbackType,
+      TransferType,
+      ClassifierType,
+      RewardPredictorType,
+      InitializationRuleType,
+      MatType>;
+
+  /**
+   * Construct the RecurrentNeuralAttention object, which will build the
+   * recurrent model for visual attention using the specified networks.
+   *
+   * @param locator The locator network.
+   * @param locationSensor The location sensor network.
+   * @param glimpseSensor The glimpse sensor network.
+   * @param glimpse The glimpse network.
+   * @param start The start network.
+   * @param feedback The feedback network.
+   * @param transfer The transfer network.
+   * @param classifier The classifier network.
+   * @param rewardPredictor The reward predictor network.
+   * @param nStep Number of steps for back-propagation through time.
+   * @param initializeRule Rule used to initialize the weight matrix.
+   */
+  template<typename TypeLocator,
+           typename TypeLocationSensor,
+           typename TypeGlimpseSensor,
+           typename TypeGlimpse,
+           typename TypeStart,
+           typename TypeFeedback,
+           typename TypeTransfer,
+           typename TypeClassifier,
+           typename TypeRewardPredictor>
+  RecurrentNeuralAttention(TypeLocator&& locator,
+                           TypeLocationSensor&& locationSensor,
+                           TypeGlimpseSensor&& glimpseSensor,
+                           TypeGlimpse&& glimpse,
+                           TypeStart&& start,
+                           TypeFeedback&& feedback,
+                           TypeTransfer&& transfer,
+                           TypeClassifier&& classifier,
+                           TypeRewardPredictor&& rewardPredictor,
+                           const size_t nStep,
+                           InitializationRuleType initializeRule =
+                              InitializationRuleType());
+  /**
+   * Train the network on the given input data using the given optimizer.
+   *
+   * This will use the existing model parameters as a starting point for the
+   * optimization. If this is not what you want, then you should access the
+   * parameters vector directly with Parameters() and modify it as desired.
+   *
+   * @tparam OptimizerType Type of optimizer to use to train the model.
+   * @param predictors Input training variables.
+   * @param responses Outputs results from input training variables.
+   * @param optimizer Instantiated optimizer used to train the model.
+   */
+  template<
+      template<typename> class OptimizerType = mlpack::optimization::RMSprop
+  >
+  void Train(const arma::mat& predictors,
+             const arma::mat& responses,
+             OptimizerType<NetworkType>& optimizer);
+
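+  // A minimal usage sketch, mirroring the rmva_main.cpp added later in this
+  // commit; `net`, `predictors`, `responses`, and the optimizer settings are
+  // placeholders, not part of this header:
+  //
+  //   SGD<decltype(net)> opt(net);
+  //   opt.StepSize() = 0.01;
+  //   opt.MaxIterations() = 500000;
+  //   net.Train(predictors, responses, opt);
+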
+  /**
+   * Predict the responses to a given set of predictors. The responses will
+   * reflect the output of the classifier network for each predictor.
+   *
+   * @param predictors Input predictors.
+   * @param responses Matrix to put output predictions of responses into.
+   */
+  void Predict(arma::mat& predictors, arma::mat& responses);
+
+  /**
+   * Evaluate the network with the given parameters. This function is usually
+   * called by the optimizer to train the model.
+   *
+   * @param parameters Matrix of model parameters.
+   * @param i Index of point to use for objective function evaluation.
+   * @param deterministic Whether or not to train or test the model. Note that
+   *     some layers act differently in training or testing mode.
+   */
+  double Evaluate(const arma::mat& parameters,
+                  const size_t i,
+                  const bool deterministic = true);
+
+  /**
+   * Evaluate the gradient of the network with the given parameters, and with
+   * respect to only one point in the dataset. This is useful for
+   * optimizers such as SGD, which require a separable objective function.
+   *
+   * @param parameters Matrix of the model parameters to be optimized.
+   * @param i Index of the point to use for objective function gradient
+   *     evaluation.
+   * @param gradient Matrix to output gradient into.
+   */
+  void Gradient(const arma::mat& parameters,
+                const size_t i,
+                arma::mat& gradient);
+
+  //! Return the number of separable functions (the number of predictor points).
+  size_t NumFunctions() const { return numFunctions; }
+
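+  // Evaluate(), Gradient(), and NumFunctions() together form the decomposable
+  // objective interface that mlpack's SGD-style optimizers expect.  Roughly
+  // (a sketch of the optimizer loop, not the actual optimizer code):
+  //
+  //   for (size_t i = 0; i < net.NumFunctions(); ++i)
+  //   {
+  //     net.Gradient(parameters, i, gradient);
+  //     parameters -= stepSize * gradient;
+  //   }
+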
+  //! Return the initial point for the optimization.
+  const arma::mat& Parameters() const { return parameter; }
+  //! Modify the initial point for the optimization.
+  arma::mat& Parameters() { return parameter; }
+
+  //! Return the number of steps to back-propagate through time.
+  const size_t& Rho() const { return nStep; }
+  //! Modify the number of steps to back-propagate through time.
+  size_t& Rho() { return nStep; }
+
+  //! Return the current location.
+  const arma::mat& Location();
+
+  //! Serialize the model.
+  template<typename Archive>
+  void Serialize(Archive& ar, const unsigned int /* version */);
+
+ private:
+  /*
+   * Predict the response of the given input matrix.
+   */
+  template <typename InputType, typename OutputType>
+  void SinglePredict(const InputType& input, OutputType& output)
+  {
+    // Get the locator input size.
+    if (!inputSize)
+    {
+      inputSize = NetworkInputSize(locator);
+    }
+
+    // Reset networks.
+    ResetParameter(locator);
+    ResetParameter(locationSensor);
+    ResetParameter(glimpseSensor);
+    ResetParameter(glimpse);
+    ResetParameter(feedback);
+    ResetParameter(transfer);
+    ResetParameter(classifier);
+    ResetParameter(rewardPredictor);
+    ResetParameter(start);
+
+    // Sample an initial starting action by forwarding zeros through the
+    // locator.
+    locatorInput.push_back(new arma::cube(arma::zeros<arma::cube>(inputSize, 1,
+        input.n_slices)));
+
+    // Forward pass through the recurrent network.
+    for (step = 0; step < nStep; step++)
+    {
+      // Locator forward pass.
+      Forward(locatorInput.back(), locator);
+
+      // Location sensor forward pass.
+      Forward(std::get<std::tuple_size<LocatorType>::value - 1>(
+          locator).OutputParameter(), locationSensor);
+
+      // Set the location parameter for all layers that implement a Location
+      // function, e.g., the GlimpseLayer.
+      ResetLocation(std::get<std::tuple_size<LocatorType>::value - 1>(
+          locator).OutputParameter(), glimpseSensor);
+
+      // Glimpse sensor forward pass.
+      Forward(input, glimpseSensor);
+
+      // Concatenate the activations from the location sensor and the
+      // glimpse sensor.
+      arma::mat concatLayerOutput = arma::join_cols(
+          std::get<std::tuple_size<LocationSensorType>::value - 1>(
+          locationSensor).OutputParameter(),
+          std::get<std::tuple_size<GlimpseSensorType>::value - 1>(
+          glimpseSensor).OutputParameter());
+
+      // Glimpse forward pass.
+      Forward(concatLayerOutput, glimpse);
+
+      if (step == 0)
+      {
+        // Start forward pass.
+        Forward(std::get<std::tuple_size<GlimpseType>::value - 1>(
+            glimpse).OutputParameter(), start);
+
+        // Transfer forward pass.
+        Forward(std::get<std::tuple_size<StartType>::value - 1>(
+            start).OutputParameter(), transfer);
+      }
+      else
+      {
+        // Feedback forward pass.
+        Forward(std::get<std::tuple_size<TransferType>::value - 1>(
+            transfer).OutputParameter(), feedback);
+
+        arma::mat feedbackLayerOutput =
+          std::get<std::tuple_size<GlimpseType>::value - 1>(
+          glimpse).OutputParameter() +
+          std::get<std::tuple_size<FeedbackType>::value - 1>(
+          feedback).OutputParameter();
+
+        // Transfer forward pass.
+        Forward(feedbackLayerOutput, transfer);
+      }
+
+      // Update the input for the next step.
+      locatorInput.push_back(new arma::cube(
+          std::get<std::tuple_size<TransferType>::value - 1>(
+          transfer).OutputParameter().memptr(), locatorInput.back().n_rows,
+          locatorInput.back().n_cols, locatorInput.back().n_slices));
+    }
+
+    // Classifier forward pass.
+    Forward(locatorInput.back().slice(0), classifier);
+
+    output = std::get<std::tuple_size<ClassifierType>::value - 1>(
+        classifier).OutputParameter();
+  }
+
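+  // In rough equation form, the recurrence implemented above is (an informal
+  // summary of the code, with x the input image and h_t the transfer output;
+  // the initial locator input is all zeros):
+  //
+  //   l_t = locator(h_{t-1})
+  //   g_t = glimpse([locationSensor(l_t); glimpseSensor(x, l_t)])
+  //   h_t = transfer(start(g_t))                (t == 0)
+  //   h_t = transfer(g_t + feedback(h_{t-1}))   (t > 0)
+  //
+  // and the classifier is applied to the final transfer output.
+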
+  /**
+   * Update the reward for all layers that implement the Reward() function.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ResetReward(const double reward, std::tuple<Tp...>& network)
+  {
+    SetReward(reward, std::get<I>(network));
+    ResetReward<I + 1, Tp...>(reward, network);
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ResetReward(const double /* reward */, std::tuple<Tp...>& /* network */)
+  {
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      HasRewardCheck<T, double&(T::*)()>::value, void>::type
+  SetReward(const double reward, T& layer)
+  {
+    layer.Reward() = reward;
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      !HasRewardCheck<T, double&(T::*)()>::value, void>::type
+  SetReward(const double /* reward */, T& /* layer */)
+  {
+    /* Nothing to do here */
+  }
+
+  /**
+   * Reset the network by clearing each layer's delta and setting its
+   * deterministic flag.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& /* network */) { /* Nothing to do here */ }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ResetParameter(std::tuple<Tp...>& network)
+  {
+    ResetDeterministic(std::get<I>(network));
+    std::get<I>(network).Delta().zeros();
+
+    ResetParameter<I + 1, Tp...>(network);
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& layer)
+  {
+    layer.Deterministic() = deterministic;
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      !HasDeterministicCheck<T, bool&(T::*)(void)>::value, void>::type
+  ResetDeterministic(T& /* layer */) { /* Nothing to do here */ }
+
+  /**
+   * Reset the location by updating the location for all layers that
+   * implement the Location() function.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ResetLocation(const arma::mat& /* location */,
+                std::tuple<Tp...>& /* network */)
+  {
+    // Nothing to do here.
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ResetLocation(const arma::mat& location, std::tuple<Tp...>& network)
+  {
+    SetLocation(std::get<I>(network), location);
+    ResetLocation<I + 1, Tp...>(location, network);
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      HasLocationCheck<T, void(T::*)(const arma::mat&)>::value, void>::type
+  SetLocation(T& layer, const arma::mat& location)
+  {
+    layer.Location(location);
+  }
+
+  template<typename T>
+  typename std::enable_if<
+      !HasLocationCheck<T, void(T::*)(const arma::mat&)>::value, void>::type
+  SetLocation(T& /* layer */, const arma::mat& /* location */)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Save the network layer activations.
+   */
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  SaveActivations(boost::ptr_vector<MatType>& activations,
+                  std::tuple<Tp...>& network,
+                  size_t& activationCounter)
+  {
+    Save(I, activations, std::get<I>(network),
+        std::get<I>(network).InputParameter());
+
+    activationCounter++;
+    SaveActivations<I + 1, Tp...>(activations, network, activationCounter);
+  }
+
+  template<size_t I = 0, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  SaveActivations(boost::ptr_vector<MatType>& /* activations */,
+                  std::tuple<Tp...>& /* network */,
+                  size_t& /* activationCounter */)
+  {
+    // Nothing to do here.
+  }
+
+  /**
+   * Distinguish between recurrent and non-recurrent layers when storing
+   * the activations.
+   */
+  template<typename T, typename P>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Save(const size_t /* layerNumber */,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* unused */)
+  {
+    activations.push_back(new MatType(layer.RecurrentParameter()));
+  }
+
+  template<typename T, typename P>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Save(const size_t /* layerNumber */,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* unused */)
+  {
+    activations.push_back(new MatType(layer.OutputParameter()));
+  }
+
+  template<size_t I = 0, typename DataTypeA, typename DataTypeB, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  SaveActivations(boost::ptr_vector<DataTypeA>& activationsA,
+                  boost::ptr_vector<DataTypeB>& activationsB,
+                  size_t& dataTypeACounter,
+                  size_t& dataTypeBCounter,
+                  std::tuple<Tp...>& network)
+  {
+    Save(activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
+        std::get<I>(network), std::get<I>(network).OutputParameter());
+
+    SaveActivations<I + 1, DataTypeA, DataTypeB, Tp...>(
+        activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
+        network);
+  }
+
+  template<size_t I = 0, typename DataTypeA, typename DataTypeB, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  SaveActivations(boost::ptr_vector<DataTypeA>& /* activationsA */,
+                  boost::ptr_vector<DataTypeB>& /* activationsB */,
+                  size_t& /* dataTypeACounter */,
+                  size_t& /* dataTypeBCounter */,
+                  std::tuple<Tp...>& /* network */)
+  {
+    // Nothing to do here.
+  }
+
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Save(boost::ptr_vector<DataTypeA>& activationsA,
+            boost::ptr_vector<DataTypeB>& /* activationsB */,
+            size_t& dataTypeACounter,
+            size_t& /* dataTypeBCounter */,
+            T& layer,
+            DataTypeA& /* unused */)
+  {
+    activationsA.push_back(new DataTypeA(layer.OutputParameter()));
+    dataTypeACounter++;
+  }
+
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Save(boost::ptr_vector<DataTypeA>& /* activationsA */,
+            boost::ptr_vector<DataTypeB>& activationsB,
+            size_t& /* dataTypeACounter */,
+            size_t& dataTypeBCounter,
+            T& layer,
+            DataTypeB& /* unused */)
+  {
+    activationsB.push_back(new DataTypeB(layer.OutputParameter()));
+    dataTypeBCounter++;
+  }
+
+  /**
+   * Load the network layer activations.
+   */
+  template<size_t I = 0, typename DataType, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<MatType>& /* activations */,
+                  size_t& /* activationCounter */,
+                  std::tuple<Tp...>& network)
+  {
+    std::get<0>(network).InputParameter() = input;
+    LinkParameter(network);
+  }
+
+  template<size_t I = 0, typename DataType, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<MatType>& activations,
+                  size_t& activationCounter,
+                  std::tuple<Tp...>& network)
+  {
+    Load(--activationCounter, activations,
+        std::get<sizeof...(Tp) - I - 1>(network),
+        std::get<I>(network).InputParameter());
+
+    LoadActivations<I + 1, DataType, Tp...>(input, activations,
+        activationCounter, network);
+  }
+
+  /**
+   * Distinguish between recurrent and non-recurrent layers when restoring
+   * the activations.
+   */
+  template<typename T, typename P>
+  typename std::enable_if<
+      HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Load(const size_t layerNumber,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* output */)
+  {
+    layer.RecurrentParameter() = activations[layerNumber];
+  }
+
+  template<typename T, typename P>
+  typename std::enable_if<
+      !HasRecurrentParameterCheck<T, P&(T::*)()>::value, void>::type
+  Load(const size_t layerNumber,
+       boost::ptr_vector<MatType>& activations,
+       T& layer,
+       P& /* output */)
+  {
+    layer.OutputParameter() = activations[layerNumber];
+  }
+
+  template<size_t I = 0,
+           typename DataType,
+           typename DataTypeA,
+           typename DataTypeB,
+           typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<DataTypeA>& activationsA,
+                  boost::ptr_vector<DataTypeB>& activationsB,
+                  size_t& dataTypeACounter,
+                  size_t& dataTypeBCounter,
+                  std::tuple<Tp...>& network)
+  {
+    Load(activationsA,
+         activationsB,
+         dataTypeACounter,
+         dataTypeBCounter,
+         std::get<sizeof...(Tp) - I - 1>(network),
+         std::get<sizeof...(Tp) - I - 1>(network).OutputParameter());
+
+    LoadActivations<I + 1, DataType, DataTypeA, DataTypeB, Tp...>(
+        input, activationsA, activationsB, dataTypeACounter, dataTypeBCounter,
+        network);
+  }
+
+  template<size_t I = 0,
+           typename DataType,
+           typename DataTypeA,
+           typename DataTypeB,
+           typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  LoadActivations(DataType& input,
+                  boost::ptr_vector<DataTypeA>& /* activationsA */,
+                  boost::ptr_vector<DataTypeB>& /* activationsB */,
+                  size_t& /* dataTypeACounter */,
+                  size_t& /* dataTypeBCounter */,
+                  std::tuple<Tp...>& network)
+  {
+    std::get<0>(network).InputParameter() = input;
+    LinkParameter(network);
+  }
+
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Load(boost::ptr_vector<DataTypeA>& activationsA,
+            boost::ptr_vector<DataTypeB>& /* activationsB */,
+            size_t& dataTypeACounter,
+            size_t& /* dataTypeBCounter */,
+            T& layer,
+            DataTypeA& /* output */)
+  {
+    layer.OutputParameter() = activationsA[--dataTypeACounter];
+  }
+
+  template<typename T, typename DataTypeA, typename DataTypeB>
+  void Load(boost::ptr_vector<DataTypeA>& /* activationsA */,
+            boost::ptr_vector<DataTypeB>& activationsB,
+            size_t& /* dataTypeACounter */,
+            size_t& dataTypeBCounter,
+            T& layer,
+            DataTypeB& /* output */)
+  {
+    layer.OutputParameter() = activationsB[--dataTypeBCounter];
+  }
+
+  /**
+   * Run a single iteration of the feed-forward algorithm through the given
+   * network, activating each layer in turn with the given input.
+   */
+  template<size_t I = 0, typename DataType, typename... Tp>
+  void Forward(const DataType& input, std::tuple<Tp...>& t)
+  {
+    std::get<I>(t).InputParameter() = input;
+    std::get<I>(t).Forward(std::get<I>(t).InputParameter(),
+        std::get<I>(t).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(t);
+  }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& network)
+  {
+    LinkParameter(network);
+  }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  ForwardTail(std::tuple<Tp...>& t)
+  {
+    std::get<I>(t).Forward(std::get<I - 1>(t).OutputParameter(),
+        std::get<I>(t).OutputParameter());
+
+    ForwardTail<I + 1, Tp...>(t);
+  }
+
+  /**
+   * Run a single iteration of the backward algorithm, propagating the given
+   * error back through the network and storing each layer's delta.
+   */
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<sizeof...(Tp) == 1, void>::type
+  Backward(const DataType& error, std::tuple<Tp ...>& t)
+  {
+    std::get<sizeof...(Tp) - I>(t).Backward(
+      std::get<sizeof...(Tp) - I>(t).OutputParameter(), error,
+      std::get<sizeof...(Tp) - I>(t).Delta());
+  }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  Backward(const DataType& error, std::tuple<Tp ...>& t)
+  {
+    std::get<sizeof...(Tp) - I>(t).Backward(
+        std::get<sizeof...(Tp) - I>(t).OutputParameter(), error,
+        std::get<sizeof...(Tp) - I>(t).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, t);
+  }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I == (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& /* error */, std::tuple<Tp...>& t)
+  {
+    std::get<sizeof...(Tp) - I>(t).Backward(
+        std::get<sizeof...(Tp) - I>(t).OutputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(t).Delta(),
+        std::get<sizeof...(Tp) - I>(t).Delta());
+  }
+
+  template<size_t I = 1, typename DataType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp)), void>::type
+  BackwardTail(const DataType& error, std::tuple<Tp...>& t)
+  {
+    std::get<sizeof...(Tp) - I>(t).Backward(
+        std::get<sizeof...(Tp) - I>(t).OutputParameter(),
+        std::get<sizeof...(Tp) - I + 1>(t).Delta(),
+        std::get<sizeof...(Tp) - I>(t).Delta());
+
+    BackwardTail<I + 1, DataType, Tp...>(error, t);
+  }
+
+  /**
+   * Link the calculated activation with the correct layer.
+   */
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I == sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp ...>& /* network */) { /* Nothing to do here */ }
+
+  template<size_t I = 1, typename... Tp>
+  typename std::enable_if<I < sizeof...(Tp), void>::type
+  LinkParameter(std::tuple<Tp...>& network)
+  {
+    if (!LayerTraits<typename std::remove_reference<
+        decltype(std::get<I>(network))>::type>::IsBiasLayer)
+    {
+      std::get<I>(network).InputParameter() = std::get<I - 1>(
+          network).OutputParameter();
+    }
+
+    LinkParameter<I + 1, Tp...>(network);
+  }
+
+  /**
+   * Iterate through all layer modules and update the gradient using the
+   * layer-defined optimizer.
+   */
+  template<typename InputType, typename ErrorType, typename... Tp>
+  void UpdateGradients(const InputType& input,
+                       const ErrorType& error,
+                       std::tuple<Tp...>& network)
+  {
+    Update(std::get<0>(network),
+           input,
+           std::get<1>(network).Delta(),
+           std::get<1>(network).OutputParameter());
+
+    UpdateGradients<1, ErrorType, Tp...>(error, network);
+  }
+
+  template<size_t I = 0, typename ErrorType, typename... Tp>
+  typename std::enable_if<I < (sizeof...(Tp) - 1), void>::type
+  UpdateGradients(const ErrorType& error, std::tuple<Tp...>& network)
+  {
+    Update(std::get<I>(network),
+           std::get<I>(network).InputParameter(),
+           std::get<I + 1>(network).Delta(),
+           std::get<I>(network).OutputParameter());
+
+    UpdateGradients<I + 1, ErrorType, Tp...>(error, network);
+  }
+
+  template<size_t I = 0, typename ErrorType, typename... Tp>
+  typename std::enable_if<I == (sizeof...(Tp) - 1), void>::type
+  UpdateGradients(const ErrorType& error, std::tuple<Tp...>& network)
+  {
+    Update(std::get<I>(network),
+           std::get<I>(network).InputParameter(),
+           error,
+           std::get<I>(network).OutputParameter());
+  }
+
+  template<typename LayerType,
+           typename InputType,
+           typename ErrorType,
+           typename GradientType>
+  typename std::enable_if<
+      HasGradientCheck<LayerType,
+          void(LayerType::*)(const InputType&,
+                             const ErrorType&,
+                             GradientType&)>::value, void>::type
+  Update(LayerType& layer,
+         const InputType& input,
+         const ErrorType& error,
+         GradientType& /* gradient */)
+  {
+    layer.Gradient(input, error, layer.Gradient());
+  }
+
+  template<typename LayerType,
+           typename InputType,
+           typename ErrorType,
+           typename GradientType>
+  typename std::enable_if<
+      !HasGradientCheck<LayerType,
+          void(LayerType::*)(const InputType&,
+                             const ErrorType&,
+                             GradientType&)>::value, void>::type
+  Update(LayerType& /* layer */,
+         const InputType& /* input */,
+         const ErrorType& /* error */,
+         GradientType& /* gradient */)
+  {
+    // Nothing to do here
+  }
+
+  //! The locator network.
+  LocatorType locator;
+
+  //! The location sensor network.
+  LocationSensorType locationSensor;
+
+  //! The glimpse sensor network.
+  GlimpseSensorType glimpseSensor;
+
+  //! The glimpse network.
+  GlimpseType glimpse;
+
+  //! The start network.
+  StartType start;
+
+  //! The feedback network.
+  FeedbackType feedback;
+
+  //! The transfer network.
+  TransferType transfer;
+
+  //! The classifier network.
+  ClassifierType classifier;
+
+  //! The reward predictor network.
+  RewardPredictorType rewardPredictor;
+
+  //! The number of steps for back-propagation through time.
+  size_t nStep;
+
+  //! Locally stored network input size.
+  size_t inputSize;
+
+  //! The current evaluation mode (training or testing).
+  bool deterministic;
+
+  //! The index of the current step.
+  size_t step;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the glimpse network.
+  boost::ptr_vector<arma::mat> glimpseActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the locator network.
+  boost::ptr_vector<arma::mat> locatorActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the feedback network.
+  boost::ptr_vector<arma::mat> feedbackActivations;
+
+  //! The activation storage we are using to save the feedback network input.
+  boost::ptr_vector<arma::mat> feedbackActivationsInput;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the transfer network.
+  boost::ptr_vector<arma::mat> transferActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the location sensor network.
+  boost::ptr_vector<arma::mat> locationSensorActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the glimpse sensor network.
+  boost::ptr_vector<arma::mat> glimpseSensorMatActivations;
+  boost::ptr_vector<arma::cube> glimpseSensorCubeActivations;
+
+  //! The activation storage we are using to perform the feed backward pass for
+  //! the locator input.
+  boost::ptr_vector<arma::cube> locatorInput;
+
+  //! The storage we are using to save the location.
+  boost::ptr_vector<arma::mat> location;
+
+  //! The current number of activations in the glimpse sensor network.
+  size_t glimpseSensorMatCounter;
+  size_t glimpseSensorCubeCounter;
+
+  //! The current number of activations in the glimpse network.
+  size_t glimpseActivationsCounter;
+
+  //! The current number of activations in the start network.
+  size_t startActivationsCounter;
+
+  //! The current number of activations in the feedback network.
+  size_t feedbackActivationsCounter;
+
+  //! The current number of activations in the transfer network.
+  size_t transferActivationsCounter;
+
+  //! The current number of activations in the locator network.
+  size_t locatorActivationsCounter;
+
+  //! The current number of activations in the location sensor network.
+  size_t locationSensorActivationsCounter;
+
+  //! The current number of activations in the glimpse sensor network.
+  size_t glimpseSensorMatActivationsCounter;
+  size_t glimpseSensorCubeActivationsCounter;
+
+  //! The current number of location for the location storage.
+  size_t locationCounter;
+
+  //! Matrix of (trained) parameters.
+  arma::mat parameter;
+
+  //! The matrix of data points (predictors).
+  arma::mat predictors;
+
+  //! The matrix of responses to the input data points.
+  arma::mat responses;
+
+  //! The number of separable functions (the number of predictor points).
+  size_t numFunctions;
+
+  //! Storage used to merge the reward input.
+  arma::field<arma::mat> rewardInput;
+
+  //! The current input.
+  arma::cube input;
+
+  //! The current target.
+  arma::mat target;
+
+  //! Locally stored performance functions.
+  NegativeLogLikelihoodLayer<> negativeLogLikelihoodFunction;
+  VRClassRewardLayer<> vRClassRewardFunction;
+
+  //! Locally stored size of the locator network.
+  size_t locatorSize;
+
+  //! Locally stored size of the location sensor network.
+  size_t locationSensorSize;
+
+  //! Locally stored size of the glimpse sensor network.
+  size_t glimpseSensorSize;
+
+  //! Locally stored size of the glimpse network.
+  size_t glimpseSize;
+
+  //! Locally stored size of the start network.
+  size_t startSize;
+
+  //! Locally stored size of the feedback network.
+  size_t feedbackSize;
+
+  //! Locally stored size of the transfer network.
+  size_t transferSize;
+
+  //! Locally stored size of the classifier network.
+  size_t classifierSize;
+
+  //! Locally stored size of the reward predictor network.
+  size_t rewardPredictorSize;
+
+  //! Locally stored recurrent gradient.
+  arma::mat recurrentGradient;
+
+  //! Locally stored action error.
+  arma::mat actionError;
+
+  //! Locally stored current location.
+  arma::mat evaluationLocation;
+}; // class RecurrentNeuralAttention
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation.
+#include "rmva_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/rmva/rmva_main.cpp b/src/mlpack/methods/rmva/rmva_main.cpp
new file mode 100644
index 0000000..fafac26
--- /dev/null
+++ b/src/mlpack/methods/rmva/rmva_main.cpp
@@ -0,0 +1,286 @@
+/**
+ * @file rmva_main.cpp
+ * @author Marcus Edel
+ *
+ * Main executable for the Recurrent Model for Visual Attention.
+ *
+ * mlpack is free software; you may redistribute it and/or modify it under the
+ * terms of the 3-clause BSD license.  You should have received a copy of the
+ * 3-clause BSD license along with mlpack.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
+
+#include "rmva.hpp"
+
+#include <mlpack/methods/ann/layer/glimpse_layer.hpp>
+#include <mlpack/methods/ann/layer/linear_layer.hpp>
+#include <mlpack/methods/ann/layer/bias_layer.hpp>
+#include <mlpack/methods/ann/layer/base_layer.hpp>
+#include <mlpack/methods/ann/layer/reinforce_normal_layer.hpp>
+#include <mlpack/methods/ann/layer/multiply_constant_layer.hpp>
+#include <mlpack/methods/ann/layer/constant_layer.hpp>
+#include <mlpack/methods/ann/layer/log_softmax_layer.hpp>
+#include <mlpack/methods/ann/layer/hard_tanh_layer.hpp>
+
+#include <mlpack/core/optimizers/minibatch_sgd/minibatch_sgd.hpp>
+#include <mlpack/core/optimizers/sgd/sgd.hpp>
+
+using namespace mlpack;
+using namespace mlpack::ann;
+using namespace mlpack::optimization;
+using namespace std;
+
+PROGRAM_INFO("Recurrent Model for Visual Attention",
+    "This program trains the Recurrent Model for Visual Attention on the given "
+    "labeled training set, or loads a model from the given model file, and then"
+    " may use that trained model to classify the points in a given test set."
+    "\n\n"
+    "Labels are expected to be passed in separately as their own file "
+    "(--labels_file).  If training is not desired, a pre-existing model can be "
+    "loaded with the --input_model_file (-m) option."
+    "\n\n"
+    "If classifying a test set is desired, the test set should be in the file "
+    "specified with the --test_file (-T) option, and the classifications will "
+    "be saved to the file specified with the --output_file (-o) option.  If "
+    "saving a trained model is desired, the --output_model_file (-M) option "
+    "should be given.");
+
+// Model loading/saving.
+PARAM_STRING_IN("input_model_file", "File containing the Recurrent Model for "
+    "Visual Attention.", "m", "");
+PARAM_STRING_OUT("output_model_file", "File to save trained Recurrent Model for"
+    " Visual Attention to.", "M");
+
+// Training parameters.
+PARAM_MATRIX_IN("training", "Matrix containing the training set.", "t");
+PARAM_MATRIX_IN("labels", "Matrix containing labels for the training set.",
+    "l");
+
+PARAM_STRING_IN("optimizer", "Optimizer to use; 'sgd', 'minibatch-sgd', or "
+    "'lbfgs'.", "O", "minibatch-sgd");
+
+PARAM_INT_IN("max_iterations", "Maximum number of iterations for SGD or RMSProp"
+    " (0 indicates no limit).", "n", 500000);
+PARAM_DOUBLE_IN("tolerance", "Maximum tolerance for termination of SGD or "
+    "RMSProp.", "e", 1e-7);
+
+PARAM_DOUBLE_IN("step_size", "Step size for stochastic gradient descent "
+    "(alpha),", "a", 0.01);
+PARAM_FLAG("linear_scan", "Don't shuffle the order in which data points are "
+    "visited for SGD or mini-batch SGD.", "L");
+PARAM_INT_IN("batch_size", "Batch size for mini-batch SGD.", "b", 20);
+
+PARAM_INT_IN("rho", "Number of steps for the back-propagate through time.", "r",
+    7);
+
+PARAM_INT_IN("classes", "The number of classes.", "c", 10);
+
+PARAM_INT_IN("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
+
+// Test parameters.
+PARAM_MATRIX_IN("test", "Matrix containing the test set.", "T");
+PARAM_MATRIX_OUT("output", "The matrix in which the predicted labels for the "
+    "test set will be written.", "o");
+
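+// A hypothetical invocation using the options defined above.  The executable
+// name assumes mlpack's usual mlpack_<method> naming convention, and the file
+// names are placeholders:
+//
+//   mlpack_rmva -t train.csv -l labels.csv -M rmva_model.xml -O minibatch-sgd
+//   mlpack_rmva -m rmva_model.xml -T test.csv -o predictions.csv
+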
+int main(int argc, char** argv)
+{
+  CLI::ParseCommandLine(argc, argv);
+
+  // Check input parameters.
+  if (CLI::HasParam("training") && CLI::HasParam("input_model_file"))
+    Log::Fatal << "Cannot specify both --training_file (-t) and "
+       << "--input_model_file (-m)!" << endl;
+
+  if (!CLI::HasParam("training") && !CLI::HasParam("input_model_file"))
+    Log::Fatal << "Neither --training_file (-t) nor --input_model_file (-m) are"
+        << " specified!" << endl;
+
+  if (!CLI::HasParam("training") && CLI::HasParam("labels"))
+    Log::Warn << "--labels_file (-l) ignored because --training_file (-t) is "
+        << "not specified." << endl;
+
+  if (!CLI::HasParam("output") && !CLI::HasParam("output_model_file"))
+    Log::Warn << "Neither --output_file (-o) nor --output_model_file (-M) "
+        << "specified; no output will be saved!" << endl;
+
+  if (CLI::HasParam("output") && !CLI::HasParam("test"))
+    Log::Warn << "--output_file (-o) ignored because no test file specified "
+        << "with --test_file (-T)." << endl;
+
+  if (!CLI::HasParam("output") && CLI::HasParam("test"))
+    Log::Warn << "--test_file (-T) specified, but classification results will "
+        << "not be saved because --output_file (-o) is not specified." << endl;
+
+  const string optimizerType = CLI::GetParam<string>("optimizer");
+
+  if ((optimizerType != "sgd") && (optimizerType != "lbfgs") &&
+      (optimizerType != "minibatch-sgd"))
+  {
+    Log::Fatal << "Optimizer type '" << optimizerType << "' unknown; must be "
+        << "'sgd', 'minibatch-sgd', or 'lbfgs'!" << endl;
+  }
+
+  const double stepSize = CLI::GetParam<double>("step_size");
+  const size_t maxIterations = (size_t) CLI::GetParam<int>("max_iterations");
+  const double tolerance = CLI::GetParam<double>("tolerance");
+  const bool shuffle = !CLI::HasParam("linear_scan");
+  const size_t batchSize = (size_t) CLI::GetParam<int>("batch_size");
+  const size_t rho = (size_t) CLI::GetParam<int>("rho");
+  const size_t numClasses = (size_t) CLI::GetParam<int>("classes");
+
+  const size_t hiddenSize = 256;
+  const double unitPixels = 13;
+  const double locatorStd = 0.11;
+  const size_t imageSize = 28;
+  const size_t locatorHiddenSize = 128;
+  const size_t glimpsePatchSize = 8;
+  const size_t glimpseDepth = 1;
+  const size_t glimpseScale = 2;
+  const size_t glimpseHiddenSize = 128;
+  const size_t imageHiddenSize = 256;
+
+
+  // Locator network.
+  LinearMappingLayer<> linearLayer0(hiddenSize, 2);
+  BiasLayer<> biasLayer0(2, 1);
+  HardTanHLayer<> hardTanhLayer0;
+  ReinforceNormalLayer<> reinforceNormalLayer0(2 * locatorStd);
+  HardTanHLayer<> hardTanhLayer1;
+  MultiplyConstantLayer<> multiplyConstantLayer0(2 * unitPixels / imageSize);
+  auto locator = std::tie(linearLayer0, biasLayer0, hardTanhLayer0,
+      reinforceNormalLayer0, hardTanhLayer1, multiplyConstantLayer0);
+
+  // Location sensor network.
+  LinearLayer<> linearLayer1(2, locatorHiddenSize);
+  BiasLayer<> biasLayer1(locatorHiddenSize, 1);
+  ReLULayer<> rectifierLayer0;
+  auto locationSensor = std::tie(linearLayer1, biasLayer1, rectifierLayer0);
+
+  // Glimpse sensor network.
+  GlimpseLayer<> glimpseLayer0(1, glimpsePatchSize, glimpseDepth, glimpseScale);
+  LinearMappingLayer<> linearLayer2(64, glimpseHiddenSize);
+  BiasLayer<> biasLayer2(glimpseHiddenSize, 1);
+  ReLULayer<> rectifierLayer1;
+  auto glimpseSensor = std::tie(glimpseLayer0, linearLayer2, biasLayer2,
+      rectifierLayer1);
+
+  // Glimpse network.
+  LinearLayer<> linearLayer3(glimpseHiddenSize + locatorHiddenSize,
+      imageHiddenSize);
+  BiasLayer<> biasLayer3(imageHiddenSize, 1);
+  ReLULayer<> rectifierLayer2;
+  LinearLayer<> linearLayer4(imageHiddenSize, hiddenSize);
+  BiasLayer<> biasLayer4(hiddenSize, 1);
+  auto glimpse = std::tie(linearLayer3, biasLayer3, rectifierLayer2,
+      linearLayer4, biasLayer4);
+
+  // Feedback network.
+  LinearLayer<> recurrentLayer0(imageHiddenSize, hiddenSize);
+  BiasLayer<> recurrentLayerBias0(hiddenSize, 1);
+  auto feedback = std::tie(recurrentLayer0, recurrentLayerBias0);
+
+  // Start network.
+  AdditionLayer<> startLayer0(hiddenSize, 1);
+  auto start = std::tie(startLayer0);
+
+  // Transfer network.
+  ReLULayer<> rectifierLayer3;
+  auto transfer = std::tie(rectifierLayer3);
+
+  // Classifier network.
+  LinearLayer<> linearLayer5(hiddenSize, numClasses);
+  BiasLayer<> biasLayer6(numClasses, 1);
+  LogSoftmaxLayer<> logSoftmaxLayer0;
+  auto classifier = std::tie(linearLayer5, biasLayer6, logSoftmaxLayer0);
+
+  // Reward predictor network.
+  ConstantLayer<> constantLayer0(1, 1);
+  AdditionLayer<> additionLayer0(1, 1);
+  auto rewardPredictor = std::tie(constantLayer0, additionLayer0);
+
+  // Recurrent Model for Visual Attention.
+  RecurrentNeuralAttention<decltype(locator),
+                           decltype(locationSensor),
+                           decltype(glimpseSensor),
+                           decltype(glimpse),
+                           decltype(start),
+                           decltype(feedback),
+                           decltype(transfer),
+                           decltype(classifier),
+                           decltype(rewardPredictor),
+                           RandomInitialization>
+    net(locator, locationSensor, glimpseSensor, glimpse, start, feedback,
+        transfer, classifier, rewardPredictor, rho);
+
+  // Either we have to train a model, or load a model.
+  if (CLI::HasParam("training"))
+  {
+    arma::mat trainingData = std::move(CLI::GetParam<arma::mat>("training"));
+
+    arma::mat labels;
+
+    // Did the user pass in labels?
+    if (CLI::HasParam("labels"))
+    {
+      // Load labels.
+      labels = std::move(CLI::GetParam<arma::mat>("labels"));
+
+      // Do the labels need to be transposed?
+      if (labels.n_cols == 1)
+        labels = labels.t();
+    }
+
+    // Now run the optimization.
+    if (optimizerType == "sgd")
+    {
+      SGD<decltype(net)> opt(net);
+      opt.StepSize() = stepSize;
+      opt.MaxIterations() = maxIterations;
+      opt.Tolerance() = tolerance;
+      opt.Shuffle() = shuffle;
+
+      Timer::Start("rmva_training");
+      net.Train(trainingData, labels, opt);
+      Timer::Stop("rmva_training");
+    }
+    else if (optimizerType == "minibatch-sgd")
+    {
+      MiniBatchSGD<decltype(net)> opt(net);
+      opt.StepSize() = stepSize;
+      opt.MaxIterations() = maxIterations;
+      opt.Tolerance() = tolerance;
+      opt.Shuffle() = shuffle;
+      opt.BatchSize() = batchSize;
+
+      Timer::Start("rmva_training");
+      net.Train(trainingData, labels, opt);
+      Timer::Stop("rmva_training");
+    }
+  }
+  else
+  {
+    // Load the model from file.
+    data::Load(CLI::GetParam<string>("input_model_file"), "rmva_model", net);
+  }
+
+  // Do we need to do testing?
+  if (CLI::HasParam("test"))
+  {
+    arma::mat testingData = std::move(CLI::GetParam<arma::mat>("test"));
+
+    // Time the prediction pass of the recurrent model.
+    arma::mat results;
+    Timer::Start("rmva_testing");
+    net.Predict(testingData, results);
+    Timer::Stop("rmva_testing");
+
+    if (CLI::HasParam("output"))
+      CLI::GetParam<arma::mat>("output") = std::move(results);
+  }
+
+  // Save the model, if requested.
+  if (CLI::HasParam("output_model_file"))
+    data::Save(CLI::GetParam<string>("output_model_file"), "rmva_model", net);
+}
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression.hpp b/src/mlpack/methods/softmax_regression/softmax_regression.hpp
index c5c31e3..3712c4a 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression.hpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_SOFTMAX_REGRESSION_SOFTMAX_REGRESSION_HPP
 #define MLPACK_METHODS_SOFTMAX_REGRESSION_SOFTMAX_REGRESSION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>
 
 #include "softmax_regression_function.hpp"
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp b/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp
index 5b0e731..dc91f1d 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression_function.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_SOFTMAX_REGRESSION_SOFTMAX_REGRESSION_FUNCTION_HPP
 #define MLPACK_METHODS_SOFTMAX_REGRESSION_SOFTMAX_REGRESSION_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace regression {
diff --git a/src/mlpack/methods/softmax_regression/softmax_regression_main.cpp b/src/mlpack/methods/softmax_regression/softmax_regression_main.cpp
index 18c310b..0be9d7b 100644
--- a/src/mlpack/methods/softmax_regression/softmax_regression_main.cpp
+++ b/src/mlpack/methods/softmax_regression/softmax_regression_main.cpp
@@ -8,7 +8,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include <mlpack/methods/softmax_regression/softmax_regression.hpp>
 #include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>
 
diff --git a/src/mlpack/methods/sparse_autoencoder/maximal_inputs.hpp b/src/mlpack/methods/sparse_autoencoder/maximal_inputs.hpp
index 0c31392..2ee3242 100644
--- a/src/mlpack/methods/sparse_autoencoder/maximal_inputs.hpp
+++ b/src/mlpack/methods/sparse_autoencoder/maximal_inputs.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_NN_MAXIMAL_INPUTS_HPP
 #define MLPACK_METHODS_NN_MAXIMAL_INPUTS_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace nn {
diff --git a/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder.hpp b/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder.hpp
index aac73c2..0c604c9 100644
--- a/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder.hpp
+++ b/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder.hpp
@@ -12,7 +12,7 @@
 #ifndef MLPACK_METHODS_SPARSE_AUTOENCODER_SPARSE_AUTOENCODER_HPP
 #define MLPACK_METHODS_SPARSE_AUTOENCODER_SPARSE_AUTOENCODER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/core/optimizers/lbfgs/lbfgs.hpp>
 
 #include "sparse_autoencoder_function.hpp"
diff --git a/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder_function.hpp b/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder_function.hpp
index 14fb811..d05c16d 100644
--- a/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder_function.hpp
+++ b/src/mlpack/methods/sparse_autoencoder/sparse_autoencoder_function.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_SPARSE_AUTOENCODER_SPARSE_AUTOENCODER_FUNCTION_HPP
 #define MLPACK_METHODS_SPARSE_AUTOENCODER_SPARSE_AUTOENCODER_FUNCTION_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace nn {
diff --git a/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp b/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp
index 6609574..dbe01ec 100644
--- a/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp
+++ b/src/mlpack/methods/sparse_coding/data_dependent_random_initializer.hpp
@@ -12,7 +12,8 @@
 #ifndef MLPACK_METHODS_SPARSE_CODING_DATA_DEPENDENT_RANDOM_INITIALIZER_HPP
 #define MLPACK_METHODS_SPARSE_CODING_DATA_DEPENDENT_RANDOM_INITIALIZER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/math/random.hpp>
 
 namespace mlpack {
 namespace sparse_coding {
diff --git a/src/mlpack/methods/sparse_coding/nothing_initializer.hpp b/src/mlpack/methods/sparse_coding/nothing_initializer.hpp
index a6a5b4d..972bf3c 100644
--- a/src/mlpack/methods/sparse_coding/nothing_initializer.hpp
+++ b/src/mlpack/methods/sparse_coding/nothing_initializer.hpp
@@ -14,7 +14,7 @@
 #ifndef MLPACK_METHODS_SPARSE_CODING_NOTHING_INITIALIZER_HPP
 #define MLPACK_METHODS_SPARSE_CODING_NOTHING_INITIALIZER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace sparse_coding {
diff --git a/src/mlpack/methods/sparse_coding/random_initializer.hpp b/src/mlpack/methods/sparse_coding/random_initializer.hpp
index ad96df4..ce92064 100644
--- a/src/mlpack/methods/sparse_coding/random_initializer.hpp
+++ b/src/mlpack/methods/sparse_coding/random_initializer.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_SPARSE_CODING_RANDOM_INITIALIZER_HPP
 #define MLPACK_METHODS_SPARSE_CODING_RANDOM_INITIALIZER_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 
 namespace mlpack {
 namespace sparse_coding {
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding.cpp b/src/mlpack/methods/sparse_coding/sparse_coding.cpp
index 6d76ffe..c7f1d67 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding.cpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding.cpp
@@ -11,6 +11,8 @@
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
 #include "sparse_coding.hpp"
+#include <mlpack/core/math/lin_alg.hpp>
+#include <mlpack/core/util/param.hpp>
 
 namespace mlpack {
 namespace sparse_coding {
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding.hpp b/src/mlpack/methods/sparse_coding/sparse_coding.hpp
index 9f7f3c8..4913cbc 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding.hpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding.hpp
@@ -13,7 +13,7 @@
 #ifndef MLPACK_METHODS_SPARSE_CODING_SPARSE_CODING_HPP
 #define MLPACK_METHODS_SPARSE_CODING_SPARSE_CODING_HPP
 
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
 #include <mlpack/methods/lars/lars.hpp>
 
 // Include our three simple dictionary initializers.
diff --git a/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp b/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
index 7a571b5..2036906 100644
--- a/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
+++ b/src/mlpack/methods/sparse_coding/sparse_coding_main.cpp
@@ -9,7 +9,8 @@
  * 3-clause BSD license along with mlpack.  If not, see
  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
  */
-#include <mlpack/core.hpp>
+#include <mlpack/prereqs.hpp>
+#include <mlpack/core/util/param.hpp>
 #include "sparse_coding.hpp"
 
 PROGRAM_INFO("Sparse Coding", "An implementation of Sparse Coding with "
diff --git a/src/mlpack/prereqs.hpp b/src/mlpack/prereqs.hpp
index 4849487..01ac0ff 100644
--- a/src/mlpack/prereqs.hpp
+++ b/src/mlpack/prereqs.hpp
@@ -73,11 +73,16 @@
 
 // Now include Armadillo through the special mlpack extensions.
 #include <mlpack/core/arma_extend/arma_extend.hpp>
+#include <mlpack/core/util/arma_traits.hpp>
 
 // Ensure that the user isn't doing something stupid with their Armadillo
 // defines.
 #include <mlpack/core/util/arma_config_check.hpp>
 
+// All code should have access to logging and timers.
+#include <mlpack/core/util/log.hpp>
+#include <mlpack/core/util/timers.hpp>
+
 // On Visual Studio, disable C4519 (default arguments for function templates)
 // since it's by default an error, which doesn't even make any sense because
 // it's part of the C++11 standard.
diff --git a/src/mlpack/tests/lars_test.cpp b/src/mlpack/tests/lars_test.cpp
index 3c66984..3cae513 100644
--- a/src/mlpack/tests/lars_test.cpp
+++ b/src/mlpack/tests/lars_test.cpp
@@ -13,6 +13,7 @@
 // Note: We don't use BOOST_REQUIRE_CLOSE in the code below because we need
 // to use FPC_WEAK, and it's not at all intuitive how to do that.
 #include <mlpack/methods/lars/lars.hpp>
+#include <mlpack/core/data/load.hpp>
 
 #include <boost/test/unit_test.hpp>
 #include "test_tools.hpp"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
