[mlpack] 31/35: Remove files that are not to be released.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Thu Sep 15 23:29:44 UTC 2016
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch master
in repository mlpack.
commit 19c3906c3d843a161183009947197f27ccc18917
Author: Ryan Curtin <ryan at ratml.org>
Date: Thu Jul 21 18:04:04 2016 -0400
Remove files that are not to be released.
---
.../tree/rectangle_tree/r_plus_tree_split_impl.hpp | 363 ---------------------
src/mlpack/methods/ann/layer/one_hot_layer.hpp | 91 ------
.../methods/ann/layer/vr_class_reward_layer.hpp | 166 ----------
src/mlpack/methods/mvu/mvu_main.cpp | 75 -----
.../preprocess/preprocess_binarize_main.cpp | 85 -----
5 files changed, 780 deletions(-)
diff --git a/src/mlpack/core/tree/rectangle_tree/r_plus_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_plus_tree_split_impl.hpp
deleted file mode 100644
index 67025d5..0000000
--- a/src/mlpack/core/tree/rectangle_tree/r_plus_tree_split_impl.hpp
+++ /dev/null
@@ -1,363 +0,0 @@
-/**
- * @file r_plus_tree_split_impl.hpp
- * @author Mikhail Lozhnikov
- *
- * Implementation of class (RPlusTreeSplit) to split a RectangleTree.
- */
-#ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_IMPL_HPP
-#define MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_IMPL_HPP
-
-#include "r_plus_tree_split.hpp"
-#include "rectangle_tree.hpp"
-#include "r_plus_plus_tree_auxiliary_information.hpp"
-#include "r_plus_tree_split_policy.hpp"
-#include "r_plus_plus_tree_split_policy.hpp"
-
-namespace mlpack {
-namespace tree {
-
-/**
- * Split an overflowed leaf node of an R+ tree into two leaves.
- *
- * A leaf holding exactly one point is never split itself; instead its
- * ancestors are scanned and the first one holding MaxNumChildren() + 1
- * children is split.  A leaf that is within MaxLeafSize() is left untouched.
- * When the leaf is the root, its contents are first moved into a copy that
- * becomes the root's only child, so that the address of the root never
- * changes, and the copy is split instead.
- *
- * @param tree The leaf node to split.
- * @param relevels Passed through unchanged to SplitNonLeafNode().
- */
-template<typename SplitPolicyType,
-         template<typename> class SweepType>
-template<typename TreeType>
-void RPlusTreeSplit<SplitPolicyType, SweepType>::
-SplitLeafNode(TreeType* tree, std::vector<bool>& relevels)
-{
-  typedef typename TreeType::ElemType ElemType;
-
-  if (tree->Count() == 1)
-  {
-    // Check if an intermediate node was added during the insertion process,
-    // i.e. we couldn't enlarge a node of the R+ tree.  In that case one of the
-    // intermediate nodes may be overflowed.
-    TreeType* node = tree->Parent();
-
-    while (node != NULL)
-    {
-      if (node->NumChildren() == node->MaxNumChildren() + 1)
-      {
-        // Split the overflowed node.
-        RPlusTreeSplit::SplitNonLeafNode(node, relevels);
-        return;
-      }
-      node = node->Parent();
-    }
-    return;
-  }
-  else if (tree->Count() <= tree->MaxLeafSize())
-  {
-    return;
-  }
-
-  // If we are splitting the root node, we need to do things differently so
-  // that the constructor and other methods don't confuse the end user by
-  // giving an address of another node.
-  if (tree->Parent() == NULL)
-  {
-    // We actually want to copy this way.  Pointers and everything.
-    TreeType* copy = new TreeType(*tree, false);
-    copy->Parent() = tree;
-    tree->Count() = 0;
-    tree->NullifyData();
-    // Because this was a leaf node, numChildren must be 0.
-    tree->children[(tree->NumChildren())++] = copy;
-    assert(tree->NumChildren() == 1);
-
-    RPlusTreeSplit::SplitLeafNode(copy, relevels);
-    return;
-  }
-
-  // cutAxis == Dim() is the "no axis found" marker checked below.
-  size_t cutAxis = tree->Bound().Dim();
-  ElemType cut = std::numeric_limits<ElemType>::lowest();
-
-  // Try to find a partition of the node.
-  if (!PartitionNode(tree, cutAxis, cut))
-    return;
-
-  // If we could not find a suitable partition, grow the node instead.
-  if (cutAxis == tree->Bound().Dim())
-  {
-    tree->MaxLeafSize()++;
-    tree->points.resize(tree->MaxLeafSize() + 1);
-    // The two sentences need a separating space and the message a line end.
-    Log::Warn << "Could not find an acceptable partition. "
-        "The size of the node will be increased." << std::endl;
-    return;
-  }
-
-  TreeType* treeOne = new TreeType(tree->Parent());
-  TreeType* treeTwo = new TreeType(tree->Parent());
-  treeOne->MinLeafSize() = 0;
-  treeOne->MinNumChildren() = 0;
-  treeTwo->MinLeafSize() = 0;
-  treeTwo->MinNumChildren() = 0;
-
-  // Split the node into two new nodes.
-  SplitLeafNodeAlongPartition(tree, treeOne, treeTwo, cutAxis, cut);
-
-  // Locate the slot of the old node in its parent.  The search is bounded so
-  // that a broken parent link trips the assertion below rather than reading
-  // past the used part of the children array.
-  TreeType* parent = tree->Parent();
-  size_t i = 0;
-  while (i < parent->NumChildren() && parent->children[i] != tree)
-    i++;
-
-  assert(i < parent->NumChildren());
-
-  // Insert two new nodes to the tree: the first replaces the old node, the
-  // second is appended.
-  parent->children[i] = treeOne;
-  parent->children[parent->NumChildren()++] = treeTwo;
-
-  assert(parent->NumChildren() <= parent->MaxNumChildren() + 1);
-
-  // Propagate the split upward if necessary.
-  if (parent->NumChildren() == parent->MaxNumChildren() + 1)
-    RPlusTreeSplit::SplitNonLeafNode(parent, relevels);
-
-  tree->SoftDelete();
-}
-
-/**
- * Split an overflowed non-leaf node of an R+ tree into two nodes and
- * propagate the split to the parent if the parent overflows in turn.
- *
- * When the node is the root, its contents are first moved into a copy that
- * becomes the root's only child, so that the address of the root never
- * changes, and the copy is split instead.
- *
- * @param tree The non-leaf node to split.
- * @param relevels Passed through unchanged to recursive calls.
- * @return true only when the node was the root; false in every other case
- *     (including when no partition was needed or found).
- */
-template<typename SplitPolicyType,
-         template<typename> class SweepType>
-template<typename TreeType>
-bool RPlusTreeSplit<SplitPolicyType, SweepType>::
-SplitNonLeafNode(TreeType* tree, std::vector<bool>& relevels)
-{
-  typedef typename TreeType::ElemType ElemType;
-
-  // If we are splitting the root node, we need to do things differently so
-  // that the constructor and other methods don't confuse the end user by
-  // giving an address of another node.
-  if (tree->Parent() == NULL)
-  {
-    // We actually want to copy this way.  Pointers and everything.
-    TreeType* copy = new TreeType(*tree, false);
-
-    copy->Parent() = tree;
-    tree->NumChildren() = 0;
-    tree->NullifyData();
-    tree->children[(tree->NumChildren())++] = copy;
-
-    RPlusTreeSplit::SplitNonLeafNode(copy, relevels);
-    return true;
-  }
-
-  // cutAxis == Dim() is the "no axis found" marker checked below.
-  size_t cutAxis = tree->Bound().Dim();
-  ElemType cut = std::numeric_limits<ElemType>::lowest();
-
-  // Try to find a partition of the node.
-  if (!PartitionNode(tree, cutAxis, cut))
-    return false;
-
-  // If we could not find a suitable partition, grow the node instead.
-  if (cutAxis == tree->Bound().Dim())
-  {
-    tree->MaxNumChildren()++;
-    tree->children.resize(tree->MaxNumChildren() + 1);
-    // The two sentences need a separating space and the message a line end.
-    Log::Warn << "Could not find an acceptable partition. "
-        "The size of the node will be increased." << std::endl;
-    return false;
-  }
-
-  TreeType* treeOne = new TreeType(tree->Parent());
-  TreeType* treeTwo = new TreeType(tree->Parent());
-  treeOne->MinLeafSize() = 0;
-  treeOne->MinNumChildren() = 0;
-  treeTwo->MinLeafSize() = 0;
-  treeTwo->MinNumChildren() = 0;
-
-  // Split the node into two new nodes.
-  SplitNonLeafNodeAlongPartition(tree, treeOne, treeTwo, cutAxis, cut);
-
-  // Locate the slot of the old node in its parent.  The search is bounded so
-  // that a broken parent link trips the assertion below rather than reading
-  // past the used part of the children array.
-  TreeType* parent = tree->Parent();
-  size_t i = 0;
-  while (i < parent->NumChildren() && parent->children[i] != tree)
-    i++;
-
-  assert(i < parent->NumChildren());
-
-  // Insert two new nodes to the tree: the first replaces the old node, the
-  // second is appended.
-  parent->children[i] = treeOne;
-  parent->children[parent->NumChildren()++] = treeTwo;
-
-  tree->SoftDelete();
-
-  assert(parent->NumChildren() <= parent->MaxNumChildren() + 1);
-
-  // Propagate the split upward if necessary.
-  if (parent->NumChildren() == parent->MaxNumChildren() + 1)
-    RPlusTreeSplit::SplitNonLeafNode(parent, relevels);
-
-  return false;
-}
-
-/**
- * Distribute the points of a leaf node between two new leaves.  A point whose
- * coordinate on cutAxis is less than or equal to cut goes to treeOne; every
- * other point goes to treeTwo.  The bound of the receiving leaf is expanded
- * to contain each point it receives.
- *
- * @param tree The leaf whose points are distributed.
- * @param treeOne Leaf receiving the points on the low side of the cut.
- * @param treeTwo Leaf receiving the points on the high side of the cut.
- * @param cutAxis Index of the dimension along which the cut is made.
- * @param cut Coordinate of the cut on that dimension.
- */
-template<typename SplitPolicyType,
-         template<typename> class SweepType>
-template<typename TreeType>
-void RPlusTreeSplit<SplitPolicyType, SweepType>::SplitLeafNodeAlongPartition(
-    TreeType* tree,
-    TreeType* treeOne,
-    TreeType* treeTwo,
-    const size_t cutAxis,
-    const typename TreeType::ElemType cut)
-{
-  // The auxiliary information is divided before any point is moved.
-  tree->AuxiliaryInfo().SplitAuxiliaryInfo(treeOne, treeTwo, cutAxis, cut);
-
-  // Hand every point to the leaf on its side of the cut.
-  for (size_t idx = 0; idx < tree->NumPoints(); idx++)
-  {
-    const bool lowSide =
-        tree->Dataset().col(tree->Point(idx))[cutAxis] <= cut;
-    TreeType* receiver = lowSide ? treeOne : treeTwo;
-
-    receiver->Point(receiver->Count()++) = tree->Point(idx);
-    receiver->Bound() |= tree->Dataset().col(tree->Point(idx));
-  }
-
-  // A leaf has exactly as many descendants as it holds points.
-  treeOne->numDescendants = treeOne->Count();
-  treeTwo->numDescendants = treeTwo->Count();
-
-  assert(treeOne->Count() <= treeOne->MaxLeafSize());
-  assert(treeTwo->Count() <= treeTwo->MaxLeafSize());
-
-  assert(tree->Count() == treeOne->Count() + treeTwo->Count());
-  assert(treeOne->Bound()[cutAxis].Hi() < treeTwo->Bound()[cutAxis].Lo());
-}
-
-/**
- * Distribute the children of a non-leaf node between two new nodes according
- * to a cut along one axis.  The split policy decides, for each child, whether
- * it goes to the first node, to the second node, or has to be split itself;
- * in the last case the child is split recursively along the same cut and its
- * two halves are inserted into the two new nodes.
- *
- * @param tree The node whose children are distributed.
- * @param treeOne Node receiving children the policy assigns to the first tree.
- * @param treeTwo Node receiving children the policy assigns to the second tree.
- * @param cutAxis Axis of the cut, forwarded to the split policy.
- * @param cut Coordinate of the cut, forwarded to the split policy.
- */
-template<typename SplitPolicyType,
- template<typename> class SweepType>
-template<typename TreeType>
-void RPlusTreeSplit<SplitPolicyType, SweepType>::SplitNonLeafNodeAlongPartition(
- TreeType* tree,
- TreeType* treeOne,
- TreeType* treeTwo,
- const size_t cutAxis,
- const typename TreeType::ElemType cut)
-{
- // Split the auxiliary information.
- tree->AuxiliaryInfo().SplitAuxiliaryInfo(treeOne, treeTwo, cutAxis, cut);
-
- // Insert children into the corresponding subtree.
- for (size_t i = 0; i < tree->NumChildren(); i++)
- {
- TreeType* child = tree->children[i];
- int policy = SplitPolicyType::GetSplitPolicy(*child, cutAxis, cut);
-
- if (policy == SplitPolicyType::AssignToFirstTree)
- {
- // The child moves as a whole; it is re-parented after insertion.
- InsertNodeIntoTree(treeOne, child);
- child->Parent() = treeOne;
- }
- else if (policy == SplitPolicyType::AssignToSecondTree)
- {
- // The child moves as a whole; it is re-parented after insertion.
- InsertNodeIntoTree(treeTwo, child);
- child->Parent() = treeTwo;
- }
- else
- {
- // The child should be split (i.e. the partition divides its bound).
- TreeType* childOne = new TreeType(treeOne);
- TreeType* childTwo = new TreeType(treeTwo);
- // NOTE(review): the minimum sizes are reset on treeOne/treeTwo here, not
- // on the freshly created childOne/childTwo -- confirm this is intended.
- treeOne->MinLeafSize() = 0;
- treeOne->MinNumChildren() = 0;
- treeTwo->MinLeafSize() = 0;
- treeTwo->MinNumChildren() = 0;
-
- // Propagate the split downward.
- if (child->IsLeaf())
- SplitLeafNodeAlongPartition(child, childOne, childTwo, cutAxis, cut);
- else
- SplitNonLeafNodeAlongPartition(child, childOne, childTwo, cutAxis, cut);
-
- InsertNodeIntoTree(treeOne, childOne);
- InsertNodeIntoTree(treeTwo, childTwo);
-
- // The original child has been replaced by its two halves.
- child->SoftDelete();
- }
- }
-
- assert(treeOne->NumChildren() + treeTwo->NumChildren() != 0);
-
- // Add a fake subtree if one of the subtrees is empty.
- if (treeOne->NumChildren() == 0)
- AddFakeNodes(treeTwo, treeOne);
- else if (treeTwo->NumChildren() == 0)
- AddFakeNodes(treeOne, treeTwo);
-
- assert(treeOne->NumChildren() <= treeOne->MaxNumChildren());
- assert(treeTwo->NumChildren() <= treeTwo->MaxNumChildren());
-}
-
-/**
- * Hang a chain of empty nodes below emptyTree.  The chain contains
- * tree->TreeDepth() - 1 nodes, each one the only child of the previous one.
- *
- * @param tree Node whose TreeDepth() determines the length of the chain.
- * @param emptyTree Node that receives the chain.
- */
-template<typename SplitPolicyType,
-         template<typename> class SweepType>
-template<typename TreeType>
-void RPlusTreeSplit<SplitPolicyType, SweepType>::
-AddFakeNodes(const TreeType* tree, TreeType* emptyTree)
-{
-  TreeType* tail = emptyTree;
-
-  // Count down the number of nodes that still have to be appended.
-  for (size_t remaining = tree->TreeDepth() - 1; remaining > 0; remaining--)
-  {
-    TreeType* link = new TreeType(tail);
-    tail->children[tail->NumChildren()++] = link;
-
-    // The next node is attached below the one just created.
-    tail = link;
-  }
-}
-
-/**
- * Look for the cheapest cut of an overflowed node by sweeping every axis.
- *
- * @param node The node to partition.
- * @param minCutAxis Set to the axis of the cheapest sweep, or left equal to
- *     node->Bound().Dim() when no sweep costs less than the maximum value of
- *     the cost type.
- * @param minCut Set to the cut of the cheapest sweep; untouched when no sweep
- *     improves on the initial cost.
- * @return false if the node is not overflowed (nothing is written to the
- *     output parameters in that case), true otherwise.
- */
-template<typename SplitPolicyType,
-         template<typename> class SweepType>
-template<typename TreeType>
-bool RPlusTreeSplit<SplitPolicyType, SweepType>::
-PartitionNode(const TreeType* node, size_t& minCutAxis,
-              typename TreeType::ElemType& minCut)
-{
-  // A node needs a partition only when it exceeds the limit for its kind.
-  const bool leaf = node->IsLeaf();
-  const bool overflowed = leaf ?
-      (node->Count() > node->MaxLeafSize()) :
-      (node->NumChildren() > node->MaxNumChildren());
-  if (!overflowed)
-    return false; // No partition required.
-
-  // Define the type of the sweep cost.
-  typedef typename
-      SweepType<SplitPolicyType>::template SweepCost<TreeType>::type
-      SweepCostType;
-
-  SweepCostType bestCost = std::numeric_limits<SweepCostType>::max();
-  minCutAxis = node->Bound().Dim();
-
-  // Sweep each axis in turn and remember the cheapest result.
-  for (size_t axis = 0; axis < node->Bound().Dim(); axis++)
-  {
-    typename TreeType::ElemType axisCut;
-    SweepCostType axisCost;
-
-    if (!leaf)
-      axisCost = SweepType<SplitPolicyType>::SweepNonLeafNode(axis, node,
-          axisCut);
-    else
-      axisCost = SweepType<SplitPolicyType>::SweepLeafNode(axis, node, axisCut);
-
-    if (axisCost < bestCost)
-    {
-      bestCost = axisCost;
-      minCutAxis = axis;
-      minCut = axisCut;
-    }
-  }
-
-  return true;
-}
-
-/**
- * Append srcNode to the children of destTree, expanding the bound of destTree
- * to cover srcNode and adding srcNode's descendants to destTree's count.  The
- * parent pointer of srcNode is not modified here.
- *
- * @param destTree Node that receives the new child.
- * @param srcNode Node that is appended.
- */
-template<typename SplitPolicyType,
-         template<typename> class SweepType>
-template<typename TreeType>
-void RPlusTreeSplit<SplitPolicyType, SweepType>::
-InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode)
-{
-  // Bookkeeping first: bound and descendant count.
-  destTree->Bound() |= srcNode->Bound();
-  destTree->numDescendants += srcNode->numDescendants;
-
-  // Then take the next free slot in the children array.
-  const size_t slot = destTree->NumChildren()++;
-  destTree->children[slot] = srcNode;
-}
-
-} // namespace tree
-} // namespace mlpack
-
-#endif // MLPACK_CORE_TREE_RECTANGLE_TREE_R_PLUS_TREE_SPLIT_IMPL_HPP
diff --git a/src/mlpack/methods/ann/layer/one_hot_layer.hpp b/src/mlpack/methods/ann/layer/one_hot_layer.hpp
deleted file mode 100644
index 671696e..0000000
--- a/src/mlpack/methods/ann/layer/one_hot_layer.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * @file one_hot_layer.hpp
- * @author Shangtong Zhang
- *
- * Definition of the OneHotLayer class, which implements a standard network
- * layer.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_ONE_HOT_LAYER_HPP
-
-#include <mlpack/core.hpp>
-#include <mlpack/methods/ann/layer/layer_traits.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Output layer for one-hot classification: the error is the difference
- * between activation and target, and the predicted class is encoded by a
- * single 1 at the position of the largest activation.
- */
-class OneHotLayer
-{
- public:
-  //! Create the OneHotLayer object; the layer holds no state.
-  OneHotLayer()
-  {
-    // Intentionally empty.
-  }
-
-  /**
-   * Compute the error as the element-wise difference between the input
-   * activation and the target.
-   *
-   * @param inputActivations Input data used for evaluating the network.
-   * @param target Target data used for evaluating the network.
-   * @param error Receives inputActivations - target.
-   */
-  template<typename DataType>
-  void CalculateError(const DataType& inputActivations,
-                      const DataType& target,
-                      DataType& error)
-  {
-    error = inputActivations - target;
-  }
-
-  /**
-   * Compute the one-hot encoding of the input activation.
-   *
-   * @param inputActivations Input data used to calculate the output class.
-   * @param output Receives an object shaped like inputActivations that is
-   *     zero everywhere except for a 1 at the index of the maximum.
-   */
-  template<typename DataType>
-  void OutputClass(const DataType& inputActivations, DataType& output)
-  {
-    // Copy first so that the output takes the shape of the input, then clear.
-    output = inputActivations;
-    output.zeros();
-
-    // max() reports the position of the largest element through its argument.
-    arma::uword hotIndex = 0;
-    inputActivations.max(hotIndex);
-    output(hotIndex) = 1;
-  }
-
-  /**
-   * Serialize the layer.  There is no state to store.
-   */
-  template<typename Archive>
-  void Serialize(Archive& /* ar */, const unsigned int /* version */)
-  {
-    // Intentionally empty.
-  }
-}; // class OneHotLayer
-
-//! Compile-time properties of OneHotLayer: a binary output layer that is
-//! neither a bias layer nor a connection.
-template <>
-class LayerTraits<OneHotLayer>
-{
- public:
-  // Properties that hold for this layer.
-  static const bool IsOutputLayer = true;
-  static const bool IsBinary = true;
-
-  // Properties that do not hold for this layer.
-  static const bool IsConnection = false;
-  static const bool IsBiasLayer = false;
-};
-
-} // namespace ann
-} // namespace mlpack
-
-
-#endif
diff --git a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp b/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
deleted file mode 100644
index aec0b85..0000000
--- a/src/mlpack/methods/ann/layer/vr_class_reward_layer.hpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * @file vr_class_reward_layer.hpp
- * @author Marcus Edel
- *
- * Definition of the VRClassRewardLayer class, which implements the variance
- * reduced classification reinforcement layer.
- */
-#ifndef MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
-#define MLPACK_METHODS_ANN_LAYER_VR_CLASS_REWARD_LAYER_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace ann /** Artificial Neural Network. */ {
-
-/**
- * Implementation of the variance reduced classification reinforcement layer.
- * This layer is meant to be used in combination with the reinforce normal
- * layer (ReinforceNormalLayer), which expects a reward
- * (1 for success, 0 otherwise).
- *
- * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
- *         arma::sp_mat or arma::cube).
- */
-template <
-    typename InputDataType = arma::field<arma::mat>,
-    typename OutputDataType = arma::field<arma::mat>
->
-class VRClassRewardLayer
-{
- public:
-  /**
-   * Create the VRClassRewardLayer object.
-   *
-   * @param scale Parameter used to scale the reward.
-   * @param sizeAverage Take the average over all batches.
-   */
-  VRClassRewardLayer(const double scale = 1, const bool sizeAverage = true) :
-      scale(scale),
-      sizeAverage(sizeAverage),
-      reward(0),
-      deterministic(false)
-  {
-    // reward and deterministic are initialized so that Backward() and
-    // Deterministic() never read an indeterminate value.
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.  Only the first
-   * element of the field is used as class scores.
-   *
-   * @param input Input data that contains the log-probabilities for each class.
-   * @param target The target vector, that contains the class index in the range
-   *        between 1 and the number of classes.
-   * @return The negative reward, see the matrix overload.
-   */
-  template<typename eT>
-  double Forward(const arma::field<arma::Mat<eT> >& input,
-                 const arma::Mat<eT>& target)
-  {
-    return Forward(input(0, 0), target);
-  }
-
-  /**
-   * Ordinary feed forward pass of a neural network, evaluating the function
-   * f(x) by propagating the activity forward through f.
-   *
-   * @param input Input data that contains the log-probabilities for each class.
-   * @param target The target vector, that contains the class index in the range
-   *        between 1 and the number of classes.
-   * @return The negative reward, divided by the number of columns when
-   *         sizeAverage is set.
-   */
-  template<typename eT>
-  double Forward(const arma::Mat<eT>& input, const arma::Mat<eT>& target)
-  {
-    reward = 0;
-    arma::uword index = 0;
-
-    for (size_t i = 0; i < input.n_cols; i++)
-    {
-      // The predicted class is the (1-based) position of the column maximum.
-      input.unsafe_col(i).max(index);
-
-      // Accumulate over the columns; assigning here would keep only the
-      // reward of the last column although the result is averaged below.
-      reward += ((index + 1) == target(i)) * scale;
-    }
-
-    if (sizeAverage)
-    {
-      return -reward / input.n_cols;
-    }
-
-    return -reward;
-  }
-
-  /**
-   * Ordinary feed backward pass of a neural network, calculating the function
-   * f(x) by propagating x backwards through f. Using the results from the feed
-   * forward pass.
-   *
-   * @param input The propagated input activation; element (0, 0) holds the
-   *        class scores and element (1, 0) the baseline.
-   * @param gy The backpropagated error (unused).
-   * @param g The calculated gradient; element (0, 0) is all zeros and element
-   *        (1, 0) is the gradient with respect to the baseline.
-   * @return The variance reduced reward (reward minus baseline).
-   */
-  template<typename eT>
-  double Backward(const arma::field<arma::Mat<eT> >& input,
-                  const arma::Mat<eT>& /* gy */,
-                  arma::field<arma::Mat<eT> >& g)
-  {
-    g = arma::field<arma::Mat<eT> >(2, 1);
-    // Use the member function so that the zeros have element type eT.
-    g(0, 0).zeros(input(0, 0).n_rows, input(0, 0).n_cols);
-
-    double vrReward = reward - arma::as_scalar(input(1, 0));
-    if (sizeAverage)
-    {
-      vrReward /= input(0, 0).n_cols;
-    }
-
-    // Normalize by the number of columns of the score matrix, as above; the
-    // number of columns of the field itself is unrelated to the batch.
-    const double norm = sizeAverage ? 2.0 / input(0, 0).n_cols : 2.0;
-
-    g(1, 0) = norm * (input(1, 0) - reward);
-
-    return vrReward;
-  }
-
-  //! Get the input parameter.
-  const InputDataType& InputParameter() const { return inputParameter; }
-  //! Modify the input parameter.
-  InputDataType& InputParameter() { return inputParameter; }
-
-  //! Get the output parameter.
-  const OutputDataType& OutputParameter() const { return outputParameter; }
-  //! Modify the output parameter.
-  OutputDataType& OutputParameter() { return outputParameter; }
-
-  //! Get the delta.
-  const OutputDataType& Delta() const { return delta; }
-  //! Modify the delta.
-  OutputDataType& Delta() { return delta; }
-
-  //! Get the value of the deterministic parameter.
-  bool Deterministic() const { return deterministic; }
-  //! Modify the value of the deterministic parameter.
-  bool& Deterministic() { return deterministic; }
-
- private:
-  //! Locally-stored value to scale the reward.
-  const double scale;
-
-  //! If true take the average over all batches.
-  const bool sizeAverage;
-
-  //! Locally stored reward parameter, set by Forward().
-  double reward;
-
-  //! Locally-stored delta object.
-  OutputDataType delta;
-
-  //! Locally-stored input parameter object.
-  InputDataType inputParameter;
-
-  //! Locally-stored output parameter object.
-  OutputDataType outputParameter;
-
-  //! Deterministic flag; stored and exposed but not read by this layer.
-  bool deterministic;
-}; // class VRClassRewardLayer
-
-}; // namespace ann
-}; // namespace mlpack
-
-#endif
diff --git a/src/mlpack/methods/mvu/mvu_main.cpp b/src/mlpack/methods/mvu/mvu_main.cpp
deleted file mode 100644
index de54048..0000000
--- a/src/mlpack/methods/mvu/mvu_main.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * @file mvu_main.cpp
- * @author Ryan Curtin
- *
- * Executable for MVU.
- *
- * Note: this implementation of MVU does not work. See #189.
- */
-#include <mlpack/core.hpp>
-#include "mvu.hpp"
-
-PROGRAM_INFO("Maximum Variance Unfolding (MVU)", "This program implements "
- "Maximum Variance Unfolding, a nonlinear dimensionality reduction "
- "technique. The method minimizes dimensionality by unfolding a manifold "
- "such that the distances to the nearest neighbors of each point are held "
- "constant.");
-
-PARAM_STRING_IN_REQ("input_file", "Filename of input dataset.", "i");
-PARAM_INT_IN_REQ("new_dim", "New dimensionality of dataset.", "d");
-
-PARAM_STRING_OUT("output_file", "Filename to save unfolded dataset to.", "o");
-PARAM_INT_IN("num_neighbors", "Number of nearest neighbors to consider while "
- "unfolding.", "k", 5);
-
-using namespace mlpack;
-using namespace mlpack::mvu;
-using namespace mlpack::math;
-using namespace arma;
-using namespace std;
-
-/**
- * Entry point of the MVU executable: parse the command line, load the input
- * dataset, validate the requested dimensionality and neighbor count, unfold
- * the data and save the result if an output file was given.
- */
-int main(int argc, char **argv)
-{
-  // Read from command line.
-  CLI::ParseCommandLine(argc, argv);
-  const string inputFile = CLI::GetParam<string>("input_file");
-  const string outputFile = CLI::GetParam<string>("output_file");
-  const int newDim = CLI::GetParam<int>("new_dim");
-  const int numNeighbors = CLI::GetParam<int>("num_neighbors");
-
-  // Without an output file the result is computed but thrown away.
-  const bool haveOutputFile = CLI::HasParam("output_file");
-  if (!haveOutputFile)
-  {
-    Log::Warn << "--output_file (-o) is not specified; no results will be "
-        << "saved!" << endl;
-  }
-
-  RandomSeed(time(NULL));
-
-  // Load input dataset.
-  mat data;
-  data::Load(inputFile, data, true);
-
-  // The new dimensionality has to lie in [1, number of rows].
-  const int maxDim = static_cast<int>(data.n_rows);
-  if (newDim < 1 || newDim > maxDim)
-  {
-    Log::Fatal << "Invalid new dimensionality (" << newDim << "). Must be "
-        << "between 1 and the input dataset dimensionality (" << data.n_rows
-        << ")." << std::endl;
-  }
-
-  // The number of neighbors has to lie in [1, number of columns].
-  const int maxNeighbors = static_cast<int>(data.n_cols);
-  if (numNeighbors < 1 || numNeighbors > maxNeighbors)
-  {
-    Log::Fatal << "Invalid number of neighbors (" << numNeighbors << "). Must "
-        << "be between 1 and the number of points in the input dataset ("
-        << data.n_cols << ")." << std::endl;
-  }
-
-  // Now run MVU.
-  MVU mvu(data);
-
-  mat output;
-  mvu.Unfold(newDim, numNeighbors, output);
-
-  // Save results to file.
-  if (haveOutputFile)
-    data::Save(outputFile, output, true);
-}
diff --git a/src/mlpack/methods/preprocess/preprocess_binarize_main.cpp b/src/mlpack/methods/preprocess/preprocess_binarize_main.cpp
deleted file mode 100644
index efad2bf..0000000
--- a/src/mlpack/methods/preprocess/preprocess_binarize_main.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * @file preprocess_binarize_main.cpp
- * @author Keon Kim
- *
- * binarize CLI executable
- */
-#include <mlpack/core.hpp>
-#include <mlpack/core/data/binarize.hpp>
-
-PROGRAM_INFO("Binarize Data", "This utility takes a dataset and binarizes the "
- "variables into either 0 or 1 given threshold. User can apply binarization "
- "on a dimension or the whole dataset. A dimension can be specified using "
- "--dimension (-d) option. Threshold can also be specified with the "
- "--threshold (-t) option; The default is 0.0."
- "\n\n"
- "The program does not modify the original file, but instead makes a "
- "separate file to save the binarized data; The program requires you to "
- "specify the file name with --output_file (-o)."
- "\n\n"
- "For example, if we want to make all variables greater than 5 in dataset "
- "to 1 and ones that are less than or equal to 5.0 to 0, and save the "
- "result to result.csv, we could run"
- "\n\n"
- "$ mlpack_preprocess_binarize -i dataset.csv -t 5 -o result.csv"
- "\n\n"
- "But if we want to apply this to only the first (0th) dimension of the "
- "dataset, we could run"
- "\n\n"
- "$ mlpack_preprocess_binarize -i dataset.csv -t 5 -d 0 -o result.csv");
-
-// Define parameters for data.
-PARAM_STRING_REQ("input_file", "File containing data.", "i");
-// Define optional parameters.
-PARAM_STRING("output_file", "File to save the output.", "o");
-PARAM_INT("dimension", "Dimension to apply the binarization. If not set, the"
- " program will binarize every dimension by default.", "d", 0);
-PARAM_DOUBLE("threshold", "Threshold to be applied for binarization. If not "
- "set, the threshold defaults to 0.0.", "t", 0.0);
-
-using namespace mlpack;
-using namespace arma;
-using namespace std;
-
-/**
- * Entry point of the binarize executable: parse the command line, load the
- * input dataset, binarize either one dimension or the whole dataset against
- * the threshold, and save the result if an output file was given.
- */
-int main(int argc, char** argv)
-{
-  // Parse command line options.
-  CLI::ParseCommandLine(argc, argv);
-  const string inputFile = CLI::GetParam<string>("input_file");
-  const string outputFile = CLI::GetParam<string>("output_file");
-  // Kept signed until it has been validated against the loaded data; casting
-  // a negative value to size_t would silently yield a huge index.
-  const int dimension = CLI::GetParam<int>("dimension");
-  const double threshold = CLI::GetParam<double>("threshold");
-
-  // Check on data parameters.
-  if (!CLI::HasParam("dimension"))
-    Log::Warn << "You did not specify --dimension, so the program will perform "
-        << "binarize on every dimensions." << endl;
-
-  if (!CLI::HasParam("threshold"))
-    Log::Warn << "You did not specify --threshold, so the threshold will be "
-        << "automatically set to '0.0'." << endl;
-
-  if (!CLI::HasParam("output_file"))
-    Log::Warn << "You did not specify --output_file, so no result will be "
-        << "saved." << endl;
-
-  // Load the data.
-  arma::mat input;
-  arma::mat output;
-  data::Load(inputFile, input, true);
-
-  // Reject a dimension that does not exist in the dataset before it is used
-  // as a row index.
-  if (CLI::HasParam("dimension") &&
-      (dimension < 0 || static_cast<size_t>(dimension) >= input.n_rows))
-  {
-    Log::Fatal << "Invalid dimension (" << dimension << "); the input dataset "
-        << "has " << input.n_rows << " dimensions." << endl;
-  }
-
-  Timer::Start("binarize");
-  if (CLI::HasParam("dimension"))
-  {
-    data::Binarize<double>(input, output, threshold,
-        static_cast<size_t>(dimension));
-  }
-  else
-  {
-    // binarize the whole data
-    data::Binarize<double>(input, output, threshold);
-  }
-  Timer::Stop("binarize");
-
-  if (CLI::HasParam("output_file"))
-    data::Save(outputFile, output, false);
-}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list