[mlpack] 191/207: Refactor DatasetMapper to allow a first pass over data.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Thu Mar 23 17:53:53 UTC 2017
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch master
in repository mlpack.
commit 22c3192add0e98209c4f6a8b00c7ff863aa0681f
Author: Ryan Curtin <ryan at ratml.org>
Date: Sat Mar 18 13:26:58 2017 -0400
Refactor DatasetMapper to allow a first pass over data.
---
src/mlpack/core/data/dataset_mapper.hpp | 19 +++++++++---
src/mlpack/core/data/dataset_mapper_impl.hpp | 45 ++++++++++++++++++++++++----
2 files changed, 54 insertions(+), 10 deletions(-)
diff --git a/src/mlpack/core/data/dataset_mapper.hpp b/src/mlpack/core/data/dataset_mapper.hpp
index 75d60be..77ffc97 100644
--- a/src/mlpack/core/data/dataset_mapper.hpp
+++ b/src/mlpack/core/data/dataset_mapper.hpp
@@ -51,16 +51,28 @@ class DatasetMapper
explicit DatasetMapper(PolicyType& policy, const size_t dimensionality = 0);
/**
+ * Preprocessing: during a first pass of the data, pass the strings on to the
+ * MapPolicy if they are needed.
+ *
+ * @param string String to map.
+ * @param dimension Dimension to map for.
+ */
+ template<typename T>
+ void MapFirstPass(const std::string& string, const size_t dimension);
+
+ /**
* Given the string and the dimension to which it belongs, return its numeric
* mapping. If no mapping yet exists, the string is added to the list of
* mappings for the given dimension. The dimension parameter refers to the
* index of the dimension of the string (i.e. the row in the dataset).
*
+ * @tparam T Numeric type to map to (int/double/float/etc.).
* @param string String to find/create mapping for.
* @param dimension Index of the dimension of the string.
*/
- typename PolicyType::MappedType MapString(const std::string& string,
- const size_t dimension);
+ template<typename T>
+ T MapString(const std::string& string,
+ const size_t dimension);
/**
* Return the string that corresponds to a given value in a given dimension.
@@ -134,8 +146,7 @@ class DatasetMapper
//! Modify the policy of the mapper (be careful!).
PolicyType& Policy();
-
- //! Modify (Replace) the policy of the mapper with a new policy
+ //! Modify (Replace) the policy of the mapper with a new policy.
void Policy(PolicyType&& policy);
private:
diff --git a/src/mlpack/core/data/dataset_mapper_impl.hpp b/src/mlpack/core/data/dataset_mapper_impl.hpp
index 854acd7..2e1471c 100644
--- a/src/mlpack/core/data/dataset_mapper_impl.hpp
+++ b/src/mlpack/core/data/dataset_mapper_impl.hpp
@@ -36,14 +36,47 @@ inline DatasetMapper<PolicyType>::DatasetMapper(PolicyType& policy,
// Nothing to initialize here.
}
-// When we want to insert value into the map,
-// we could use the policy to map the string
-template<typename PolicyType>
-inline typename PolicyType::MappedType DatasetMapper<PolicyType>::MapString(
+// Utility helper function to call MapFirstPass.
+template<typename PolicyType, typename T>
+void CallMapFirstPass(
+ PolicyType& policy,
const std::string& string,
- const size_t dimension)
+ const size_t dimension,
+ std::vector<Datatype>& types,
+ const typename std::enable_if<PolicyType::NeedsFirstPass>::type* = 0)
+{
+ policy.template MapFirstPass<T>(string, dimension, types);
+}
+
+// Utility helper function that doesn't call anything.
+template<typename PolicyType, typename T>
+void CallMapFirstPass(
+ PolicyType& /* policy */,
+ const std::string& /* string */,
+ const size_t /* dimension */,
+ std::vector<Datatype>& /* types */,
+ const typename std::enable_if<!PolicyType::NeedsFirstPass>::type* = 0)
+{
+ // Nothing to do here.
+}
+
+template<typename PolicyType>
+template<typename T>
+void DatasetMapper<PolicyType>::MapFirstPass(const std::string& string,
+ const size_t dimension)
+{
+ // Call the correct overload (via SFINAE).
+ CallMapFirstPass<PolicyType, T>(policy, string, dimension, types);
+}
+
+// When we want to insert value into the map, we use the policy to map the
+// string.
+template<typename PolicyType>
+template<typename T>
+inline T DatasetMapper<PolicyType>::MapString(const std::string& string,
+ const size_t dimension)
{
- return policy.template MapString<MapType>(string, dimension, maps, types);
+ return policy.template MapString<MapType, T>(string, dimension, maps, types);
}
// Return the string corresponding to a value in a given dimension.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits mailing list