[mlpack] 191/207: Refactor DatasetMapper to allow a first pass over data.

Barak A. Pearlmutter barak+git at pearlmutter.net
Thu Mar 23 17:53:53 UTC 2017


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit 22c3192add0e98209c4f6a8b00c7ff863aa0681f
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sat Mar 18 13:26:58 2017 -0400

    Refactor DatasetMapper to allow a first pass over data.
---
 src/mlpack/core/data/dataset_mapper.hpp      | 19 +++++++++---
 src/mlpack/core/data/dataset_mapper_impl.hpp | 45 ++++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/src/mlpack/core/data/dataset_mapper.hpp b/src/mlpack/core/data/dataset_mapper.hpp
index 75d60be..77ffc97 100644
--- a/src/mlpack/core/data/dataset_mapper.hpp
+++ b/src/mlpack/core/data/dataset_mapper.hpp
@@ -51,16 +51,28 @@ class DatasetMapper
   explicit DatasetMapper(PolicyType& policy, const size_t dimensionality = 0);
 
   /**
+   * Preprocessing: during a first pass of the data, pass the strings on to the
+   * MapPolicy if they are needed.
+   *
+   * @param string String to map.
+   * @param dimension Dimension to map for.
+   */
+  template<typename T>
+  void MapFirstPass(const std::string& string, const size_t dimension);
+
+  /**
    * Given the string and the dimension to which it belongs, return its numeric
    * mapping.  If no mapping yet exists, the string is added to the list of
    * mappings for the given dimension.  The dimension parameter refers to the
    * index of the dimension of the string (i.e. the row in the dataset).
    *
+   * @tparam T Numeric type to map to (int/double/float/etc.).
    * @param string String to find/create mapping for.
    * @param dimension Index of the dimension of the string.
    */
-  typename PolicyType::MappedType MapString(const std::string& string,
-                                            const size_t dimension);
+  template<typename T>
+  T MapString(const std::string& string,
+              const size_t dimension);
 
   /**
    * Return the string that corresponds to a given value in a given dimension.
@@ -134,8 +146,7 @@ class DatasetMapper
 
   //! Modify the policy of the mapper (be careful!).
   PolicyType& Policy();
-
-  //! Modify (Replace) the policy of the mapper with a new policy
+  //! Modify (Replace) the policy of the mapper with a new policy.
   void Policy(PolicyType&& policy);
 
  private:
diff --git a/src/mlpack/core/data/dataset_mapper_impl.hpp b/src/mlpack/core/data/dataset_mapper_impl.hpp
index 854acd7..2e1471c 100644
--- a/src/mlpack/core/data/dataset_mapper_impl.hpp
+++ b/src/mlpack/core/data/dataset_mapper_impl.hpp
@@ -36,14 +36,47 @@ inline DatasetMapper<PolicyType>::DatasetMapper(PolicyType& policy,
   // Nothing to initialize here.
 }
 
-// When we want to insert value into the map,
-// we could use the policy to map the string
-template<typename PolicyType>
-inline typename PolicyType::MappedType DatasetMapper<PolicyType>::MapString(
+// Utility helper function to call MapFirstPass.
+template<typename PolicyType, typename T>
+void CallMapFirstPass(
+    PolicyType& policy,
     const std::string& string,
-    const size_t dimension)
+    const size_t dimension,
+    std::vector<Datatype>& types,
+    const typename std::enable_if<PolicyType::NeedsFirstPass>::type* = 0)
+{
+  policy.template MapFirstPass<T>(string, dimension, types);
+}
+
+// Utility helper function that doesn't call anything.
+template<typename PolicyType, typename T>
+void CallMapFirstPass(
+    PolicyType& /* policy */,
+    const std::string& /* string */,
+    const size_t /* dimension */,
+    std::vector<Datatype>& /* types */,
+    const typename std::enable_if<!PolicyType::NeedsFirstPass>::type* = 0)
+{
+  // Nothing to do here.
+}
+
+template<typename PolicyType>
+template<typename T>
+void DatasetMapper<PolicyType>::MapFirstPass(const std::string& string,
+                                             const size_t dimension)
+{
+  // Call the correct overload (via SFINAE).
+  CallMapFirstPass<PolicyType, T>(policy, string, dimension, types);
+}
+
+// When we want to insert value into the map, we use the policy to map the
+// string.
+template<typename PolicyType>
+template<typename T>
+inline T DatasetMapper<PolicyType>::MapString(const std::string& string,
+                                              const size_t dimension)
 {
-  return policy.template MapString<MapType>(string, dimension, maps, types);
+  return policy.template MapString<MapType, T>(string, dimension, maps, types);
 }
 
 // Return the string corresponding to a value in a given dimension.

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list