[mlpack] 189/207: Refactor MissingPolicy to handle when the string is not a part of missingTokens.

Barak A. Pearlmutter barak+git at pearlmutter.net
Thu Mar 23 17:53:53 UTC 2017


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit e4483b39539fb09a73ed3309d14e846c3865a329
Author: Ryan Curtin <ryan at ratml.org>
Date:   Fri Mar 17 16:29:32 2017 -0400

    Refactor MissingPolicy to handle when the string is not a part of missingTokens.
---
 .../core/data/map_policies/missing_policy.hpp      | 28 ++++++++--------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/mlpack/core/data/map_policies/missing_policy.hpp b/src/mlpack/core/data/map_policies/missing_policy.hpp
index 28f70c3..e8bb115 100644
--- a/src/mlpack/core/data/map_policies/missing_policy.hpp
+++ b/src/mlpack/core/data/map_policies/missing_policy.hpp
@@ -20,6 +20,7 @@
 
 namespace mlpack {
 namespace data {
+
 /**
  * MissingPolicy is used as a helper class for DatasetMapper. It tells how the
  * strings should be mapped. Purpose of this policy is to map all user-defined
@@ -71,28 +72,18 @@ class MissingPolicy
                        MapType& maps,
                        std::vector<Datatype>& /* types */)
   {
-    // If this condition is true, either we have no mapping for the given string
-    // or we have no mappings for the given dimension at all.  In either case,
-    // we create a mapping.
-    const double NaN = std::numeric_limits<double>::quiet_NaN();
-    if (missingSet.count(string) != 0 &&
-        (maps.count(dimension) == 0 ||
-         maps[dimension].first.left.count(string) == 0))
+    // Everything is mapped to NaN.  However we must still keep track of
+    // everything that we have mapped, so we add it to the maps if needed.
+    if (maps.count(dimension) == 0 ||
+        maps[dimension].first.left.count(string) == 0)
     {
       // This string does not exist yet.
       typedef boost::bimap<std::string, MappedType>::value_type PairType;
       maps[dimension].first.insert(PairType(string, NaN));
-
-      size_t& numMappings = maps[dimension].second;
-      ++numMappings;
-      return NaN;
-    }
-    else
-    {
-      // This string already exists in the mapping or not included in
-      // the missingSet.
-      return NaN;
+      maps[dimension].second++;
     }
+
+    return std::numeric_limits<MappedType>::quiet_NaN();
   }
 
   /**
@@ -141,7 +132,8 @@ class MissingPolicy
 
  private:
   // Note that missingSet and maps are different.
-  // missingSet specifies which value/string should be mapped.
+  // missingSet specifies which value/string should be mapped and may be a
+  // superset of 'maps'.
   std::set<std::string> missingSet;
 }; // class MissingPolicy
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list