[mlpack] 145/207: fix bug--category conversion should be based on columns, not rows

Barak A. Pearlmutter barak+git at pearlmutter.net
Thu Mar 23 17:53:48 UTC 2017


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit 3f88f912e52c1b6cf47bfb6df4d4372fa720c15f
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date:   Sun Jun 5 02:18:12 2016 +0800

    fix bug--category conversion should be based on columns, not rows
---
 src/mlpack/core/data/load_csv.hpp | 85 ++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/src/mlpack/core/data/load_csv.hpp b/src/mlpack/core/data/load_csv.hpp
index 89dd19c..ce5c8c4 100644
--- a/src/mlpack/core/data/load_csv.hpp
+++ b/src/mlpack/core/data/load_csv.hpp
@@ -7,7 +7,7 @@
 #ifndef MLPACK_CORE_DATA_LOAD_CSV_HPP
 #define MLPACK_CORE_DATA_LOAD_CSV_HPP
 
-#include<boost/spirit/include/qi.hpp>
+#include <boost/spirit/include/qi.hpp>
 
 #include <mlpack/core/util/log.hpp>
 #include <mlpack/core/arma_extend/arma_extend.hpp> // Includes Armadillo.
@@ -191,8 +191,8 @@ private:
     infoSet = DatasetInfo(ColSize());
     inout.set_size(infoSet.Dimensionality(), RowSize());
     size_t parseTime = 0;
-    std::unordered_set<size_t> mapRows;
-    while(!TranposeParseImpl(inout, infoSet, mapRows))
+    std::set<size_t> mapCols;
+    while(!TranposeParseImpl(inout, infoSet, mapCols))
     {
       //avoid infinite loop
       ++parseTime;
@@ -206,29 +206,50 @@ private:
 
   template<typename T>
   bool TranposeParseImpl(arma::Mat<T> &inout, DatasetInfo &infoSet,
-                         std::unordered_set<size_t> &mapRows)
+                         std::set<size_t> &mapCols)
   {
     using namespace boost::spirit;
 
+    //static size_t loop = 0;
+    //std::cout<<"loop "<<loop++<<std::endl;
+
     size_t row = 0;
     size_t col = 0;
     size_t progress = 0;
     std::string line;
     inFile.clear();
     inFile.seekg(0, std::ios::beg);
-
     auto setNum = [&](T val)
     {
-      inout(row++, col) = val;
-      ++progress;
-      //std::cout<<val<<",";
+      //std::cout<<"val(" <<val<<"),";
+      if(mapCols.find(progress) != std::end(mapCols))
+      {
+        inout(row, col) =
+            static_cast<T>(infoSet.MapString(std::to_string(val),
+                                             progress));
+      }
+      else
+      {
+        inout(row, col) = val;
+      }
+      ++progress; ++row;
     };
     auto setCharClass = [&](iter_type const &iter)
     {
-      //std::cout<<std::string(iter.begin(), iter.end())<<",";
-      inout(row++, col) =
-          static_cast<T>(infoSet.MapString(std::string(iter.begin(), iter.end()),
-                                           progress++));
+      if(mapCols.find(progress) != std::end(mapCols))
+      {
+        //std::cout<<"nstr("<<std::string(iter.begin(), iter.end())<<"),";
+        inout(row, col) =
+            static_cast<T>(infoSet.MapString(std::string(iter.begin(), iter.end()),
+                                             progress));
+      }
+      else
+      {
+        //std::cout<<"str("<<std::string(iter.begin(), iter.end())<<"),";
+        mapCols.insert(progress);
+        //TODO : find a way to stop parsing from here
+      }
+      ++progress; ++row;
     };
 
     qi::rule<std::string::iterator, T()> numRule = CreateNumRule<T>();
@@ -236,39 +257,21 @@ private:
     while(std::getline(inFile, line))
     {
       auto begin = line.begin();
-      const bool shouldMapNum = mapRows.find(row) != std::end(mapRows);
-      bool allNumber = false;
-      if(!shouldMapNum)
+      row = 0;
+      progress = 0;
+      const size_t oldSize = mapCols.size();
+      const bool canParse = qi::parse(begin, line.end(),
+                                      (numRule[setNum] | charRule[setCharClass]) % ",");
+      //std::cout<<std::endl;
+      if(!canParse)
       {
-        allNumber = qi::parse(begin, line.end(), numRule[setNum] % ",");
+        throw std::runtime_error("LoadCSV cannot parse categories");
       }
-      //std::cout<<"progress "<<parseProgress<<", "<<inout.n_rows<<std::endl;
-      //std::cout<<std::endl;
-      //input like 2-200 or 2DM will make the parser fail,
-      //so we have to make sure col == inout.n_cols, else parse
-      //the input line again
-      if(shouldMapNum || !allNumber || progress != inout.n_rows)
+      if(mapCols.size() > oldSize)
       {
-        //std::cout<<"not all number"<<std::endl;
-        mapRows.insert(row);
-
-        if(!shouldMapNum)
-        {
-          return false;
-        }
-
-        begin = line.begin();
-        row = 0;
-        progress = 0;
-        const bool canParse = qi::parse(begin, line.end(),
-                                        charRule[setCharClass] % ",");
-        //std::cout<<std::endl;
-        if(!canParse)
-        {
-          throw std::runtime_error("LoadCSV cannot parse categories");
-        }
+        return false;
       }
-      row = 0; progress = 0; ++col;
+      ++col;
     }
 
     return true;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list