[mlpack] 169/207: move implementation details from cpp back to hpp

Barak A. Pearlmutter barak+git at pearlmutter.net
Thu Mar 23 17:53:51 UTC 2017


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit 820519f293ec6dbebf98af4b27988342bc48babd
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date:   Wed Jun 29 05:50:20 2016 +0800

    move implementation details from cpp back to hpp
---
 src/mlpack/core/data/CMakeLists.txt |  1 -
 src/mlpack/core/data/load_csv.hpp   | 95 +++++++++++++++++++++++++++++++++++--
 2 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/src/mlpack/core/data/CMakeLists.txt b/src/mlpack/core/data/CMakeLists.txt
index 45d3972..be64aac 100644
--- a/src/mlpack/core/data/CMakeLists.txt
+++ b/src/mlpack/core/data/CMakeLists.txt
@@ -6,7 +6,6 @@ set(SOURCES
   extension.hpp
   format.hpp
   load_csv.hpp
-  load_csv.cpp
   load.hpp  
   load_impl.hpp
   load_arff.hpp
diff --git a/src/mlpack/core/data/load_csv.hpp b/src/mlpack/core/data/load_csv.hpp
index f0a0b38..58f3043 100644
--- a/src/mlpack/core/data/load_csv.hpp
+++ b/src/mlpack/core/data/load_csv.hpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <string>
 
+#include "extension.hpp"
 #include "format.hpp"
 #include "dataset_info.hpp"
 
@@ -29,7 +30,14 @@ namespace data /** Functions to load and save matrices and models. */ {
 class LoadCSV
 {
 public:
-  explicit LoadCSV(std::string file, bool fatal = false);
+  explicit LoadCSV(std::string file, bool fatal = false)  :
+    extension(Extension(file)),
+    fatalIfOpenFail(fatal),
+    fileName(std::move(file)),
+    inFile(fileName)
+  {
+    CanOpen();
+  }
 
   template<typename T>
   void Load(arma::Mat<T> &inout, DatasetInfo &infoSet, bool transpose = true)
@@ -51,9 +59,56 @@ public:
     }
   }
 
-  size_t ColSize();
+  size_t ColSize()
+  {
+    //boost tokenizer or strtok could do the same thing; I use
+    //spirit here because I think it makes a nice example
+    using namespace boost::spirit;
+    using bsi_type = boost::spirit::istream_iterator;
+    using iter_type = boost::iterator_range<bsi_type>;
+
+    inFile.clear();
+    inFile.seekg(0, std::ios::beg);
+    //spirit::qi requires iterators to be at least forward iterators,
+    //but std::istream_iterator is only an input iterator, so we use
+    //boost::spirit::istream_iterator to overcome this problem
+    bsi_type begin(inFile);
+    bsi_type end;
+    size_t col = 0;
+
+    //boost spirit parsers can be given "actions" (functors);
+    //when the parser finds a match, the attached functor is executed
+    auto findColSize = [&col](iter_type){ ++col; };
+
+    //qi::char_ matches a single character
+    //qi::char_(",\r\n") matches only a "," or "\r" or "\n" character
+    //* means the parser (e.g. qi::char_) may match zero or more characters
+    //~ means negate, so ~qi::char_(",\r\n") matches any character except ",", "\r" and "\n"
+    //parser % "," parses a list such as "1,2,3,apple" (note that there is no trailing comma)
+
+    //qi::raw suppresses the automatic attribute conversion of boost::spirit; without it, the
+    //parser would try to convert the match to std::string, which may allocate memory (if the
+    //small string optimization does not apply).
+    //Once the parser is wrapped in qi::raw, the attribute (the data passed to the functor)
+    //becomes a boost::iterator_range, which can save a lot of memory allocations
+    qi::parse(begin, end, qi::raw[*~qi::char_(",\r\n")][findColSize] % ",");
+
+    return col;
+  }
+
+  size_t RowSize()
+  {
+    inFile.clear();
+    inFile.seekg(0, std::ios::beg);
+    size_t row = 0;
+    std::string line;
+    while(std::getline(inFile, line))
+    {
+      ++row;
+    }
 
-  size_t RowSize();
+    return row;
+  }
 
 private:
   using iter_type = boost::iterator_range<std::string::iterator>;
@@ -79,7 +134,25 @@ private:
     }
   };
 
-  bool CanOpen();
+  bool CanOpen()
+  {
+    if(!inFile.is_open())
+    {
+      if(fatalIfOpenFail)
+      {
+        Log::Fatal << "Cannot open file '" << fileName << "'. " << std::endl;
+      }
+      else
+      {
+        Log::Warn << "Cannot open file '" << fileName << "'; load failed."
+                  << std::endl;
+      }
+      return false;
+    }
+    inFile.unsetf(std::ios::skipws);
+
+    return true;
+  }
 
   template<typename T>
   void NonTranposeParse(arma::Mat<T> &inout, DatasetInfo &infoSet)
@@ -260,7 +333,19 @@ private:
   }
 
   boost::spirit::qi::rule<std::string::iterator, iter_type(), boost::spirit::ascii::space_type>
-  CreateCharRule() const;
+  CreateCharRule() const
+  {
+    using namespace boost::spirit;
+
+    if(extension == "csv" || extension == "txt")
+    {
+      return qi::raw[*~qi::char_(",\r\n")];
+    }
+    else
+    {
+      return qi::raw[*~qi::char_("\t\r\n")];
+    }
+  }
 
   std::string extension;
   bool fatalIfOpenFail;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list