[mlpack] 145/207: fix bug--category conversion should be based on columns, not rows
Barak A. Pearlmutter
barak+git at pearlmutter.net
Thu Mar 23 17:53:48 UTC 2017
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch master
in repository mlpack.
commit 3f88f912e52c1b6cf47bfb6df4d4372fa720c15f
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date: Sun Jun 5 02:18:12 2016 +0800
fix bug--category conversion should be based on columns, not rows
---
src/mlpack/core/data/load_csv.hpp | 85 ++++++++++++++++++++-------------------
1 file changed, 44 insertions(+), 41 deletions(-)
diff --git a/src/mlpack/core/data/load_csv.hpp b/src/mlpack/core/data/load_csv.hpp
index 89dd19c..ce5c8c4 100644
--- a/src/mlpack/core/data/load_csv.hpp
+++ b/src/mlpack/core/data/load_csv.hpp
@@ -7,7 +7,7 @@
#ifndef MLPACK_CORE_DATA_LOAD_CSV_HPP
#define MLPACK_CORE_DATA_LOAD_CSV_HPP
-#include<boost/spirit/include/qi.hpp>
+#include <boost/spirit/include/qi.hpp>
#include <mlpack/core/util/log.hpp>
#include <mlpack/core/arma_extend/arma_extend.hpp> // Includes Armadillo.
@@ -191,8 +191,8 @@ private:
infoSet = DatasetInfo(ColSize());
inout.set_size(infoSet.Dimensionality(), RowSize());
size_t parseTime = 0;
- std::unordered_set<size_t> mapRows;
- while(!TranposeParseImpl(inout, infoSet, mapRows))
+ std::set<size_t> mapCols;
+ while(!TranposeParseImpl(inout, infoSet, mapCols))
{
//avoid infinite loop
++parseTime;
@@ -206,29 +206,50 @@ private:
template<typename T>
bool TranposeParseImpl(arma::Mat<T> &inout, DatasetInfo &infoSet,
- std::unordered_set<size_t> &mapRows)
+ std::set<size_t> &mapCols)
{
using namespace boost::spirit;
+ //static size_t loop = 0;
+ //std::cout<<"loop "<<loop++<<std::endl;
+
size_t row = 0;
size_t col = 0;
size_t progress = 0;
std::string line;
inFile.clear();
inFile.seekg(0, std::ios::beg);
-
auto setNum = [&](T val)
{
- inout(row++, col) = val;
- ++progress;
- //std::cout<<val<<",";
+ //std::cout<<"val(" <<val<<"),";
+ if(mapCols.find(progress) != std::end(mapCols))
+ {
+ inout(row, col) =
+ static_cast<T>(infoSet.MapString(std::to_string(val),
+ progress));
+ }
+ else
+ {
+ inout(row, col) = val;
+ }
+ ++progress; ++row;
};
auto setCharClass = [&](iter_type const &iter)
{
- //std::cout<<std::string(iter.begin(), iter.end())<<",";
- inout(row++, col) =
- static_cast<T>(infoSet.MapString(std::string(iter.begin(), iter.end()),
- progress++));
+ if(mapCols.find(progress) != std::end(mapCols))
+ {
+ //std::cout<<"nstr("<<std::string(iter.begin(), iter.end())<<"),";
+ inout(row, col) =
+ static_cast<T>(infoSet.MapString(std::string(iter.begin(), iter.end()),
+ progress));
+ }
+ else
+ {
+ //std::cout<<"str("<<std::string(iter.begin(), iter.end())<<"),";
+ mapCols.insert(progress);
+ //TODO : find a way to stop parsing from here
+ }
+ ++progress; ++row;
};
qi::rule<std::string::iterator, T()> numRule = CreateNumRule<T>();
@@ -236,39 +257,21 @@ private:
while(std::getline(inFile, line))
{
auto begin = line.begin();
- const bool shouldMapNum = mapRows.find(row) != std::end(mapRows);
- bool allNumber = false;
- if(!shouldMapNum)
+ row = 0;
+ progress = 0;
+ const size_t oldSize = mapCols.size();
+ const bool canParse = qi::parse(begin, line.end(),
+ (numRule[setNum] | charRule[setCharClass]) % ",");
+ //std::cout<<std::endl;
+ if(!canParse)
{
- allNumber = qi::parse(begin, line.end(), numRule[setNum] % ",");
+ throw std::runtime_error("LoadCSV cannot parse categories");
}
- //std::cout<<"progress "<<parseProgress<<", "<<inout.n_rows<<std::endl;
- //std::cout<<std::endl;
- //input like 2-200 or 2DM will make the parser fail,
- //so we have to make sure col == inout.n_cols, else parse
- //the input line again
- if(shouldMapNum || !allNumber || progress != inout.n_rows)
+ if(mapCols.size() > oldSize)
{
- //std::cout<<"not all number"<<std::endl;
- mapRows.insert(row);
-
- if(!shouldMapNum)
- {
- return false;
- }
-
- begin = line.begin();
- row = 0;
- progress = 0;
- const bool canParse = qi::parse(begin, line.end(),
- charRule[setCharClass] % ",");
- //std::cout<<std::endl;
- if(!canParse)
- {
- throw std::runtime_error("LoadCSV cannot parse categories");
- }
+ return false;
}
- row = 0; progress = 0; ++col;
+ ++col;
}
return true;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list