[mlpack] 144/207: 1 : fix bug, did not consider case like "210DM, 1~200" 2 : fix bug, cannot parse transpose file with correct result
Barak A. Pearlmutter
barak+git at pearlmutter.net
Thu Mar 23 17:53:48 UTC 2017
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch master
in repository mlpack.
commit 47b726e1885f0d867c00dd5c96e91bb52325226e
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date: Sun Jun 5 00:22:27 2016 +0800
1 : fix bug: inputs such as "210DM, 1~200" were not handled
2 : fix bug: transposed files were not parsed correctly
---
src/mlpack/core/data/load_csv.hpp | 71 +++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 21 deletions(-)
diff --git a/src/mlpack/core/data/load_csv.hpp b/src/mlpack/core/data/load_csv.hpp
index aab567f..89dd19c 100644
--- a/src/mlpack/core/data/load_csv.hpp
+++ b/src/mlpack/core/data/load_csv.hpp
@@ -11,6 +11,8 @@
#include <mlpack/core/util/log.hpp>
#include <mlpack/core/arma_extend/arma_extend.hpp> // Includes Armadillo.
+
+#include <unordered_set>
#include <string>
#include "format.hpp"
@@ -164,7 +166,10 @@ private:
auto begin = line.begin();
const bool allNumber =
qi::parse(begin, line.end(), numRule[setNum] % ",");
- if(!allNumber)
+ //input like 2-200 or 2DM will make the parser fail,
+ //so we have to make sure col == inout.n_cols, else parse
+ //the input line again
+ if(!allNumber || col != inout.n_cols)
{
begin = line.begin();
col = 0;
@@ -185,19 +190,29 @@ private:
{
infoSet = DatasetInfo(ColSize());
inout.set_size(infoSet.Dimensionality(), RowSize());
- while(!TranposeParseImpl(inout, infoSet))
+ size_t parseTime = 0;
+ std::unordered_set<size_t> mapRows;
+ while(!TranposeParseImpl(inout, infoSet, mapRows))
{
-
+ //avoid infinite loop
+ ++parseTime;
+ infoSet = DatasetInfo(inout.n_rows);
+ if(parseTime == inout.n_rows)
+ {
+ return;
+ }
}
}
template<typename T>
- bool TranposeParseImpl(arma::Mat<T> &inout, DatasetInfo &infoSet)
+ bool TranposeParseImpl(arma::Mat<T> &inout, DatasetInfo &infoSet,
+ std::unordered_set<size_t> &mapRows)
{
using namespace boost::spirit;
size_t row = 0;
size_t col = 0;
+ size_t progress = 0;
std::string line;
inFile.clear();
inFile.seekg(0, std::ios::beg);
@@ -205,12 +220,15 @@ private:
auto setNum = [&](T val)
{
inout(row++, col) = val;
+ ++progress;
+ //std::cout<<val<<",";
};
auto setCharClass = [&](iter_type const &iter)
{
+ //std::cout<<std::string(iter.begin(), iter.end())<<",";
inout(row++, col) =
static_cast<T>(infoSet.MapString(std::string(iter.begin(), iter.end()),
- col));
+ progress++));
};
qi::rule<std::string::iterator, T()> numRule = CreateNumRule<T>();
@@ -218,28 +236,39 @@ private:
while(std::getline(inFile, line))
{
auto begin = line.begin();
- const bool allNumber =
- qi::parse(begin, line.end(), numRule[setNum] % ",");
- if(!allNumber)
+ const bool shouldMapNum = mapRows.find(row) != std::end(mapRows);
+ bool allNumber = false;
+ if(!shouldMapNum)
{
- begin = line.begin();
- const size_t dimension = infoSet.NumMappings(col);
- if((dimension == 0 && row == 0) || dimension != 0)
+ allNumber = qi::parse(begin, line.end(), numRule[setNum] % ",");
+ }
+ //std::cout<<"progress "<<parseProgress<<", "<<inout.n_rows<<std::endl;
+ //std::cout<<std::endl;
+ //input like 2-200 or 2DM is not fully parsed as numbers,
+ //so we have to make sure progress == inout.n_rows; otherwise
+ //record the row in mapRows and restart parsing so it is mapped as categories
+ if(shouldMapNum || !allNumber || progress != inout.n_rows)
+ {
+ //std::cout<<"not all number"<<std::endl;
+ mapRows.insert(row);
+
+ if(!shouldMapNum)
{
- row = 0;
- const bool canParse = qi::parse(begin, line.end(),
- charRule[setCharClass] % ",");
- if(!canParse)
- {
- throw std::runtime_error("LoadCSV cannot parse categories");
- }
+ return false;
}
- else
+
+ begin = line.begin();
+ row = 0;
+ progress = 0;
+ const bool canParse = qi::parse(begin, line.end(),
+ charRule[setCharClass] % ",");
+ //std::cout<<std::endl;
+ if(!canParse)
{
- return false;
+ throw std::runtime_error("LoadCSV cannot parse categories");
}
}
- row = 0; ++col;
+ row = 0; progress = 0; ++col;
}
return true;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list