[mlpack] 158/207: simplify parser by phrase_parse
Barak A. Pearlmutter
barak+git at pearlmutter.net
Thu Mar 23 17:53:50 UTC 2017
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch master
in repository mlpack.
commit 82093f0db86909092b62fb99b94bdc98d66f2a4e
Author: stereomatchingkiss <stereomatchingkiss at gmail.com>
Date: Tue Jun 7 13:44:07 2016 +0800
simplify parser by phrase_parse
---
src/mlpack/core/data/load_csv.hpp | 60 ++++++++++++++++-----------------------
1 file changed, 25 insertions(+), 35 deletions(-)
diff --git a/src/mlpack/core/data/load_csv.hpp b/src/mlpack/core/data/load_csv.hpp
index 968d3ce..0a23a97 100644
--- a/src/mlpack/core/data/load_csv.hpp
+++ b/src/mlpack/core/data/load_csv.hpp
@@ -27,7 +27,7 @@ namespace data /** Functions to load and save matrices and models. */ {
*http://theboostcpplibraries.com/boost.spirit for quick review.
*/
class LoadCSV
-{
+{
public:
explicit LoadCSV(std::string file, bool fatal = false) :
extension(Extension(file)),
@@ -86,8 +86,8 @@ public:
//parse % "," means you want to parse a string like "1,2,3,apple" (note that there is no trailing comma)
//qi::raw restrict the automatic conversion of boost::spirit, without it, spirit parser
- //will try to convert the string to std::string, this may cause memory allocation(if small string
- //optimization fail).
+ //will try to convert the string to std::string, this may cause memory allocation(if small string
+ //optimization fail).
//After we wrap the parser with qi::raw, the attribute (the data accepted by the functor) will
//become a boost::iterator_range, which can save a lot of memory allocations
qi::parse(begin, end, qi::raw[*~qi::char_(",\r\n")][findColSize] % ",");
@@ -182,20 +182,21 @@ private:
row));
};
- qi::rule<std::string::iterator, T()> numRule = CreateNumRule<T>();
- qi::rule<std::string::iterator, iter_type()> charRule = CreateCharRule();
+ auto numRule = CreateNumRule<T>();
+ auto charRule = CreateCharRule();
while(std::getline(inFile, line))
{
auto begin = line.begin();
//parse the numbers from a line (e.g. 1,2,3,4); whenever the parser finds a number
//it executes the setNum function
- qi::parse(begin, line.end(), numRule[setNum] % ",");
+ qi::phrase_parse(begin, line.end(), numRule[setNum] % ",", ascii::space);
if(col != inout.n_cols)
{
begin = line.begin();
col = 0;
- const bool canParse = qi::parse(begin, line.end(),
- charRule[setCharClass] % ",");
+ const bool canParse = qi::phrase_parse(begin, line.end(),
+ charRule[setCharClass] % ",",
+ ascii::space);
if(!canParse)
{
throw std::runtime_error("LoadCSV cannot parse categories");
@@ -231,9 +232,6 @@ private:
{
using namespace boost::spirit;
- //static size_t loop = 0;
- //std::cout<<"loop "<<loop++<<std::endl;
-
size_t row = 0;
size_t col = 0;
size_t progress = 0;
@@ -242,7 +240,6 @@ private:
inFile.seekg(0, std::ios::beg);
auto setNum = [&](T val)
{
- //std::cout<<"val(" <<val<<"),";
if(mapCols.find(progress) != std::end(mapCols))
{
inout(row, col) =
@@ -259,7 +256,6 @@ private:
{
if(mapCols.find(progress) != std::end(mapCols))
{
- //std::cout<<"nstr("<<std::string(iter.begin(), iter.end())<<"),";
std::string str(iter.begin(), iter.end());
if(str == "\t")
{
@@ -271,26 +267,24 @@ private:
}
else
{
- //std::cout<<"str("<<std::string(iter.begin(), iter.end())<<"),";
mapCols.insert(progress);
- //TODO : find a way to stop parsing from here
}
++progress; ++row;
};
- qi::rule<std::string::iterator, T()> numRule = CreateNumRule<T>();
- qi::rule<std::string::iterator, iter_type()> charRule = CreateCharRule();
+ auto numRule = CreateNumRule<T>();
+ auto charRule = CreateCharRule();
while(std::getline(inFile, line))
{
auto begin = line.begin();
row = 0;
progress = 0;
const size_t oldSize = mapCols.size();
- //parse number of characters from a line, it will execute setNum if it is number,
- //else execute setCharClass, "|" means "if not a, then b"
- const bool canParse = qi::parse(begin, line.end(),
- (numRule[setNum] | (charRule)[setCharClass]) % ",");
- //std::cout<<std::endl;
+ //parse number of characters from a line, it will execute setNum if it is number,
+ //else execute setCharClass, "|" means "if not a, then b"
+ const bool canParse = qi::phrase_parse(begin, line.end(),
+ (numRule[setNum] | charRule[setCharClass]) % ",",
+ ascii::space);
if(!canParse)
{
throw std::runtime_error("LoadCSV cannot parse categories");
@@ -306,7 +300,8 @@ private:
}
template<typename T>
- boost::spirit::qi::rule<std::string::iterator, T()> CreateNumRule() const
+ boost::spirit::qi::rule<std::string::iterator, T(), boost::spirit::ascii::space_type>
+ CreateNumRule() const
{
using namespace boost::spirit;
@@ -319,40 +314,35 @@ private:
//qi::omit can omit the attributes of spirit, every parser of spirit
//has attribute(the type will pass into actions(functor))
//if you do not omit it, the attribute combine with attribute may
- //change the attribute
+ //change the attribute
//input like 2-200 or 2DM will make the parser fail,
//so we use the look-ahead parser ("&") to make sure the next
//character is "," or end of line (eol) or end of input (eoi);
//the look-ahead parser does not consume any input or generate
//any attribute
-
- //"-" means one or zero(same as "-" of EBNF)
if(extension == "csv" || extension == "txt")
{
- return qi::skip(qi::char_(" "))[elemParser] >> -qi::omit[*qi::char_(" ")]
- >> &(qi::lit(",") | qi::eol | qi::eoi);
+ return elemParser >> &(qi::lit(",") | qi::eol | qi::eoi);
}
else
{
- return qi::skip(qi::char_(" "))[elemParser] >> -qi::omit[*qi::char_(" ")]
- >> &(qi::lit("\t") | qi::eol | qi::eoi);
+ return elemParser >> &(qi::lit("\t") | qi::eol | qi::eoi);
}
}
- boost::spirit::qi::rule<std::string::iterator, iter_type()> CreateCharRule() const
+ boost::spirit::qi::rule<std::string::iterator, iter_type(), boost::spirit::ascii::space_type>
+ CreateCharRule() const
{
using namespace boost::spirit;
if(extension == "csv" || extension == "txt")
{
- return -qi::omit[*qi::char_(" ")] >> qi::raw[*~qi::char_(" ,\r\n")]
- >> -qi::omit[*qi::char_(" ")];
+ return qi::raw[*~qi::char_(" ,\r\n")];
}
else
{
- return -qi::omit[*qi::char_(" ")] >> qi::raw[*~qi::char_(" \t\r\n")]
- >> -qi::omit[*qi::char_(" ")];
+ return qi::raw[*~qi::char_(" \t\r\n")];
}
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list