[mlpack] 13/58: * added NMF, SVDBatch, SVDIncompleteIncremental and SVDCompleteIncremental to CF executable
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Tue Sep 9 13:19:39 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit e0f62d3728779b0261bdd12ca906bc637415b5b4
Author: sumedhghaisas <sumedhghaisas at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Sun Aug 17 21:58:26 2014 +0000
* added NMF, SVDBatch, SVDIncompleteIncremental and SVDCompleteIncremental to CF executable
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17059 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/cf/cf.hpp | 16 +-------
src/mlpack/methods/cf/cf_impl.hpp | 46 ++-------------------
src/mlpack/methods/cf/cf_main.cpp | 75 ++++++++++++++++++++++-------------
src/mlpack/methods/cf/svd_wrapper.hpp | 3 ++
src/mlpack/tests/to_string_test.cpp | 3 +-
5 files changed, 56 insertions(+), 87 deletions(-)
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 0312ccb..6f6b7b1 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -79,20 +79,6 @@ class CF
{
public:
/**
- * Initialize the CF object. Store a reference to the data that we
- * will be using. There are parameters that can be set; default values
- * are provided for each of them. If the rank is left unset (or is set to 0),
- * a simple density-based heuristic will be used to choose a rank.
- *
- * @param data Initial (user, item, rating) matrix.
- * @param numUsersForSimilarity Size of the neighborhood.
- * @param rank Rank parameter for matrix factorization.
- */
- CF(arma::mat& data,
- const size_t numUsersForSimilarity = 5,
- const size_t rank = 0);
-
- /**
* Initialize the CF object using an instantiated factorizer. Store a
* reference to the data that we will be using. There are parameters that can
* be set; default values are provided for each of them. If the rank is left
@@ -105,7 +91,7 @@ class CF
* @param rank Rank parameter for matrix factorization.
*/
CF(arma::mat& data,
- FactorizerType& factorizer,
+ FactorizerType factorizer = FactorizerType(),
const size_t numUsersForSimilarity = 5,
const size_t rank = 0);
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index c455018..53b0a05 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -29,6 +29,7 @@ void ApplyFactorizer(arma::mat& data,
FactorizerTraits<FactorizerType>::UsesCoordinateList == false,
int*>::type = 0)
{
+ (void)data;
factorizer.Apply(cleanedData, rank, w, h);
}
@@ -48,57 +49,16 @@ void ApplyFactorizer(arma::mat& data,
FactorizerTraits<FactorizerType>::UsesCoordinateList == true,
int*>::type = 0)
{
+ (void)cleanedData;
factorizer.Apply(data, rank, w, h);
}
/**
- * Construct the CF object.
- */
-template<typename FactorizerType>
-CF<FactorizerType>::CF(arma::mat& data,
- const size_t numUsersForSimilarity,
- const size_t rank) :
- numUsersForSimilarity(numUsersForSimilarity),
- rank(rank),
- factorizer()
-{
- // Validate neighbourhood size.
- if(numUsersForSimilarity < 1)
- {
- Log::Warn << "CF::CF(): neighbourhood size should be > 0("
- << numUsersForSimilarity << " given). Setting value to 5.\n";
- //Setting Default Value of 5
- this->numUsersForSimilarity = 5;
- }
-
- CleanData(data);
-
- // Check if the user wanted us to choose a rank for them.
- if(rank == 0)
- {
- // This is a simple heuristic that picks a rank based on the density of the
- // dataset between 5 and 105.
- const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
- const size_t rankEstimate = size_t(density) + 5;
-
- // Set to heuristic value.
- Log::Info << "No rank given for decomposition; using rank of "
- << rankEstimate << " calculated by density-based heuristic."
- << std::endl;
- this->rank = rankEstimate;
- }
-
- // Operations independent of the query:
- // Decompose the sparse data matrix to user and data matrices.
- ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
-}
-
-/**
* Construct the CF object using an instantiated factorizer.
*/
template<typename FactorizerType>
CF<FactorizerType>::CF(arma::mat& data,
- FactorizerType& factorizer,
+ FactorizerType factorizer,
const size_t numUsersForSimilarity,
const size_t rank) :
numUsersForSimilarity(numUsersForSimilarity),
diff --git a/src/mlpack/methods/cf/cf_main.cpp b/src/mlpack/methods/cf/cf_main.cpp
index 7c873d4..7998c79 100644
--- a/src/mlpack/methods/cf/cf_main.cpp
+++ b/src/mlpack/methods/cf/cf_main.cpp
@@ -6,10 +6,13 @@
*/
#include <mlpack/core.hpp>
+
+#include <mlpack/methods/amf/amf.hpp>
#include "cf.hpp"
using namespace mlpack;
using namespace mlpack::cf;
+using namespace mlpack::amf;
using namespace std;
// Document program.
@@ -40,11 +43,8 @@ PARAM_STRING("query_file", "List of users for which recommendations are to "
PARAM_STRING("output_file","File to save output recommendations to.", "o",
"recommendations.csv");
-// These features are not yet available in the CF code.
-//PARAM_STRING("algorithm", "Algorithm used for CF ('als' or 'svd').", "a",
-// "als");
-//PARAM_STRING("nearest_neighbor_algorithm", "Similarity search procedure to "
-// "be used for generating recommendations.", "s", "knn");
+PARAM_STRING("algorithm", "Algorithm used for matrix factorization.", "a",
+ "NMF");
PARAM_INT("recommendations", "Number of recommendations to generate for each "
"query user.", "r", 5);
@@ -53,29 +53,15 @@ PARAM_INT("neighborhood", "Size of the neighborhood of similar users to "
PARAM_INT("rank", "Rank of decomposed matrices.", "R", 2);
-int main(int argc, char** argv)
+template<typename Factorizer>
+void ComputeRecommendations(Factorizer factorizer,
+ arma::mat& dataset,
+ const size_t numRecs,
+ const size_t neighbourhood,
+ const size_t rank,
+ arma::Mat<size_t>& recommendations)
{
- // Parse command line options.
- CLI::ParseCommandLine(argc, argv);
-
- // Read from the input file.
- const string inputFile = CLI::GetParam<string>("input_file");
- arma::mat dataset;
- data::Load(inputFile, dataset, true);
-
- // Recommendation matrix.
- arma::Mat<size_t> recommendations;
-
- // Get parameters.
- const size_t numRecs = (size_t) CLI::GetParam<int>("recommendations");
- const size_t neighborhood = (size_t) CLI::GetParam<int>("neighborhood");
- const size_t rank = (size_t) CLI::GetParam<int>("rank");
-
- // Perform decomposition to prepare for recommendations.
- Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
- CF<> c(dataset);
- c.NumUsersForSimilarity(neighborhood);
- c.Rank(rank);
+ CF<Factorizer> c(dataset, factorizer, neighbourhood, rank);
// Reading users.
const string queryFile = CLI::GetParam<string>("query_file");
@@ -96,6 +82,41 @@ int main(int argc, char** argv)
Log::Info << "Generating recommendations for all users." << endl;
c.GetRecommendations(numRecs, recommendations);
}
+}
+
+#define CR(x) ComputeRecommendations(x, dataset, numRecs, neighborhood, rank, recommendations)
+
+int main(int argc, char** argv)
+{
+ // Parse command line options.
+ CLI::ParseCommandLine(argc, argv);
+
+ // Read from the input file.
+ const string inputFile = CLI::GetParam<string>("input_file");
+ arma::mat dataset;
+ data::Load(inputFile, dataset, true);
+
+ // Recommendation matrix.
+ arma::Mat<size_t> recommendations;
+
+ // Get parameters.
+ const size_t numRecs = (size_t) CLI::GetParam<int>("recommendations");
+ const size_t neighborhood = (size_t) CLI::GetParam<int>("neighborhood");
+ const size_t rank = (size_t) CLI::GetParam<int>("rank");
+
+ // Perform decomposition to prepare for recommendations.
+ Log::Info << "Performing CF matrix decomposition on dataset..." << endl;
+
+ const string algo = CLI::GetParam<string>("algorithm");
+
+ if(algo == "NMF")
+ CR(NMFALSFactorizer());
+ else if(algo == "SVDBatch")
+ CR(SparseSVDBatchFactorizer());
+ else if(algo == "SVDIncompleteIncremental")
+ CR(SparseSVDIncompleteIncrementalFactorizer());
+ else if(algo == "SVDCompleteIncremental")
+ CR(SparseSVDCompleteIncrementalFactorizer());
const string outputFile = CLI::GetParam<string>("output_file");
data::Save(outputFile, recommendations);
diff --git a/src/mlpack/methods/cf/svd_wrapper.hpp b/src/mlpack/methods/cf/svd_wrapper.hpp
index 3835e89..89b6d5a 100644
--- a/src/mlpack/methods/cf/svd_wrapper.hpp
+++ b/src/mlpack/methods/cf/svd_wrapper.hpp
@@ -74,6 +74,9 @@ class SVDWrapper
Factorizer factorizer;
}; // class SVDWrapper
+//! add simple typedefs
+typedef SVDWrapper<DummyClass> ArmaSVDFactorizer;
+
//! include the implementation
#include "svd_wrapper_impl.hpp"
diff --git a/src/mlpack/tests/to_string_test.cpp b/src/mlpack/tests/to_string_test.cpp
index 0b86d18..d06ccd4 100644
--- a/src/mlpack/tests/to_string_test.cpp
+++ b/src/mlpack/tests/to_string_test.cpp
@@ -281,7 +281,6 @@ BOOST_AUTO_TEST_CASE(MRKDString)
BOOST_AUTO_TEST_CASE(CFString)
{
- size_t a = 1 ;
arma::mat c(3, 3);
c(0, 0) = 1;
c(1, 0) = 2;
@@ -292,7 +291,7 @@ BOOST_AUTO_TEST_CASE(CFString)
c(0, 2) = 1;
c(1, 2) = 3;
c(2, 2) = 0.7;
- mlpack::cf::CF<> d(c, a, a);
+ mlpack::cf::CF<> d(c);
Log::Debug << d;
std::string s = d.ToString();
BOOST_REQUIRE_NE(s, "");
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits mailing list