[opengm] 137/386: added loss serialization; test-dataset-io still fails for sum of experts
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Aug 31 08:36:21 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/master
in repository opengm.
commit 119d873652f95b0aa57c6a5c535fe0a93caf3cd3
Author: mschiegg <martin.schiegg at iwr.uni-heidelberg.de>
Date: Thu Dec 18 17:28:11 2014 +0100
added loss serialization; test-dataset-io still fails for sum of experts
---
include/opengm/learning/dataset/dataset_io.hxx | 40 +++++++++---
.../learning/loss/generalized-hammingloss.hxx | 66 +++++++++++++++++++-
include/opengm/learning/loss/hammingloss.hxx | 18 ++++++
include/opengm/learning/loss/noloss.hxx | 18 ++++++
src/unittest/learning/test_dataset_io.cxx | 71 +++++++++++++++++-----
5 files changed, 186 insertions(+), 27 deletions(-)
diff --git a/include/opengm/learning/dataset/dataset_io.hxx b/include/opengm/learning/dataset/dataset_io.hxx
index 0746689..8203e0a 100644
--- a/include/opengm/learning/dataset/dataset_io.hxx
+++ b/include/opengm/learning/dataset/dataset_io.hxx
@@ -6,6 +6,9 @@
#include <cstdlib>
#include <opengm/graphicalmodel/graphicalmodel_hdf5.hxx>
#include <opengm/opengm.hxx>
+#include "opengm/learning/loss/generalized-hammingloss.hxx"
+#include "opengm/learning/loss/hammingloss.hxx"
+#include "opengm/learning/loss/noloss.hxx"
//#include <H5Cpp.h>
namespace opengm{
@@ -20,11 +23,12 @@ namespace opengm{
};
template<class DATASET>
- void DatasetSerialization::save(const DATASET& dataset, const std::string datasetpath, const std::string prefix) {
+ void DatasetSerialization::save(const DATASET& dataset, const std::string datasetpath, const std::string prefix) {
typedef typename DATASET::GMType GMType;
+ typedef typename DATASET::LossParameterType LossParameterType;
typedef typename GMType::LabelType LabelType;
typedef typename GMType::ValueType ValueType;
-
+
std::vector<size_t> numWeights(1,dataset.getNumberOfWeights());
std::vector<size_t> numModels(1,dataset.getNumberOfModels());
@@ -38,21 +42,25 @@ namespace opengm{
for(size_t m=0; m<dataset.getNumberOfModels(); ++m){
const GMType& gm = dataset.getModel(m);
const std::vector<LabelType>& gt = dataset.getGT(m);
- std::stringstream ss, ss2;
+ const LossParameterType& lossParam = dataset.getLossParameters(m);
+ std::stringstream ss;
ss << datasetpath <<"/"<<prefix<<"gm_" << m <<".h5";
- opengm::hdf5::save(gm,ss.str(),"gm");
+ opengm::hdf5::save(gm, ss.str(), "gm");
hid_t file = marray::hdf5::openFile(ss.str(), marray::hdf5::READ_WRITE);
marray::hdf5::save(file,"gt",gt);
+ hid_t lossGrp = marray::hdf5::createGroup(file,"loss");
+ lossParam.save(lossGrp);
marray::hdf5::closeFile(file);
}
- };
+ }
template<class DATASET>
void DatasetSerialization::loadAll(const std::string datasetpath, const std::string prefix, DATASET& dataset) {
typedef typename DATASET::GMType GMType;
typedef typename GMType::LabelType LabelType;
typedef typename GMType::ValueType ValueType;
+ typedef typename DATASET::LossParameterType LossParameterType;
//Load Header
std::stringstream hss;
@@ -68,19 +76,33 @@ namespace opengm{
dataset.gms_.resize(numModel);
dataset.gmsWithLoss_.resize(numModel);
dataset.gts_.resize(numModel);
+ dataset.lossParams_.resize(numModel);
+ dataset.count_.resize(numModel);
+ dataset.isCached_.resize(numModel);
dataset.weights_ = opengm::learning::Weights<ValueType>(numWeights);
+ OPENGM_ASSERT_OP(dataset.lossParams_.size(), ==, numModel);
//Load Models and ground truth
for(size_t m=0; m<numModel; ++m){
std::stringstream ss;
ss << datasetpath <<"/"<<prefix<<"gm_" << m <<".h5";
hid_t file = marray::hdf5::openFile(ss.str());
marray::hdf5::loadVec(file, "gt", dataset.gts_[m]);
- marray::hdf5::closeFile(file);
- opengm::hdf5::load(dataset.gms_[m],ss.str(),"gm");
+ opengm::hdf5::load(dataset.gms_[m],ss.str(),"gm");
+
+ LossParameterType lossParam;
+ hid_t lossGrp = marray::hdf5::openGroup(file, "loss");
+ lossParam.load(lossGrp);
+ std::vector<std::size_t> lossId;
+ marray::hdf5::loadVec(lossGrp, "lossId", lossId);
+ OPENGM_CHECK_OP(lossId.size(), ==, 1, "");
+ OPENGM_CHECK_OP(lossParam.getLossId(), ==, lossId[0],"the dataset needs to be initialized with the same loss type as saved");
+ dataset.lossParams_[m] = lossParam;
+
OPENGM_CHECK_OP(dataset.gts_[m].size(), == ,dataset.gms_[m].numberOfVariables(), "");
- dataset.buildModelWithLoss(m);
+ marray::hdf5::closeFile(file);
+ dataset.buildModelWithLoss(m);
}
- };
+ }
}
}
diff --git a/include/opengm/learning/loss/generalized-hammingloss.hxx b/include/opengm/learning/loss/generalized-hammingloss.hxx
index bb78154..16ef2ff 100644
--- a/include/opengm/learning/loss/generalized-hammingloss.hxx
+++ b/include/opengm/learning/loss/generalized-hammingloss.hxx
@@ -3,6 +3,9 @@
#define OPENGM_GENERALIZED_HAMMING_LOSS_HXX
#include "opengm/functions/explicit_function.hxx"
+#include "opengm/graphicalmodel/graphicalmodel_hdf5.hxx"
+#include "hdf5.h"
+
namespace opengm {
namespace learning {
@@ -16,6 +19,9 @@ class GeneralizedHammingLoss{
public:
class Parameter{
public:
+ double getNodeLossMultiplier(const size_t i) const;
+ double getLabelLossMultiplier(const size_t i) const;
+
std::vector<double> nodeLossMultiplier_;
std::vector<double> labelLossMultiplier_;
@@ -28,6 +34,19 @@ public:
bool operator>(const GeneralizedHammingLoss & other) const{
nodeLossMultiplier_ > labelLossMultiplier_;
}
+
+ /**
+ * serializes the parameter object to the given hdf5 group handle;
+ * save() writes a dataset "lossId" into the group holding the
+ * numeric loss type identifier
+ **/
+ void save(hid_t& groupHandle) const;
+ void load(const hid_t& groupHandle);
+ static std::size_t getLossId() { return lossId_; }
+
+ private:
+ static const std::size_t lossId_ = 16001;
+
};
@@ -44,6 +63,47 @@ private:
Parameter param_;
};
+inline double GeneralizedHammingLoss::Parameter::getNodeLossMultiplier(const size_t i) const {
+ if(i >= this->nodeLossMultiplier_.size()) {
+ return 1.;
+ }
+ return this->nodeLossMultiplier_[i];
+}
+
+inline double GeneralizedHammingLoss::Parameter::getLabelLossMultiplier(const size_t i) const {
+ if(i >= this->labelLossMultiplier_.size()) {
+ return 1.;
+ }
+ return this->labelLossMultiplier_[i];
+}
+
+inline void GeneralizedHammingLoss::Parameter::save(hid_t& groupHandle) const {
+ std::vector<std::size_t> name;
+ name.push_back(this->getLossId());
+ marray::hdf5::save(groupHandle,"lossId",name);
+
+ if (this->nodeLossMultiplier_.size() > 0) {
+ marray::hdf5::save(groupHandle,"nodeLossMultiplier",this->nodeLossMultiplier_);
+ }
+ if (this->labelLossMultiplier_.size() > 0) {
+ marray::hdf5::save(groupHandle,"labelLossMultiplier",this->labelLossMultiplier_);
+ }
+}
+
+inline void GeneralizedHammingLoss::Parameter::load(const hid_t& groupHandle) {
+ if (H5Dopen(groupHandle, "nodeLossMultiplier", H5P_DEFAULT) >= 0) {
+ marray::hdf5::loadVec(groupHandle, "nodeLossMultiplier", this->nodeLossMultiplier_);
+ } else {
+ std::cout << "nodeLossMultiplier of GeneralizedHammingLoss not found, setting default values" << std::endl;
+ }
+
+ if (H5Dopen(groupHandle, "labelLossMultiplier", H5P_DEFAULT) >= 0) {
+ marray::hdf5::loadVec(groupHandle, "labelLossMultiplier", this->labelLossMultiplier_);
+ } else {
+ std::cout << "labelLossMultiplier of GeneralizedHammingLoss not found, setting default values" << std::endl;
+ }
+}
+
template<class IT1, class IT2>
double GeneralizedHammingLoss::loss(IT1 labelBegin, const IT1 labelEnd, IT2 GTBegin, const IT2 GTEnd) const
{
@@ -51,8 +111,8 @@ double GeneralizedHammingLoss::loss(IT1 labelBegin, const IT1 labelEnd, IT2 GTBe
size_t nodeIndex = 0;
for(; labelBegin!= labelEnd; ++labelBegin, ++GTBegin, ++nodeIndex){
- if(*labelBegin != *GTBegin){
- loss += param_.nodeLossMultiplier_[nodeIndex] * param_.labelLossMultiplier_[*labelBegin];
+ if(*labelBegin != *GTBegin){
+ loss += param_.getNodeLossMultiplier(nodeIndex) * param_.getLabelLossMultiplier(*labelBegin);
}
}
return loss;
@@ -67,7 +127,7 @@ void GeneralizedHammingLoss::addLoss(GM& gm, IT gt) const
opengm::ExplicitFunction<typename GM::ValueType,typename GM::IndexType, typename GM::LabelType> f(&numL, &(numL)+1, 0);
for(typename GM::LabelType l = 0; l < numL; ++l){
- f(l) = - param_.nodeLossMultiplier_[i] * param_.labelLossMultiplier_[l];
+ f(l) = - param_.getNodeLossMultiplier(i) * param_.getLabelLossMultiplier(l);
}
f(*gt) = 0;
diff --git a/include/opengm/learning/loss/hammingloss.hxx b/include/opengm/learning/loss/hammingloss.hxx
index 8500f76..783d615 100644
--- a/include/opengm/learning/loss/hammingloss.hxx
+++ b/include/opengm/learning/loss/hammingloss.hxx
@@ -3,6 +3,8 @@
#define OPENGM_HAMMING_LOSS_HXX
#include "opengm/functions/explicit_function.hxx"
+#include "opengm/graphicalmodel/graphicalmodel_hdf5.hxx"
+
namespace opengm {
namespace learning {
class HammingLoss{
@@ -18,6 +20,16 @@ namespace opengm {
bool operator>(const HammingLoss & other) const{
return false;
}
+ /**
+ * serializes the parameter object to the given hdf5 group handle;
+ * save() writes a dataset "lossId" into the group holding the
+ * numeric loss type identifier
+ **/
+ void save(hid_t& groupHandle) const;
+ inline void load(const hid_t& ) {}
+ static std::size_t getLossId() { return lossId_; }
+ private:
+ static const std::size_t lossId_ = 16000;
};
public:
@@ -32,6 +44,12 @@ namespace opengm {
Parameter param_;
};
+ inline void HammingLoss::Parameter::save(hid_t &groupHandle) const {
+ std::vector<std::size_t> name;
+ name.push_back(this->getLossId());
+ marray::hdf5::save(groupHandle,"lossId",name);
+ }
+
template<class IT1, class IT2>
double HammingLoss::loss(IT1 labelBegin, const IT1 labelEnd, IT2 GTBegin, const IT2 GTEnd) const
{
diff --git a/include/opengm/learning/loss/noloss.hxx b/include/opengm/learning/loss/noloss.hxx
index 067bd6e..bce7acc 100644
--- a/include/opengm/learning/loss/noloss.hxx
+++ b/include/opengm/learning/loss/noloss.hxx
@@ -3,6 +3,8 @@
#define OPENGM_NO_LOSS_HXX
#include "opengm/functions/explicit_function.hxx"
+#include "opengm/graphicalmodel/graphicalmodel_hdf5.hxx"
+
namespace opengm {
namespace learning {
@@ -19,6 +21,16 @@ namespace learning {
bool operator>(const NoLoss & other) const{
return false;
}
+ /**
+ * serializes the parameter object to the given hdf5 group handle;
+ * save() writes a dataset "lossId" into the group holding the
+ * numeric loss type identifier
+ **/
+ void save(hid_t& groupHandle) const;
+ inline void load(const hid_t& ) {}
+ static std::size_t getLossId() { return lossId_; }
+ private:
+ static const std::size_t lossId_ = 0;
};
public:
@@ -37,6 +49,12 @@ namespace learning {
};
+ inline void NoLoss::Parameter::save(hid_t& groupHandle) const {
+ std::vector<std::size_t> name;
+ name.push_back(this->getLossId());
+ marray::hdf5::save(groupHandle,"lossId",name);
+ }
+
template<class IT1, class IT2>
double NoLoss::loss(IT1 labelBegin, const IT1 labelEnd, IT2 GTBegin, const IT2 GTEnd) const
{
diff --git a/src/unittest/learning/test_dataset_io.cxx b/src/unittest/learning/test_dataset_io.cxx
index fc86946..53b77b7 100644
--- a/src/unittest/learning/test_dataset_io.cxx
+++ b/src/unittest/learning/test_dataset_io.cxx
@@ -16,6 +16,8 @@
#include <opengm/learning/dataset/dataset.hxx>
#include <opengm/learning/dataset/testdatasets.hxx>
#include <opengm/learning/loss/noloss.hxx>
+#include <opengm/learning/loss/hammingloss.hxx>
+#include <opengm/learning/loss/generalized-hammingloss.hxx>
//*************************************
@@ -24,10 +26,17 @@ typedef size_t IndexType;
typedef size_t LabelType;
typedef opengm::meta::TypeListGenerator<opengm::ExplicitFunction<ValueType,IndexType,LabelType>, opengm::functions::learnable::LPotts<ValueType,IndexType,LabelType>, opengm::functions::learnable::SumOfExperts<ValueType,IndexType,LabelType> >::type FunctionListType;
typedef opengm::GraphicalModel<ValueType,opengm::Adder, FunctionListType, opengm::DiscreteSpace<IndexType,LabelType> > GM;
-typedef opengm::learning::NoLoss LOSS;
-typedef opengm::datasets::TestDataset1<GM,LOSS> DS1;
-typedef opengm::datasets::TestDataset2<GM,LOSS> DS2;
-typedef opengm::datasets::Dataset<GM,LOSS> DS;
+typedef opengm::learning::NoLoss LOSS1;
+typedef opengm::learning::HammingLoss LOSS2;
+typedef opengm::learning::GeneralizedHammingLoss LOSS3;
+typedef opengm::datasets::TestDataset1<GM,LOSS1> DS11;
+typedef opengm::datasets::TestDataset2<GM,LOSS1> DS21;
+typedef opengm::datasets::TestDataset1<GM,LOSS2> DS12;
+typedef opengm::datasets::TestDataset2<GM,LOSS2> DS22;
+typedef opengm::datasets::TestDataset1<GM,LOSS3> DS13;
+typedef opengm::datasets::Dataset<GM,LOSS1> DS1;
+typedef opengm::datasets::Dataset<GM,LOSS2> DS2;
+typedef opengm::datasets::Dataset<GM,LOSS3> DS3;
//*************************************
@@ -36,23 +45,55 @@ int main() {
std::cout << " Includes are fine :-) " << std::endl;
{
- DS1 dataset;
- std::cout << "Dataset includes " << dataset.getNumberOfModels() << " instances and has " << dataset.getNumberOfWeights() << " parameters."<<std::endl;
- opengm::datasets::DatasetSerialization::save(dataset,"./","dataset1_");
+ DS11 dataset;
+ std::cout << "Dataset includes " << dataset.getNumberOfModels() << " instances and has " << dataset.getNumberOfWeights() << " parameters."<<std::endl;
+ opengm::datasets::DatasetSerialization::save(dataset,"./","dataset11_");
std::cout <<"done!" <<std::endl;
}
-
{
- DS2 dataset;
- std::cout << "Dataset includes " << dataset.getNumberOfModels() << " instances and has " << dataset.getNumberOfWeights() << " parameters."<<std::endl;
- opengm::datasets::DatasetSerialization::save(dataset,"./","dataset2_");
+ DS12 dataset;
+ std::cout << "Dataset includes " << dataset.getNumberOfModels() << " instances and has " << dataset.getNumberOfWeights() << " parameters."<<std::endl;
+ opengm::datasets::DatasetSerialization::save(dataset,"./","dataset12_");
+ std::cout <<"done!" <<std::endl;
+ }
+ {
+ DS21 dataset;
+ std::cout << "Dataset includes " << dataset.getNumberOfModels() << " instances and has " << dataset.getNumberOfWeights() << " parameters."<<std::endl;
+ opengm::datasets::DatasetSerialization::save(dataset,"./","dataset21_");
std::cout <<"done!" <<std::endl;
}
-
{
- DS ds;
- opengm::datasets::DatasetSerialization::loadAll("./","dataset2_",ds);
- //ds.loadAll("./","dataset2_");
+ DS22 dataset;
+ std::cout << "Dataset includes " << dataset.getNumberOfModels() << " instances and has " << dataset.getNumberOfWeights() << " parameters."<<std::endl;
+ opengm::datasets::DatasetSerialization::save(dataset,"./","dataset22_");
+ std::cout <<"done!" <<std::endl;
+ }
+ {
+ DS13 dataset;
+ std::cout << "Dataset includes " << dataset.getNumberOfModels() << " instances and has " << dataset.getNumberOfWeights() << " parameters."<<std::endl;
+ opengm::datasets::DatasetSerialization::save(dataset,"./","dataset13_");
+ std::cout <<"done!" <<std::endl;
}
+ {
+ DS1 ds;
+ opengm::datasets::DatasetSerialization::loadAll("./","dataset11_",ds);
+ }
+ {
+ DS1 ds;
+ opengm::datasets::DatasetSerialization::loadAll("./","dataset21_",ds);
+ }
+ {
+ DS2 ds;
+ opengm::datasets::DatasetSerialization::loadAll("./","dataset12_",ds);
+ }
+ {
+ DS2 ds;
+ opengm::datasets::DatasetSerialization::loadAll("./","dataset22_",ds);
+ }
+ {
+ DS3 ds;
+ opengm::datasets::DatasetSerialization::loadAll("./","dataset13_",ds);
+ }
+ std::cout << "test successful." << std::endl;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/opengm.git
More information about the debian-science-commits
mailing list