[opencv] 246/251: akaze: fix T-API interfaces, disable OpenCL code

Sun Aug 27 23:27:47 UTC 2017

This is an automated email from the git hooks/post-receive script.

iwamatsu pushed a commit to annotated tag 3.3.0
in repository opencv.

commit e0489cb4a629785405a4b37f566923a5700057cf
Author: Alexander Alekhin <alexander.a.alekhin at gmail.com>
Date:   Thu Aug 3 20:30:13 2017 +0000

    akaze: fix T-API interfaces, disable OpenCL code
    
    - OpenCL kernels don't handle matrices properly. Assumptions are not checked.
    - OpenCL/T-API integration is not correct.
---
 modules/features2d/src/kaze/AKAZEFeatures.cpp | 238 ++++++++++++--------------
 modules/features2d/src/kaze/AKAZEFeatures.h   |  13 +-
 2 files changed, 116 insertions(+), 135 deletions(-)

diff --git a/modules/features2d/src/kaze/AKAZEFeatures.cpp b/modules/features2d/src/kaze/AKAZEFeatures.cpp
index 16c2cad..5bcdb13 100644
--- a/modules/features2d/src/kaze/AKAZEFeatures.cpp
+++ b/modules/features2d/src/kaze/AKAZEFeatures.cpp
@@ -15,6 +15,10 @@
 
 #include <iostream>
 
+#ifdef HAVE_OPENCL // OpenCL is not well supported
+#undef HAVE_OPENCL
+#endif
+
 // Namespaces
 namespace cv
 {
@@ -251,38 +255,41 @@ private:
 
 #ifdef HAVE_OPENCL
 static inline bool
-ocl_non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size)
+ocl_non_linear_diffusion_step(InputArray Lt_, InputArray Lf_, OutputArray Lstep_, float step_size)
 {
-  if(!Lt.isContinuous())
-    return false;
+    if (!Lt_.isContinuous())
+        return false;
+
+    UMat Lt = Lt_.getUMat(), Lf = Lf_.getUMat(), Lstep = Lstep_.getUMat();
 
-  size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows};
+    size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows};
 
-  ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc);
-  if( ker.empty() )
-    return false;
+    ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc);
+    if (ker.empty())
+        return false;
 
-  return ker.args(
-    ocl::KernelArg::ReadOnly(Lt),
-    ocl::KernelArg::PtrReadOnly(Lf),
-    ocl::KernelArg::PtrWriteOnly(Lstep),
-    step_size).run(2, globalSize, 0, true);
+    return ker.args(
+            ocl::KernelArg::ReadOnly(Lt),
+            ocl::KernelArg::PtrReadOnly(Lf),
+            ocl::KernelArg::PtrWriteOnly(Lstep),
+            step_size)
+    .run(2, globalSize, 0, true);
 }
 #endif // HAVE_OPENCL
 
 static inline void
-non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size)
+non_linear_diffusion_step(InputArray Lt, InputArray Lf, OutputArray Lstep, float step_size)
 {
   CV_INSTRUMENT_REGION()
 
   Lstep.create(Lt.size(), Lt.type());
 
-  CV_OCL_RUN(true, ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size));
+#ifdef HAVE_OPENCL
+  CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lstep.isUMat()), ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size));
+#endif
 
-  // when on CPU UMats should be already allocated on CPU so getMat here is basicallly no-op
-  Mat Mstep = Lstep.getMat(ACCESS_WRITE);
-  parallel_for_(Range(0, Lt.rows), NonLinearScalarDiffusionStep(Lt.getMat(ACCESS_READ),
-    Lf.getMat(ACCESS_READ), Mstep, step_size));
+  Mat Mstep = Lstep.getMat();
+  parallel_for_(Range(0, Lt.rows()), NonLinearScalarDiffusionStep(Lt.getMat(), Lf.getMat(), Mstep, step_size));
 }
 
 /**
@@ -347,25 +354,28 @@ compute_kcontrast(const cv::Mat& Lx, const cv::Mat& Ly, float perc, int nbins)
 
 #ifdef HAVE_OPENCL
 static inline bool
-ocl_pm_g2(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast)
+ocl_pm_g2(InputArray Lx_, InputArray Ly_, OutputArray Lflow_, float kcontrast)
 {
-  int total = Lx.rows * Lx.cols;
-  size_t globalSize[] = {(size_t)total};
-
-  ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc);
-  if( ker.empty() )
-    return false;
-
-  return ker.args(
-    ocl::KernelArg::PtrReadOnly(Lx),
-    ocl::KernelArg::PtrReadOnly(Ly),
-    ocl::KernelArg::PtrWriteOnly(Lflow),
-    kcontrast, total).run(1, globalSize, 0, true);
+    UMat Lx = Lx_.getUMat(), Ly = Ly_.getUMat(), Lflow = Lflow_.getUMat();
+
+    int total = Lx.rows * Lx.cols;
+    size_t globalSize[] = {(size_t)total};
+
+    ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc);
+    if (ker.empty())
+        return false;
+
+    return ker.args(
+            ocl::KernelArg::PtrReadOnly(Lx),
+            ocl::KernelArg::PtrReadOnly(Ly),
+            ocl::KernelArg::PtrWriteOnly(Lflow),
+            kcontrast, total)
+    .run(1, globalSize, 0, true);
 }
 #endif // HAVE_OPENCL
 
 static inline void
-compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast, int diffusivity)
+compute_diffusivity(InputArray Lx, InputArray Ly, OutputArray Lflow, float kcontrast, int diffusivity)
 {
   CV_INSTRUMENT_REGION()
 
@@ -376,7 +386,9 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast
       pm_g1(Lx, Ly, Lflow, kcontrast);
     break;
     case KAZE::DIFF_PM_G2:
-      CV_OCL_RUN(true, ocl_pm_g2(Lx, Ly, Lflow, kcontrast));
+#ifdef HAVE_OPENCL
+      CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lflow.isUMat()), ocl_pm_g2(Lx, Ly, Lflow, kcontrast));
+#endif
       pm_g2(Lx, Ly, Lflow, kcontrast);
     break;
     case KAZE::DIFF_WEICKERT:
@@ -392,32 +404,6 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast
 }
 
 /**
- * @brief Fetches pyramid from the gpu.
- * @details Setups mapping for matrices that might be probably on the GPU, if the
- * code executes with OpenCL. This will setup MLx, MLy, Mdet members in the pyramid with
- * mapping to respective UMats. This must be called before CPU-only parts of AKAZE, that work
- * only on these Mats.
- *
- * This prevents mapping/unmapping overhead (and possible uploads/downloads) that would occur, if
- * we just create Mats from UMats each time we need it later. This has devastating effects on OCL
- * performace.
- *
- * @param evolution Pyramid to download
- */
-static inline void downloadPyramid(std::vector<Evolution>& evolution)
-{
-  CV_INSTRUMENT_REGION()
-
-  for (size_t i = 0; i < evolution.size(); ++i) {
-    Evolution& e = evolution[i];
-    e.Mx = e.Lx.getMat(ACCESS_READ);
-    e.My = e.Ly.getMat(ACCESS_READ);
-    e.Mt = e.Lt.getMat(ACCESS_READ);
-    e.Mdet = e.Ldet.getMat(ACCESS_READ);
-  }
-}
-
-/**
  * @brief This method creates the nonlinear scale space for a given image
  * @param img Input image for which the nonlinear scale space needs to be created
  * @return 0 if the nonlinear scale space was created successfully, -1 otherwise
@@ -435,12 +421,11 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img)
   if (evolution_.size() == 1) {
     // we don't need to compute kcontrast factor
     Compute_Determinant_Hessian_Response();
-    downloadPyramid(evolution_);
     return;
   }
 
   // derivatives, flow and diffusion step
-  UMat Lx, Ly, Lsmooth, Lflow, Lstep;
+  Mat Lx, Ly, Lsmooth, Lflow, Lstep;
 
   // compute derivatives for computing k contrast
   GaussianBlur(img, Lsmooth, Size(5, 5), 1.0f, 1.0f, BORDER_REPLICATE);
@@ -448,8 +433,7 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img)
   Scharr(Lsmooth, Ly, CV_32F, 0, 1, 1, 0, BORDER_DEFAULT);
   Lsmooth.release();
   // compute the kcontrast factor
-  float kcontrast = compute_kcontrast(Lx.getMat(ACCESS_READ), Ly.getMat(ACCESS_READ),
-    options_.kcontrast_percentile, options_.kcontrast_nbins);
+  float kcontrast = compute_kcontrast(Lx, Ly, options_.kcontrast_percentile, options_.kcontrast_nbins);
 
   // Now generate the rest of evolution levels
   for (size_t i = 1; i < evolution_.size(); i++) {
@@ -483,31 +467,30 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img)
   }
 
   Compute_Determinant_Hessian_Response();
-  downloadPyramid(evolution_);
-
-  return;
 }
 
 /* ************************************************************************* */
 
 #ifdef HAVE_OPENCL
 static inline bool
-ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy,
-  UMat& Ldet, float sigma)
+ocl_compute_determinant(InputArray Lxx_, InputArray Lxy_, InputArray Lyy_, OutputArray Ldet_, float sigma)
 {
-  const int total = Lxx.rows * Lxx.cols;
-  size_t globalSize[] = {(size_t)total};
-
-  ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc);
-  if( ker.empty() )
-    return false;
-
-  return ker.args(
-    ocl::KernelArg::PtrReadOnly(Lxx),
-    ocl::KernelArg::PtrReadOnly(Lxy),
-    ocl::KernelArg::PtrReadOnly(Lyy),
-    ocl::KernelArg::PtrWriteOnly(Ldet),
-    sigma, total).run(1, globalSize, 0, true);
+    UMat Lxx = Lxx_.getUMat(), Lxy = Lxy_.getUMat(), Lyy = Lyy_.getUMat(), Ldet = Ldet_.getUMat();
+
+    const int total = Lxx.rows * Lxx.cols;
+    size_t globalSize[] = {(size_t)total};
+
+    ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc);
+    if (ker.empty())
+        return false;
+
+    return ker.args(
+            ocl::KernelArg::PtrReadOnly(Lxx),
+            ocl::KernelArg::PtrReadOnly(Lxy),
+            ocl::KernelArg::PtrReadOnly(Lyy),
+            ocl::KernelArg::PtrWriteOnly(Ldet),
+            sigma, total)
+    .run(1, globalSize, 0, true);
 }
 #endif // HAVE_OPENCL
 
@@ -521,27 +504,30 @@ ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy,
  * @param Ldet output determinant
  * @param sigma determinant will be scaled by this sigma
  */
-static inline void compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy,
-  UMat& Ldet, float sigma)
+static inline void compute_determinant(InputArray Lxx, InputArray Lxy, InputArray Lyy, OutputArray Ldet, float sigma)
 {
-  CV_INSTRUMENT_REGION()
+    CV_INSTRUMENT_REGION()
 
-  Ldet.create(Lxx.size(), Lxx.type());
+    Ldet.create(Lxx.size(), Lxx.type());
 
-  CV_OCL_RUN(true, ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma));
-
-  // output determinant
-  Mat Mxx = Lxx.getMat(ACCESS_READ), Mxy = Lxy.getMat(ACCESS_READ), Myy = Lyy.getMat(ACCESS_READ);
-  Mat Mdet = Ldet.getMat(ACCESS_WRITE);
-  float *lxx = Mxx.ptr<float>();
-  float *lxy = Mxy.ptr<float>();
-  float *lyy = Myy.ptr<float>();
-  float *ldet = Mdet.ptr<float>();
-  const int total = Lxx.cols * Lxx.rows;
-  for (int j = 0; j < total; j++) {
-    ldet[j] = (lxx[j] * lyy[j] - lxy[j] * lxy[j]) * sigma;
-  }
+#ifdef HAVE_OPENCL
+    CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Ldet.isUMat()), ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma));
+#endif
 
+    // output determinant
+    Mat Mxx = Lxx.getMat(), Mxy = Lxy.getMat(), Myy = Lyy.getMat(), Mdet = Ldet.getMat();
+    const int W = Mxx.cols, H = Mxx.rows;
+    for (int y = 0; y < H; y++)
+    {
+        float *lxx = Mxx.ptr<float>(y);
+        float *lxy = Mxy.ptr<float>(y);
+        float *lyy = Myy.ptr<float>(y);
+        float *ldet = Mdet.ptr<float>(y);
+        for (int x = 0; x < W; x++)
+        {
+            ldet[x] = (lxx[x] * lyy[x] - lxy[x] * lxy[x]) * sigma;
+        }
+    }
 }
 
 class DeterminantHessianResponse : public ParallelLoopBody
@@ -554,7 +540,7 @@ public:
 
   void operator()(const Range& range) const
   {
-    UMat Lxx, Lxy, Lyy;
+    Mat Lxx, Lxy, Lyy;
 
     for (int i = range.start; i < range.end; i++)
     {
@@ -670,16 +656,16 @@ public:
       const Evolution &e = (*evolution_)[i];
       Mat &kpts = (*keypoints_by_layers_)[i];
       // this mask will hold positions of keypoints in this level
-      kpts = Mat::zeros(e.Mdet.size(), CV_8UC1);
+      kpts = Mat::zeros(e.Ldet.size(), CV_8UC1);
 
       // if border is too big we shouldn't search any keypoints
       if (e.border + 1 >= e.Ldet.rows)
         continue;
 
-      const float * prev = e.Mdet.ptr<float>(e.border - 1);
-      const float * curr = e.Mdet.ptr<float>(e.border    );
-      const float * next = e.Mdet.ptr<float>(e.border + 1);
-      const float * ldet = e.Mdet.ptr<float>();
+      const float * prev = e.Ldet.ptr<float>(e.border - 1);
+      const float * curr = e.Ldet.ptr<float>(e.border    );
+      const float * next = e.Ldet.ptr<float>(e.border + 1);
+      const float * ldet = e.Ldet.ptr<float>();
       uchar *mask = kpts.ptr<uchar>();
       const int search_radius = e.sigma_size; // size of keypoint in this level
 
@@ -743,8 +729,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector<Mat>& keypoints_by_laye
     const Mat &keypoints = keypoints_by_layers[i];
     const uchar *const kpts = keypoints_by_layers[i].ptr<uchar>();
     uchar *const kpts_prev = keypoints_by_layers[i-1].ptr<uchar>();
-    const float *const ldet = evolution_[i].Mdet.ptr<float>();
-    const float *const ldet_prev = evolution_[i-1].Mdet.ptr<float>();
+    const float *const ldet = evolution_[i].Ldet.ptr<float>();
+    const float *const ldet_prev = evolution_[i-1].Ldet.ptr<float>();
     // ratios are just powers of 2
     const int diff_ratio = (int)evolution_[i].octave_ratio / (int)evolution_[i-1].octave_ratio;
     const int search_radius = evolution_[i].sigma_size * diff_ratio; // size of keypoint in this level
@@ -775,8 +761,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector<Mat>& keypoints_by_laye
     const Mat &keypoints = keypoints_by_layers[i];
     const uchar *const kpts = keypoints_by_layers[i].ptr<uchar>();
     uchar *const kpts_next = keypoints_by_layers[i+1].ptr<uchar>();
-    const float *const ldet = evolution_[i].Mdet.ptr<float>();
-    const float *const ldet_next = evolution_[i+1].Mdet.ptr<float>();
+    const float *const ldet = evolution_[i].Ldet.ptr<float>();
+    const float *const ldet_next = evolution_[i+1].Ldet.ptr<float>();
     // ratios are just powers of 2, i+1 ratio is always greater or equal to i
     const int diff_ratio = (int)evolution_[i+1].octave_ratio / (int)evolution_[i].octave_ratio;
     const int search_radius = evolution_[i+1].sigma_size; // size of keypoints in upper level
@@ -814,7 +800,7 @@ void AKAZEFeatures::Do_Subpixel_Refinement(
 
   for (size_t i = 0; i < keypoints_by_layers.size(); i++) {
     const Evolution &e = evolution_[i];
-    const float * const ldet = e.Mdet.ptr<float>();
+    const float * const ldet = e.Ldet.ptr<float>();
     const float ratio = e.octave_ratio;
     const int cols = e.Ldet.cols;
     const Mat& keypoints = keypoints_by_layers[i];
@@ -1308,7 +1294,7 @@ void Compute_Main_Orientation(KeyPoint& kpt, const std::vector<Evolution>& evolu
   // Sample derivatives responses for the points within radius of 6*scale
   const int ang_size = 109;
   float resX[ang_size], resY[ang_size];
-  Sample_Derivative_Response_Radius6(e.Mx, e.My, x0, y0, scale, resX, resY);
+  Sample_Derivative_Response_Radius6(e.Lx, e.Ly, x0, y0, scale, resX, resY);
 
   // Compute the angle of each gradient vector
   float Ang[ang_size];
@@ -1445,8 +1431,8 @@ void MSURF_Upright_Descriptor_64_Invoker::Get_MSURF_Upright_Descriptor_64(const
   ratio = (float)(1 << kpt.octave);
   scale = cvRound(0.5f*kpt.size / ratio);
   const int level = kpt.class_id;
-  Mat Lx = evolution[level].Mx;
-  Mat Ly = evolution[level].My;
+  const Mat Lx = evolution[level].Lx;
+  const Mat Ly = evolution[level].Ly;
   yf = kpt.pt.y / ratio;
   xf = kpt.pt.x / ratio;
 
@@ -1575,8 +1561,8 @@ void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const KeyPoint& kpt, f
   scale = cvRound(0.5f*kpt.size / ratio);
   angle = kpt.angle * static_cast<float>(CV_PI / 180.f);
   const int level = kpt.class_id;
-  Mat Lx = evolution[level].Mx;
-  Mat Ly = evolution[level].My;
+  const Mat Lx = evolution[level].Lx;
+  const Mat Ly = evolution[level].Ly;
   yf = kpt.pt.y / ratio;
   xf = kpt.pt.x / ratio;
   co = cos(angle);
@@ -1708,9 +1694,9 @@ void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(cons
   ratio = (float)(1 << kpt.octave);
   scale = cvRound(0.5f*kpt.size / ratio);
   const int level = kpt.class_id;
-  Mat Lx = evolution[level].Mx;
-  Mat Ly = evolution[level].My;
-  Mat Lt = evolution[level].Mt;
+  const Mat Lx = evolution[level].Lx;
+  const Mat Ly = evolution[level].Ly;
+  const Mat Lt = evolution[level].Lt;
   yf = kpt.pt.y / ratio;
   xf = kpt.pt.x / ratio;
 
@@ -1795,9 +1781,9 @@ void MLDB_Full_Descriptor_Invoker::MLDB_Fill_Values(float* values, int sample_st
     int pattern_size = options_->descriptor_pattern_size;
     int chan = options_->descriptor_channels;
     int valpos = 0;
-    Mat Lx = evolution[level].Mx;
-    Mat Ly = evolution[level].My;
-    Mat Lt = evolution[level].Mt;
+    const Mat Lx = evolution[level].Lx;
+    const Mat Ly = evolution[level].Ly;
+    const Mat Lt = evolution[level].Lt;
 
     for (int i = -pattern_size; i < pattern_size; i += sample_step) {
         for (int j = -pattern_size; j < pattern_size; j += sample_step) {
@@ -1944,9 +1930,9 @@ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint&
   int scale = cvRound(0.5f*kpt.size / ratio);
   float angle = kpt.angle * static_cast<float>(CV_PI / 180.f);
   const int level = kpt.class_id;
-  Mat Lx = evolution[level].Mx;
-  Mat Ly = evolution[level].My;
-  Mat Lt = evolution[level].Mt;
+  const Mat Lx = evolution[level].Lx;
+  const Mat Ly = evolution[level].Ly;
+  const Mat Lt = evolution[level].Lt;
   float yf = kpt.pt.y / ratio;
   float xf = kpt.pt.x / ratio;
   float co = cos(angle);
@@ -2051,9 +2037,9 @@ void Upright_MLDB_Descriptor_Subset_Invoker::Get_Upright_MLDB_Descriptor_Subset(
   float ratio = (float)(1 << kpt.octave);
   int scale = cvRound(0.5f*kpt.size / ratio);
   const int level = kpt.class_id;
-  Mat Lx = evolution[level].Mx;
-  Mat Ly = evolution[level].My;
-  Mat Lt = evolution[level].Mt;
+  const Mat Lx = evolution[level].Lx;
+  const Mat Ly = evolution[level].Ly;
+  const Mat Lt = evolution[level].Lt;
   float yf = kpt.pt.y / ratio;
   float xf = kpt.pt.x / ratio;
 
diff --git a/modules/features2d/src/kaze/AKAZEFeatures.h b/modules/features2d/src/kaze/AKAZEFeatures.h
index 9f44b57..18dc5fd 100644
--- a/modules/features2d/src/kaze/AKAZEFeatures.h
+++ b/modules/features2d/src/kaze/AKAZEFeatures.h
@@ -29,15 +29,10 @@ struct Evolution
     border = 0;
   }
 
-  UMat Lx, Ly;           ///< First order spatial derivatives
-  UMat Lt;               ///< Evolution image
-  UMat Lsmooth;          ///< Smoothed image, used only for computing determinant, released afterwards
-  UMat Ldet;             ///< Detector response
-
-  // the same as above, holding CPU mapping to UMats above
-  Mat Mx, My;
-  Mat Mt;
-  Mat Mdet;
+  Mat Lx, Ly;           ///< First order spatial derivatives
+  Mat Lt;               ///< Evolution image
+  Mat Lsmooth;          ///< Smoothed image, used only for computing determinant, released afterwards
+  Mat Ldet;             ///< Detector response
 
   Size size;                ///< Size of the layer
   float etime;              ///< Evolution time

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/opencv.git