[opencv] 246/251: akaze: fix T-API interfaces, disable OpenCL code
Nobuhiro Iwamatsu
iwamatsu at moszumanska.debian.org
Sun Aug 27 23:27:47 UTC 2017
This is an automated email from the git hooks/post-receive script.
iwamatsu pushed a commit to annotated tag 3.3.0
in repository opencv.
commit e0489cb4a629785405a4b37f566923a5700057cf
Author: Alexander Alekhin <alexander.a.alekhin at gmail.com>
Date: Thu Aug 3 20:30:13 2017 +0000
akaze: fix T-API interfaces, disable OpenCL code
- OpenCL kernels don't handle matrices properly. Assumptions are not checked.
- OpenCL/T-API integration is not correct.
---
modules/features2d/src/kaze/AKAZEFeatures.cpp | 238 ++++++++++++--------------
modules/features2d/src/kaze/AKAZEFeatures.h | 13 +-
2 files changed, 116 insertions(+), 135 deletions(-)
diff --git a/modules/features2d/src/kaze/AKAZEFeatures.cpp b/modules/features2d/src/kaze/AKAZEFeatures.cpp
index 16c2cad..5bcdb13 100644
--- a/modules/features2d/src/kaze/AKAZEFeatures.cpp
+++ b/modules/features2d/src/kaze/AKAZEFeatures.cpp
@@ -15,6 +15,10 @@
#include <iostream>
+#ifdef HAVE_OPENCL // OpenCL is not well supported
+#undef HAVE_OPENCL
+#endif
+
// Namespaces
namespace cv
{
@@ -251,38 +255,41 @@ private:
#ifdef HAVE_OPENCL
static inline bool
-ocl_non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size)
+ocl_non_linear_diffusion_step(InputArray Lt_, InputArray Lf_, OutputArray Lstep_, float step_size)
{
- if(!Lt.isContinuous())
- return false;
+ if (!Lt_.isContinuous())
+ return false;
+
+ UMat Lt = Lt_.getUMat(), Lf = Lf_.getUMat(), Lstep = Lstep_.getUMat();
- size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows};
+ size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows};
- ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc);
- if( ker.empty() )
- return false;
+ ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc);
+ if (ker.empty())
+ return false;
- return ker.args(
- ocl::KernelArg::ReadOnly(Lt),
- ocl::KernelArg::PtrReadOnly(Lf),
- ocl::KernelArg::PtrWriteOnly(Lstep),
- step_size).run(2, globalSize, 0, true);
+ return ker.args(
+ ocl::KernelArg::ReadOnly(Lt),
+ ocl::KernelArg::PtrReadOnly(Lf),
+ ocl::KernelArg::PtrWriteOnly(Lstep),
+ step_size)
+ .run(2, globalSize, 0, true);
}
#endif // HAVE_OPENCL
static inline void
-non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size)
+non_linear_diffusion_step(InputArray Lt, InputArray Lf, OutputArray Lstep, float step_size)
{
CV_INSTRUMENT_REGION()
Lstep.create(Lt.size(), Lt.type());
- CV_OCL_RUN(true, ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size));
+#ifdef HAVE_OPENCL
+ CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lstep.isUMat()), ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size));
+#endif
- // when on CPU UMats should be already allocated on CPU so getMat here is basicallly no-op
- Mat Mstep = Lstep.getMat(ACCESS_WRITE);
- parallel_for_(Range(0, Lt.rows), NonLinearScalarDiffusionStep(Lt.getMat(ACCESS_READ),
- Lf.getMat(ACCESS_READ), Mstep, step_size));
+ Mat Mstep = Lstep.getMat();
+ parallel_for_(Range(0, Lt.rows()), NonLinearScalarDiffusionStep(Lt.getMat(), Lf.getMat(), Mstep, step_size));
}
/**
@@ -347,25 +354,28 @@ compute_kcontrast(const cv::Mat& Lx, const cv::Mat& Ly, float perc, int nbins)
#ifdef HAVE_OPENCL
static inline bool
-ocl_pm_g2(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast)
+ocl_pm_g2(InputArray Lx_, InputArray Ly_, OutputArray Lflow_, float kcontrast)
{
- int total = Lx.rows * Lx.cols;
- size_t globalSize[] = {(size_t)total};
-
- ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc);
- if( ker.empty() )
- return false;
-
- return ker.args(
- ocl::KernelArg::PtrReadOnly(Lx),
- ocl::KernelArg::PtrReadOnly(Ly),
- ocl::KernelArg::PtrWriteOnly(Lflow),
- kcontrast, total).run(1, globalSize, 0, true);
+ UMat Lx = Lx_.getUMat(), Ly = Ly_.getUMat(), Lflow = Lflow_.getUMat();
+
+ int total = Lx.rows * Lx.cols;
+ size_t globalSize[] = {(size_t)total};
+
+ ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc);
+ if (ker.empty())
+ return false;
+
+ return ker.args(
+ ocl::KernelArg::PtrReadOnly(Lx),
+ ocl::KernelArg::PtrReadOnly(Ly),
+ ocl::KernelArg::PtrWriteOnly(Lflow),
+ kcontrast, total)
+ .run(1, globalSize, 0, true);
}
#endif // HAVE_OPENCL
static inline void
-compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast, int diffusivity)
+compute_diffusivity(InputArray Lx, InputArray Ly, OutputArray Lflow, float kcontrast, int diffusivity)
{
CV_INSTRUMENT_REGION()
@@ -376,7 +386,9 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast
pm_g1(Lx, Ly, Lflow, kcontrast);
break;
case KAZE::DIFF_PM_G2:
- CV_OCL_RUN(true, ocl_pm_g2(Lx, Ly, Lflow, kcontrast));
+#ifdef HAVE_OPENCL
+ CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lflow.isUMat()), ocl_pm_g2(Lx, Ly, Lflow, kcontrast));
+#endif
pm_g2(Lx, Ly, Lflow, kcontrast);
break;
case KAZE::DIFF_WEICKERT:
@@ -392,32 +404,6 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast
}
/**
- * @brief Fetches pyramid from the gpu.
- * @details Setups mapping for matrices that might be probably on the GPU, if the
- * code executes with OpenCL. This will setup MLx, MLy, Mdet members in the pyramid with
- * mapping to respective UMats. This must be called before CPU-only parts of AKAZE, that work
- * only on these Mats.
- *
- * This prevents mapping/unmapping overhead (and possible uploads/downloads) that would occur, if
- * we just create Mats from UMats each time we need it later. This has devastating effects on OCL
- * performace.
- *
- * @param evolution Pyramid to download
- */
-static inline void downloadPyramid(std::vector<Evolution>& evolution)
-{
- CV_INSTRUMENT_REGION()
-
- for (size_t i = 0; i < evolution.size(); ++i) {
- Evolution& e = evolution[i];
- e.Mx = e.Lx.getMat(ACCESS_READ);
- e.My = e.Ly.getMat(ACCESS_READ);
- e.Mt = e.Lt.getMat(ACCESS_READ);
- e.Mdet = e.Ldet.getMat(ACCESS_READ);
- }
-}
-
-/**
* @brief This method creates the nonlinear scale space for a given image
* @param img Input image for which the nonlinear scale space needs to be created
* @return 0 if the nonlinear scale space was created successfully, -1 otherwise
@@ -435,12 +421,11 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img)
if (evolution_.size() == 1) {
// we don't need to compute kcontrast factor
Compute_Determinant_Hessian_Response();
- downloadPyramid(evolution_);
return;
}
// derivatives, flow and diffusion step
- UMat Lx, Ly, Lsmooth, Lflow, Lstep;
+ Mat Lx, Ly, Lsmooth, Lflow, Lstep;
// compute derivatives for computing k contrast
GaussianBlur(img, Lsmooth, Size(5, 5), 1.0f, 1.0f, BORDER_REPLICATE);
@@ -448,8 +433,7 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img)
Scharr(Lsmooth, Ly, CV_32F, 0, 1, 1, 0, BORDER_DEFAULT);
Lsmooth.release();
// compute the kcontrast factor
- float kcontrast = compute_kcontrast(Lx.getMat(ACCESS_READ), Ly.getMat(ACCESS_READ),
- options_.kcontrast_percentile, options_.kcontrast_nbins);
+ float kcontrast = compute_kcontrast(Lx, Ly, options_.kcontrast_percentile, options_.kcontrast_nbins);
// Now generate the rest of evolution levels
for (size_t i = 1; i < evolution_.size(); i++) {
@@ -483,31 +467,30 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img)
}
Compute_Determinant_Hessian_Response();
- downloadPyramid(evolution_);
-
- return;
}
/* ************************************************************************* */
#ifdef HAVE_OPENCL
static inline bool
-ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy,
- UMat& Ldet, float sigma)
+ocl_compute_determinant(InputArray Lxx_, InputArray Lxy_, InputArray Lyy_, OutputArray Ldet_, float sigma)
{
- const int total = Lxx.rows * Lxx.cols;
- size_t globalSize[] = {(size_t)total};
-
- ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc);
- if( ker.empty() )
- return false;
-
- return ker.args(
- ocl::KernelArg::PtrReadOnly(Lxx),
- ocl::KernelArg::PtrReadOnly(Lxy),
- ocl::KernelArg::PtrReadOnly(Lyy),
- ocl::KernelArg::PtrWriteOnly(Ldet),
- sigma, total).run(1, globalSize, 0, true);
+ UMat Lxx = Lxx_.getUMat(), Lxy = Lxy_.getUMat(), Lyy = Lyy_.getUMat(), Ldet = Ldet_.getUMat();
+
+ const int total = Lxx.rows * Lxx.cols;
+ size_t globalSize[] = {(size_t)total};
+
+ ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc);
+ if (ker.empty())
+ return false;
+
+ return ker.args(
+ ocl::KernelArg::PtrReadOnly(Lxx),
+ ocl::KernelArg::PtrReadOnly(Lxy),
+ ocl::KernelArg::PtrReadOnly(Lyy),
+ ocl::KernelArg::PtrWriteOnly(Ldet),
+ sigma, total)
+ .run(1, globalSize, 0, true);
}
#endif // HAVE_OPENCL
@@ -521,27 +504,30 @@ ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy,
* @param Ldet output determinant
* @param sigma determinant will be scaled by this sigma
*/
-static inline void compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy,
- UMat& Ldet, float sigma)
+static inline void compute_determinant(InputArray Lxx, InputArray Lxy, InputArray Lyy, OutputArray Ldet, float sigma)
{
- CV_INSTRUMENT_REGION()
+ CV_INSTRUMENT_REGION()
- Ldet.create(Lxx.size(), Lxx.type());
+ Ldet.create(Lxx.size(), Lxx.type());
- CV_OCL_RUN(true, ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma));
-
- // output determinant
- Mat Mxx = Lxx.getMat(ACCESS_READ), Mxy = Lxy.getMat(ACCESS_READ), Myy = Lyy.getMat(ACCESS_READ);
- Mat Mdet = Ldet.getMat(ACCESS_WRITE);
- float *lxx = Mxx.ptr<float>();
- float *lxy = Mxy.ptr<float>();
- float *lyy = Myy.ptr<float>();
- float *ldet = Mdet.ptr<float>();
- const int total = Lxx.cols * Lxx.rows;
- for (int j = 0; j < total; j++) {
- ldet[j] = (lxx[j] * lyy[j] - lxy[j] * lxy[j]) * sigma;
- }
+#ifdef HAVE_OPENCL
+ CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Ldet.isUMat()), ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma));
+#endif
+ // output determinant
+ Mat Mxx = Lxx.getMat(), Mxy = Lxy.getMat(), Myy = Lyy.getMat(), Mdet = Ldet.getMat();
+ const int W = Mxx.cols, H = Mxx.rows;
+ for (int y = 0; y < H; y++)
+ {
+ float *lxx = Mxx.ptr<float>(y);
+ float *lxy = Mxy.ptr<float>(y);
+ float *lyy = Myy.ptr<float>(y);
+ float *ldet = Mdet.ptr<float>(y);
+ for (int x = 0; x < W; x++)
+ {
+ ldet[x] = (lxx[x] * lyy[x] - lxy[x] * lxy[x]) * sigma;
+ }
+ }
}
class DeterminantHessianResponse : public ParallelLoopBody
@@ -554,7 +540,7 @@ public:
void operator()(const Range& range) const
{
- UMat Lxx, Lxy, Lyy;
+ Mat Lxx, Lxy, Lyy;
for (int i = range.start; i < range.end; i++)
{
@@ -670,16 +656,16 @@ public:
const Evolution &e = (*evolution_)[i];
Mat &kpts = (*keypoints_by_layers_)[i];
// this mask will hold positions of keypoints in this level
- kpts = Mat::zeros(e.Mdet.size(), CV_8UC1);
+ kpts = Mat::zeros(e.Ldet.size(), CV_8UC1);
// if border is too big we shouldn't search any keypoints
if (e.border + 1 >= e.Ldet.rows)
continue;
- const float * prev = e.Mdet.ptr<float>(e.border - 1);
- const float * curr = e.Mdet.ptr<float>(e.border );
- const float * next = e.Mdet.ptr<float>(e.border + 1);
- const float * ldet = e.Mdet.ptr<float>();
+ const float * prev = e.Ldet.ptr<float>(e.border - 1);
+ const float * curr = e.Ldet.ptr<float>(e.border );
+ const float * next = e.Ldet.ptr<float>(e.border + 1);
+ const float * ldet = e.Ldet.ptr<float>();
uchar *mask = kpts.ptr<uchar>();
const int search_radius = e.sigma_size; // size of keypoint in this level
@@ -743,8 +729,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector<Mat>& keypoints_by_laye
const Mat &keypoints = keypoints_by_layers[i];
const uchar *const kpts = keypoints_by_layers[i].ptr<uchar>();
uchar *const kpts_prev = keypoints_by_layers[i-1].ptr<uchar>();
- const float *const ldet = evolution_[i].Mdet.ptr<float>();
- const float *const ldet_prev = evolution_[i-1].Mdet.ptr<float>();
+ const float *const ldet = evolution_[i].Ldet.ptr<float>();
+ const float *const ldet_prev = evolution_[i-1].Ldet.ptr<float>();
// ratios are just powers of 2
const int diff_ratio = (int)evolution_[i].octave_ratio / (int)evolution_[i-1].octave_ratio;
const int search_radius = evolution_[i].sigma_size * diff_ratio; // size of keypoint in this level
@@ -775,8 +761,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector<Mat>& keypoints_by_laye
const Mat &keypoints = keypoints_by_layers[i];
const uchar *const kpts = keypoints_by_layers[i].ptr<uchar>();
uchar *const kpts_next = keypoints_by_layers[i+1].ptr<uchar>();
- const float *const ldet = evolution_[i].Mdet.ptr<float>();
- const float *const ldet_next = evolution_[i+1].Mdet.ptr<float>();
+ const float *const ldet = evolution_[i].Ldet.ptr<float>();
+ const float *const ldet_next = evolution_[i+1].Ldet.ptr<float>();
// ratios are just powers of 2, i+1 ratio is always greater or equal to i
const int diff_ratio = (int)evolution_[i+1].octave_ratio / (int)evolution_[i].octave_ratio;
const int search_radius = evolution_[i+1].sigma_size; // size of keypoints in upper level
@@ -814,7 +800,7 @@ void AKAZEFeatures::Do_Subpixel_Refinement(
for (size_t i = 0; i < keypoints_by_layers.size(); i++) {
const Evolution &e = evolution_[i];
- const float * const ldet = e.Mdet.ptr<float>();
+ const float * const ldet = e.Ldet.ptr<float>();
const float ratio = e.octave_ratio;
const int cols = e.Ldet.cols;
const Mat& keypoints = keypoints_by_layers[i];
@@ -1308,7 +1294,7 @@ void Compute_Main_Orientation(KeyPoint& kpt, const std::vector<Evolution>& evolu
// Sample derivatives responses for the points within radius of 6*scale
const int ang_size = 109;
float resX[ang_size], resY[ang_size];
- Sample_Derivative_Response_Radius6(e.Mx, e.My, x0, y0, scale, resX, resY);
+ Sample_Derivative_Response_Radius6(e.Lx, e.Ly, x0, y0, scale, resX, resY);
// Compute the angle of each gradient vector
float Ang[ang_size];
@@ -1445,8 +1431,8 @@ void MSURF_Upright_Descriptor_64_Invoker::Get_MSURF_Upright_Descriptor_64(const
ratio = (float)(1 << kpt.octave);
scale = cvRound(0.5f*kpt.size / ratio);
const int level = kpt.class_id;
- Mat Lx = evolution[level].Mx;
- Mat Ly = evolution[level].My;
+ const Mat Lx = evolution[level].Lx;
+ const Mat Ly = evolution[level].Ly;
yf = kpt.pt.y / ratio;
xf = kpt.pt.x / ratio;
@@ -1575,8 +1561,8 @@ void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const KeyPoint& kpt, f
scale = cvRound(0.5f*kpt.size / ratio);
angle = kpt.angle * static_cast<float>(CV_PI / 180.f);
const int level = kpt.class_id;
- Mat Lx = evolution[level].Mx;
- Mat Ly = evolution[level].My;
+ const Mat Lx = evolution[level].Lx;
+ const Mat Ly = evolution[level].Ly;
yf = kpt.pt.y / ratio;
xf = kpt.pt.x / ratio;
co = cos(angle);
@@ -1708,9 +1694,9 @@ void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(cons
ratio = (float)(1 << kpt.octave);
scale = cvRound(0.5f*kpt.size / ratio);
const int level = kpt.class_id;
- Mat Lx = evolution[level].Mx;
- Mat Ly = evolution[level].My;
- Mat Lt = evolution[level].Mt;
+ const Mat Lx = evolution[level].Lx;
+ const Mat Ly = evolution[level].Ly;
+ const Mat Lt = evolution[level].Lt;
yf = kpt.pt.y / ratio;
xf = kpt.pt.x / ratio;
@@ -1795,9 +1781,9 @@ void MLDB_Full_Descriptor_Invoker::MLDB_Fill_Values(float* values, int sample_st
int pattern_size = options_->descriptor_pattern_size;
int chan = options_->descriptor_channels;
int valpos = 0;
- Mat Lx = evolution[level].Mx;
- Mat Ly = evolution[level].My;
- Mat Lt = evolution[level].Mt;
+ const Mat Lx = evolution[level].Lx;
+ const Mat Ly = evolution[level].Ly;
+ const Mat Lt = evolution[level].Lt;
for (int i = -pattern_size; i < pattern_size; i += sample_step) {
for (int j = -pattern_size; j < pattern_size; j += sample_step) {
@@ -1944,9 +1930,9 @@ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint&
int scale = cvRound(0.5f*kpt.size / ratio);
float angle = kpt.angle * static_cast<float>(CV_PI / 180.f);
const int level = kpt.class_id;
- Mat Lx = evolution[level].Mx;
- Mat Ly = evolution[level].My;
- Mat Lt = evolution[level].Mt;
+ const Mat Lx = evolution[level].Lx;
+ const Mat Ly = evolution[level].Ly;
+ const Mat Lt = evolution[level].Lt;
float yf = kpt.pt.y / ratio;
float xf = kpt.pt.x / ratio;
float co = cos(angle);
@@ -2051,9 +2037,9 @@ void Upright_MLDB_Descriptor_Subset_Invoker::Get_Upright_MLDB_Descriptor_Subset(
float ratio = (float)(1 << kpt.octave);
int scale = cvRound(0.5f*kpt.size / ratio);
const int level = kpt.class_id;
- Mat Lx = evolution[level].Mx;
- Mat Ly = evolution[level].My;
- Mat Lt = evolution[level].Mt;
+ const Mat Lx = evolution[level].Lx;
+ const Mat Ly = evolution[level].Ly;
+ const Mat Lt = evolution[level].Lt;
float yf = kpt.pt.y / ratio;
float xf = kpt.pt.x / ratio;
diff --git a/modules/features2d/src/kaze/AKAZEFeatures.h b/modules/features2d/src/kaze/AKAZEFeatures.h
index 9f44b57..18dc5fd 100644
--- a/modules/features2d/src/kaze/AKAZEFeatures.h
+++ b/modules/features2d/src/kaze/AKAZEFeatures.h
@@ -29,15 +29,10 @@ struct Evolution
border = 0;
}
- UMat Lx, Ly; ///< First order spatial derivatives
- UMat Lt; ///< Evolution image
- UMat Lsmooth; ///< Smoothed image, used only for computing determinant, released afterwards
- UMat Ldet; ///< Detector response
-
- // the same as above, holding CPU mapping to UMats above
- Mat Mx, My;
- Mat Mt;
- Mat Mdet;
+ Mat Lx, Ly; ///< First order spatial derivatives
+ Mat Lt; ///< Evolution image
+ Mat Lsmooth; ///< Smoothed image, used only for computing determinant, released afterwards
+ Mat Ldet; ///< Detector response
Size size; ///< Size of the layer
float etime; ///< Evolution time
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/opencv.git
More information about the debian-science-commits mailing list