[arrayfire] 319/408: Reorganizing non free build process.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:21 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit b372e49c92673cf42ddc646f81acda86e36676a4
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Mon Aug 24 03:17:25 2015 -0400
Reorganizing non free build process.
- BUILD_NONFREE is OFF by default
---
CMakeLists.txt | 2 +-
src/backend/cpu/sift.cpp | 1019 +------------------
src/backend/cpu/sift_nonfree.hpp | 1033 ++++++++++++++++++++
.../cuda/kernel/{sift.hpp => sift_nonfree.hpp} | 0
src/backend/cuda/sift.cu | 9 +-
.../opencl/kernel/{sift.cl => sift_nonfree.cl} | 0
.../opencl/kernel/{sift.hpp => sift_nonfree.hpp} | 4 +-
src/backend/opencl/sift.cpp | 19 +-
test/CMakeLists.txt | 2 +-
test/{sift.cpp => sift_nonfree.cpp} | 0
10 files changed, 1069 insertions(+), 1019 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0138cd7..e88a176 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,7 +30,7 @@ OPTION(BUILD_GRAPHICS "Build ArrayFire with Forge Graphics" ON)
OPTION(BUILD_DOCS "Create ArrayFire Documentation" OFF)
OPTION(WITH_COVERAGE "Added code coverage flags" OFF)
-OPTION(BUILD_NONFREE "Build ArrayFire nonfree algorithms" ON)
+OPTION(BUILD_NONFREE "Build ArrayFire nonfree algorithms" OFF)
# Set a default build type if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
diff --git a/src/backend/cpu/sift.cpp b/src/backend/cpu/sift.cpp
index 8f872fe..421549a 100644
--- a/src/backend/cpu/sift.cpp
+++ b/src/backend/cpu/sift.cpp
@@ -7,69 +7,6 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
-// The source code contained in this file is based on the original code by
-// Rob Hess. Please note that SIFT is an algorithm patented and protected
-// by US law, before using this code or any binary forms generated from it,
-// verify that you have permission to do so. The original license by Rob Hess
-// can be read below:
-//
-// Copyright (c) 2006-2012, Rob Hess <rob at iqengines.com>
-// All rights reserved.
-//
-// The following patent has been issued for methods embodied in this
-// software: "Method and apparatus for identifying scale invariant features
-// in an image and use of same for locating an object in an image," David
-// G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application
-// filed March 8, 1999. Asignee: The University of British Columbia. For
-// further details, contact David Lowe (lowe at cs.ubc.ca) or the
-// University-Industry Liaison Office of the University of British
-// Columbia.
-//
-// Note that restrictions imposed by this patent (and possibly others)
-// exist independently of and may be in conflict with the freedoms granted
-// in this license, which refers to copyright of the program, not patents
-// for any methods that it implements. Both copyright and patent law must
-// be obeyed to legally use and redistribute this program and it is not the
-// purpose of this license to induce you to infringe any patents or other
-// property right claims or to contest validity of any such claims. If you
-// redistribute or use the program, then this license merely protects you
-// from committing copyright infringement. It does not protect you from
-// committing patent infringement. So, before you do anything with this
-// program, make sure that you have permission to do so not merely in terms
-// of copyright, but also in terms of patent law.
-//
-// Please note that this license is not to be understood as a guarantee
-// either. If you use the program according to this license, but in
-// conflict with patent law, it does not mean that the licensor will refund
-// you for any losses that you incur if you are sued for your patent
-// infringement.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-// * Redistributions of source code must retain the above copyright and
-// patent notices, this list of conditions and the following
-// disclaimer.
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in
-// the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Oregon State University nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
#include <af/dim4.hpp>
#include <af/defines.h>
#include <ArrayInfo.hpp>
@@ -84,745 +21,15 @@
#include <cfloat>
#include <vector>
+#ifdef AF_NONFREE
+#include <sift_nonfree.hpp>
+#endif
+
using af::dim4;
namespace cpu
{
-static const float PI_VAL = 3.14159265358979323846f;
-
-// default width of descriptor histogram array
-static const int DescrWidth = 4;
-
-// default number of bins per histogram in descriptor array
-static const int DescrHistBins = 8;
-
-// assumed gaussian blur for input image
-static const float InitSigma = 0.5f;
-
-// width of border in which to ignore keypoints
-static const int ImgBorder = 5;
-
-// maximum steps of keypoint interpolation before failure
-static const int MaxInterpSteps = 5;
-
-// default number of bins in histogram for orientation assignment
-static const int OriHistBins = 36;
-
-// determines gaussian sigma for orientation assignment
-static const float OriSigFctr = 1.5f;
-
-// determines the radius of the region used in orientation assignment */
-static const float OriRadius = 3.0f * OriSigFctr;
-
-// number of passes of orientation histogram smoothing
-static const int SmoothOriPasses = 2;
-
-// orientation magnitude relative to max that results in new feature
-static const float OriPeakRatio = 0.8f;
-
-// determines the size of a single descriptor orientation histogram
-static const float DescrSclFctr = 3.f;
-
-// threshold on magnitude of elements of descriptor vector
-static const float DescrMagThr = 0.2f;
-
-// factor used to convert floating-point descriptor to unsigned char
-static const float IntDescrFctr = 512.f;
-
-typedef struct
-{
- float f[4];
- unsigned l;
-} feat_t;
-
-bool feat_cmp(feat_t i, feat_t j)
-{
- for (int k = 0; k < 4; k++)
- if (i.f[k] != j.f[k])
- return (i.f[k] < j.f[k]);
- if (i.l != j.l)
- return (i.l < j.l);
-
- return true;
-}
-
-void array_to_feat(std::vector<feat_t>& feat, float *x, float *y, unsigned *layer, float *resp, float *size, unsigned nfeat)
-{
- feat.resize(nfeat);
- for (unsigned i = 0; i < feat.size(); i++) {
- feat[i].f[0] = x[i];
- feat[i].f[1] = y[i];
- feat[i].f[2] = resp[i];
- feat[i].f[3] = size[i];
- feat[i].l = layer[i];
- }
-}
-
-template<typename T>
-void gaussian1D(T* out, const int dim, double sigma=0.0)
-{
- if(!(sigma>0)) sigma = 0.25*dim;
-
- T sum = (T)0;
- for(int i=0;i<dim;i++)
- {
- int x = i-(dim-1)/2;
- T el = 1. / sqrt(2 * PI_VAL * sigma*sigma) * exp(-((x*x)/(2*(sigma*sigma))));
- out[i] = el;
- sum += el;
- }
-
- for(int k=0;k<dim;k++)
- out[k] /= sum;
-}
-
-template<typename T>
-Array<T> gauss_filter(float sigma)
-{
- // Using 6-sigma rule
- unsigned gauss_len = (unsigned)round(sigma * 6 + 1) | 1;
-
- Array<T> filter = createEmptyArray<T>(gauss_len);
- gaussian1D((T*)getDevicePtr(filter), gauss_len, sigma);
-
- return filter;
-}
-
-template<int N>
-void gaussianElimination(float* A, float* b, float* x)
-{
- // forward elimination
- for (int i = 0; i < N-1; i++) {
- for (int j = i+1; j < N; j++) {
- float s = A[j*N+i] / A[i*N+i];
-
- for (int k = i; k < N; k++)
- A[j*N+k] -= s * A[i*N+k];
-
- b[j] -= s * b[i];
- }
- }
-
- for (int i = 0; i < N; i++)
- x[i] = 0;
-
- // backward substitution
- float sum = 0;
- for (int i = 0; i <= N-2; i++) {
- sum = b[i];
- for (int j = i+1; j < N; j++)
- sum -= A[i*N+j] * x[j];
- x[i] = sum / A[i*N+i];
- }
-}
-
-template<typename T>
-void sub(
- Array<T>& out,
- const Array<T>& in1,
- const Array<T>& in2)
-{
- size_t nel = in1.elements();
- T* out_ptr = out.get();
- const T* in1_ptr = in1.get();
- const T* in2_ptr = in2.get();
-
- for (size_t i = 0; i < nel; i++) {
- out_ptr[i] = in1_ptr[i] - in2_ptr[i];
- }
-}
-
-#define CPTR(Y, X) (center_ptr[(Y) * idims[0] + (X)])
-#define PPTR(Y, X) (prev_ptr[(Y) * idims[0] + (X)])
-#define NPTR(Y, X) (next_ptr[(Y) * idims[0] + (X)])
-
-// Determines whether a pixel is a scale-space extremum by comparing it to its
-// 3x3x3 pixel neighborhood.
-template<typename T>
-void detectExtrema(
- float* x_out,
- float* y_out,
- unsigned* layer_out,
- unsigned* counter,
- const Array<T>& prev,
- const Array<T>& center,
- const Array<T>& next,
- const unsigned layer,
- const unsigned max_feat,
- const float threshold)
-{
- const af::dim4 idims = center.dims();
- const T* prev_ptr = prev.get();
- const T* center_ptr = center.get();
- const T* next_ptr = next.get();
-
- for (int y = ImgBorder; y < idims[1]-ImgBorder; y++) {
- for (int x = ImgBorder; x < idims[0]-ImgBorder; x++) {
- float p = center_ptr[y*idims[0] + x];
-
- // Find extrema
- if (abs((float)p) > threshold &&
- ((p > 0 && p > CPTR(y-1, x-1) && p > CPTR(y-1, x) &&
- p > CPTR(y-1, x+1) && p > CPTR(y, x-1) && p > CPTR(y, x+1) &&
- p > CPTR(y+1, x-1) && p > CPTR(y+1, x) && p > CPTR(y+1, x+1) &&
- p > PPTR(y-1, x-1) && p > PPTR(y-1, x) && p > PPTR(y-1, x+1) &&
- p > PPTR(y, x-1) && p > PPTR(y , x) && p > PPTR(y, x+1) &&
- p > PPTR(y+1, x-1) && p > PPTR(y+1, x) && p > PPTR(y+1, x+1) &&
- p > NPTR(y-1, x-1) && p > NPTR(y-1, x) && p > NPTR(y-1, x+1) &&
- p > NPTR(y, x-1) && p > NPTR(y , x) && p > NPTR(y, x+1) &&
- p > NPTR(y+1, x-1) && p > NPTR(y+1, x) && p > NPTR(y+1, x+1)) ||
- (p < 0 && p < CPTR(y-1, x-1) && p < CPTR(y-1, x) &&
- p < CPTR(y-1, x+1) && p < CPTR(y, x-1) && p < CPTR(y, x+1) &&
- p < CPTR(y+1, x-1) && p < CPTR(y+1, x) && p < CPTR(y+1, x+1) &&
- p < PPTR(y-1, x-1) && p < PPTR(y-1, x) && p < PPTR(y-1, x+1) &&
- p < PPTR(y, x-1) && p < PPTR(y , x) && p < PPTR(y, x+1) &&
- p < PPTR(y+1, x-1) && p < PPTR(y+1, x) && p < PPTR(y+1, x+1) &&
- p < NPTR(y-1, x-1) && p < NPTR(y-1, x) && p < NPTR(y-1, x+1) &&
- p < NPTR(y, x-1) && p < NPTR(y , x) && p < NPTR(y, x+1) &&
- p < NPTR(y+1, x-1) && p < NPTR(y+1, x) && p < NPTR(y+1, x+1)))) {
-
- if (*counter < max_feat)
- {
- x_out[*counter] = (float)y;
- y_out[*counter] = (float)x;
- layer_out[*counter] = layer;
- (*counter)++;
- }
- }
- }
- }
-}
-
-// Interpolates a scale-space extremum's location and scale to subpixel
-// accuracy to form an image feature. Rejects features with low contrast.
-// Based on Section 4 of Lowe's paper.
-template<typename T>
-void interpolateExtrema(
- float* x_out,
- float* y_out,
- unsigned* layer_out,
- float* response_out,
- float* size_out,
- unsigned* counter,
- const float* x_in,
- const float* y_in,
- const unsigned* layer_in,
- const unsigned extrema_feat,
- std::vector< Array<T> >& dog_pyr,
- const unsigned max_feat,
- const unsigned octave,
- const unsigned n_layers,
- const float contrast_thr,
- const float edge_thr,
- const float sigma,
- const float img_scale)
-{
- for (int f = 0; f < extrema_feat; f++) {
- const float first_deriv_scale = img_scale*0.5f;
- const float second_deriv_scale = img_scale;
- const float cross_deriv_scale = img_scale*0.25f;
-
- float xl = 0, xy = 0, xx = 0, contr = 0;
- int i = 0;
-
- unsigned x = x_in[f];
- unsigned y = y_in[f];
- unsigned layer = layer_in[f];
-
- const T* prev_ptr = dog_pyr[octave*(n_layers+2) + layer-1].get();
- const T* center_ptr = dog_pyr[octave*(n_layers+2) + layer].get();
- const T* next_ptr = dog_pyr[octave*(n_layers+2) + layer+1].get();
-
- af::dim4 idims = dog_pyr[octave*(n_layers+2)].dims();
-
- bool converges = true;
-
- for (i = 0; i < MaxInterpSteps; i++) {
- float dD[3] = {(float)(CPTR(x+1, y) - CPTR(x-1, y)) * first_deriv_scale,
- (float)(CPTR(x, y+1) - CPTR(x, y-1)) * first_deriv_scale,
- (float)(NPTR(x, y) - PPTR(x, y)) * first_deriv_scale};
-
- float d2 = CPTR(x, y) * 2.f;
- float dxx = (CPTR(x+1, y) + CPTR(x-1, y) - d2) * second_deriv_scale;
- float dyy = (CPTR(x, y+1) + CPTR(x, y-1) - d2) * second_deriv_scale;
- float dss = (NPTR(x, y ) + PPTR(x, y ) - d2) * second_deriv_scale;
- float dxy = (CPTR(x+1, y+1) - CPTR(x-1, y+1) -
- CPTR(x+1, y-1) + CPTR(x-1, y-1)) * cross_deriv_scale;
- float dxs = (NPTR(x+1, y) - NPTR(x-1, y) -
- PPTR(x+1, y) + PPTR(x-1, y)) * cross_deriv_scale;
- float dys = (NPTR(x, y+1) - NPTR(x-1, y-1) -
- PPTR(x, y-1) + PPTR(x-1, y-1)) * cross_deriv_scale;
-
- float H[9] = {dxx, dxy, dxs,
- dxy, dyy, dys,
- dxs, dys, dss};
-
- float X[3];
- gaussianElimination<3>(H, dD, X);
-
- xl = -X[2];
- xy = -X[1];
- xx = -X[0];
-
- if (fabs(xl) < 0.5f && fabs(xy) < 0.5f && fabs(xx) < 0.5f)
- break;
-
- x += round(xx);
- y += round(xy);
- layer += round(xl);
-
- if (layer < 1 || layer > n_layers ||
- x < ImgBorder || x >= idims[1] - ImgBorder ||
- y < ImgBorder || y >= idims[0] - ImgBorder) {
- converges = false;
- break;
- }
- }
-
- // ensure convergence of interpolation
- if (i >= MaxInterpSteps || !converges)
- continue;
-
- float dD[3] = {(float)(CPTR(x+1, y) - CPTR(x-1, y)) * first_deriv_scale,
- (float)(CPTR(x, y+1) - CPTR(x, y-1)) * first_deriv_scale,
- (float)(NPTR(x, y) - PPTR(x, y)) * first_deriv_scale};
- float X[3] = {xx, xy, xl};
-
- float P = dD[0]*X[0] + dD[1]*X[1] + dD[2]*X[2];
-
- contr = center_ptr[x*idims[0]+y]*img_scale + P * 0.5f;
- if(abs(contr) < (contrast_thr / n_layers))
- continue;
-
- // principal curvatures are computed using the trace and det of Hessian
- float d2 = CPTR(x, y) * 2.f;
- float dxx = (CPTR(x+1, y) + CPTR(x-1, y) - d2) * second_deriv_scale;
- float dyy = (CPTR(x, y+1) + CPTR(x, y-1) - d2) * second_deriv_scale;
- float dxy = (CPTR(x+1, y+1) - CPTR(x-1, y+1) -
- CPTR(x+1, y-1) + CPTR(x-1, y-1)) * cross_deriv_scale;
-
- float tr = dxx + dyy;
- float det = dxx * dyy - dxy * dxy;
-
- // add FLT_EPSILON for double-precision compatibility
- if (det <= 0 || tr*tr*edge_thr >= (edge_thr + 1)*(edge_thr + 1)*det+FLT_EPSILON)
- continue;
-
- if (*counter < max_feat)
- {
- x_out[*counter] = (x + xx) * (1 << octave);
- y_out[*counter] = (y + xy) * (1 << octave);
- layer_out[*counter] = layer;
- response_out[*counter] = abs(contr);
- size_out[*counter] = sigma*pow(2.f, octave + (layer + xl) / n_layers);
- (*counter)++;
- }
- }
-}
-
-#undef CPTR
-#undef PPTR
-#undef NPTR
-
-// Remove duplicate keypoints
-void removeDuplicates(
- float* x_out,
- float* y_out,
- unsigned* layer_out,
- float* response_out,
- float* size_out,
- unsigned* counter,
- const std::vector<feat_t>& sorted_feat)
-{
- size_t nfeat = sorted_feat.size();
-
- for (size_t f = 0; f < nfeat; f++) {
- float prec_fctr = 1e4f;
-
- if (f < nfeat-1) {
- if (round(sorted_feat[f].f[0]*prec_fctr) == round(sorted_feat[f+1].f[0]*prec_fctr) &&
- round(sorted_feat[f].f[1]*prec_fctr) == round(sorted_feat[f+1].f[1]*prec_fctr) &&
- round(sorted_feat[f].f[2]*prec_fctr) == round(sorted_feat[f+1].f[2]*prec_fctr) &&
- round(sorted_feat[f].f[3]*prec_fctr) == round(sorted_feat[f+1].f[3]*prec_fctr) &&
- sorted_feat[f].l == sorted_feat[f+1].l)
- continue;
- }
-
- x_out[*counter] = sorted_feat[f].f[0];
- y_out[*counter] = sorted_feat[f].f[1];
- response_out[*counter] = sorted_feat[f].f[2];
- size_out[*counter] = sorted_feat[f].f[3];
- layer_out[*counter] = sorted_feat[f].l;
- (*counter)++;
- }
-}
-
-#define IPTR(Y, X) (img_ptr[(Y) * idims[0] + (X)])
-
-// Computes a canonical orientation for each image feature in an array. Based
-// on Section 5 of Lowe's paper. This function adds features to the array when
-// there is more than one dominant orientation at a given feature location.
-template<typename T>
-void calcOrientation(
- float* x_out,
- float* y_out,
- unsigned* layer_out,
- float* response_out,
- float* size_out,
- float* ori_out,
- unsigned* counter,
- const float* x_in,
- const float* y_in,
- const unsigned* layer_in,
- const float* response_in,
- const float* size_in,
- const unsigned total_feat,
- const std::vector< Array<T> >& gauss_pyr,
- const unsigned max_feat,
- const unsigned octave,
- const unsigned n_layers,
- const bool double_input)
-{
- const int n = OriHistBins;
-
- float hist[OriHistBins];
- float temphist[OriHistBins];
-
- for (unsigned f = 0; f < total_feat; f++) {
- // Load keypoint information
- const float real_x = x_in[f];
- const float real_y = y_in[f];
- const unsigned layer = layer_in[f];
- const float response = response_in[f];
- const float size = size_in[f];
-
- const int pt_x = (int)round(real_x / (1 << octave));
- const int pt_y = (int)round(real_y / (1 << octave));
-
- // Calculate auxiliary parameters
- const float scl_octv = size*0.5f / (1 << octave);
- const int radius = (int)round(OriRadius * scl_octv);
- const float sigma = OriSigFctr * scl_octv;
- const int len = (radius*2+1);
- const float exp_denom = 2.f * sigma * sigma;
-
- // Points img to correct Gaussian pyramid layer
- const Array<T> img = gauss_pyr[octave*(n_layers+3) + layer];
- const T* img_ptr = img.get();
-
- for (int i = 0; i < OriHistBins; i++)
- hist[i] = 0.f;
-
- af::dim4 idims = img.dims();
-
- // Calculate orientation histogram
- for (int l = 0; l < len*len; l++) {
- int i = l / len - radius;
- int j = l % len - radius;
-
- int y = pt_y + i;
- int x = pt_x + j;
- if (y < 1 || y >= idims[0] - 1 ||
- x < 1 || x >= idims[1] - 1)
- continue;
-
- float dx = (float)(IPTR(x+1, y) - IPTR(x-1, y));
- float dy = (float)(IPTR(x, y-1) - IPTR(x, y+1));
-
- float mag = sqrt(dx*dx+dy*dy);
- float ori = atan2(dy,dx);
- float w = exp(-(i*i + j*j)/exp_denom);
-
- int bin = round(n*(ori+PI_VAL)/(2.f*PI_VAL));
- bin = bin < n ? bin : 0;
-
- hist[bin] += w*mag;
- }
-
- for (int i = 0; i < SmoothOriPasses; i++) {
- for (int j = 0; j < n; j++) {
- temphist[j] = hist[j];
- }
- for (int j = 0; j < n; j++) {
- float prev = (j == 0) ? temphist[n-1] : temphist[j-1];
- float next = (j+1 == n) ? temphist[0] : temphist[j+1];
- hist[j] = 0.25f * prev + 0.5f * temphist[j] + 0.25f * next;
- }
- }
-
- float omax = hist[0];
- for (int i = 1; i < n; i++)
- omax = max(omax, hist[i]);
-
- float mag_thr = (float)(omax * OriPeakRatio);
- int l, r;
- for (int j = 0; j < n; j++) {
- l = (j == 0) ? n - 1 : j - 1;
- r = (j + 1) % n;
- if (hist[j] > hist[l] &&
- hist[j] > hist[r] &&
- hist[j] >= mag_thr) {
- if (*counter < max_feat) {
- float bin = j + 0.5f * (hist[l] - hist[r]) /
- (hist[l] - 2.0f*hist[j] + hist[r]);
- bin = (bin < 0.0f) ? bin + n : (bin >= n) ? bin - n : bin;
- float ori = 360.f - ((360.f/n) * bin);
-
- float new_real_x = real_x;
- float new_real_y = real_y;
- float new_size = size;
-
- if (double_input) {
- float scale = 0.5f;
- new_real_x *= scale;
- new_real_y *= scale;
- new_size *= scale;
- }
-
- x_out[*counter] = new_real_x;
- y_out[*counter] = new_real_y;
- layer_out[*counter] = layer;
- response_out[*counter] = response;
- size_out[*counter] = new_size;
- ori_out[*counter] = ori;
- (*counter)++;
- }
- }
- }
- }
-}
-
-void normalizeDesc(
- float* desc,
- const int histlen)
-{
- float len_sq = 0.0f;
-
- for (int i = 0; i < histlen; i++)
- len_sq += desc[i] * desc[i];
-
- float len_inv = 1.0f / sqrt(len_sq);
-
- for (int i = 0; i < histlen; i++) {
- desc[i] *= len_inv;
- }
-}
-
-// Computes feature descriptors for features in an array. Based on Section 6
-// of Lowe's paper.
-template<typename T>
-void computeDescriptor(
- float* desc_out,
- const unsigned desc_len,
- const float* x_in,
- const float* y_in,
- const unsigned* layer_in,
- const float* response_in,
- const float* size_in,
- const float* ori_in,
- const unsigned total_feat,
- const std::vector< Array<T> >& gauss_pyr,
- const int d,
- const int n,
- const float scale,
- const float sigma,
- const unsigned octave,
- const unsigned n_layers)
-{
- float desc[128];
-
- for (unsigned f = 0; f < total_feat; f++) {
- const unsigned layer = layer_in[f];
- float ori = (360.f - ori_in[f]) * PI_VAL / 180.f;
- ori = (ori > PI_VAL) ? ori - PI_VAL*2 : ori;
- //const float size = size_in[f];
- const int fx = round(x_in[f] * scale);
- const int fy = round(y_in[f] * scale);
-
- // Points img to correct Gaussian pyramid layer
- Array<T> img = gauss_pyr[octave*(n_layers+3) + layer];
- const T* img_ptr = img.get();
- af::dim4 idims = img.dims();
-
- float cos_t = cos(ori);
- float sin_t = sin(ori);
- float bins_per_rad = n / (PI_VAL * 2.f);
- float exp_denom = d * d * 0.5f;
- float hist_width = DescrSclFctr * sigma * powf(2.f, layer/n_layers);
- int radius = hist_width * sqrt(2.f) * (d + 1.f) * 0.5f + 0.5f;
-
- int len = radius*2+1;
- const int histlen = d*d*n;
-
- for (int i = 0; i < histlen; i++)
- desc[i] = 0.f;
-
- // Calculate orientation histogram
- for (int l = 0; l < len*len; l++) {
- int i = l / len - radius;
- int j = l % len - radius;
-
- int y = fy + i;
- int x = fx + j;
-
- float x_rot = (j * cos_t - i * sin_t) / hist_width;
- float y_rot = (j * sin_t + i * cos_t) / hist_width;
- float xbin = x_rot + d/2 - 0.5f;
- float ybin = y_rot + d/2 - 0.5f;
-
- if (ybin > -1.0f && ybin < d && xbin > -1.0f && xbin < d &&
- y > 0 && y < idims[0] - 1 && x > 0 && x < idims[1] - 1) {
- float dx = (float)(IPTR(x+1, y) - IPTR(x-1, y));
- float dy = (float)(IPTR(x, y-1) - IPTR(x, y+1));
-
- float grad_mag = sqrt(dx*dx + dy*dy);
- float grad_ori = atan2(dy, dx) - ori;
- while (grad_ori < 0.0f)
- grad_ori += PI_VAL*2;
- while (grad_ori >= PI_VAL*2)
- grad_ori -= PI_VAL*2;
-
- float w = exp(-(x_rot*x_rot + y_rot*y_rot) / exp_denom);
- float obin = grad_ori * bins_per_rad;
- float mag = grad_mag*w;
-
- int x0 = floor(xbin);
- int y0 = floor(ybin);
- int o0 = floor(obin);
- xbin -= x0;
- ybin -= y0;
- obin -= o0;
-
- for (int yl = 0; yl <= 1; yl++) {
- int yb = y0 + yl;
- if (yb >= 0 && yb < d) {
- float v_y = mag * ((yl == 0) ? 1.0f - ybin : ybin);
- for (int xl = 0; xl <= 1; xl++) {
- int xb = x0 + xl;
- if (xb >= 0 && xb < d) {
- float v_x = v_y * ((xl == 0) ? 1.0f - xbin : xbin);
- for (int ol = 0; ol <= 1; ol++) {
- int ob = (o0 + ol) % n;
- float v_o = v_x * ((ol == 0) ? 1.0f - obin : obin);
- desc[(yb*d + xb)*n + ob] += v_o;
- }
- }
- }
- }
- }
- }
- }
-
- normalizeDesc(desc, histlen);
-
- for (int i = 0; i < d*d*n; i++)
- desc[i] = min(desc[i], DescrMagThr);
-
- normalizeDesc(desc, histlen);
-
- // Calculate final descriptor values
- for (int k = 0; k < d*d*n; k++) {
- desc_out[f*desc_len+k] = round(min(255.f, desc[k] * IntDescrFctr));
- }
- }
-}
-
-#undef IPTR
-
-template<typename T, typename convAccT>
-Array<T> createInitialImage(
- const Array<T>& img,
- const float init_sigma,
- const bool double_input)
-{
- af::dim4 idims = img.dims();
-
- Array<T> init_img = createEmptyArray<T>(af::dim4());
-
- float s = (double_input) ? sqrt(init_sigma * init_sigma - InitSigma * InitSigma * 4)
- : sqrt(init_sigma * init_sigma - InitSigma * InitSigma);
-
- Array<T> filter = gauss_filter<T>(s);
-
- if (double_input) {
- Array<T> double_img = resize<T>(img, idims[0] * 2, idims[1] * 2, AF_INTERP_BILINEAR);
- init_img = convolve2<T, convAccT, false>(double_img, filter, filter);
- }
- else {
- init_img = convolve2<T, convAccT, false>(init_img, filter, filter);
- }
-
- return init_img;
-}
-
-template<typename T, typename convAccT>
-std::vector< Array<T> > buildGaussPyr(
- const Array<T>& init_img,
- const unsigned n_octaves,
- const unsigned n_layers,
- const float init_sigma)
-{
- // Precompute Gaussian sigmas using the following formula:
- // \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
- std::vector<float> sig_layers(n_layers + 3);
- sig_layers[0] = init_sigma;
- float k = std::pow(2.0f, 1.0f / n_layers);
- for (unsigned i = 1; i < n_layers + 3; i++) {
- float sig_prev = std::pow(k, i-1) * init_sigma;
- float sig_total = sig_prev * k;
- sig_layers[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev);
- }
-
- // Gaussian Pyramid
- std::vector< Array<T> > gauss_pyr(n_octaves * (n_layers+3), createEmptyArray<T>(af::dim4()));
- for (unsigned o = 0; o < n_octaves; o++) {
- for (unsigned l = 0; l < n_layers+3; l++) {
- unsigned src_idx = (l == 0) ? (o-1)*(n_layers+3) + n_layers : o*(n_layers+3) + l-1;
- unsigned idx = o*(n_layers+3) + l;
-
- if (o == 0 && l == 0) {
- gauss_pyr[idx] = init_img;
- }
- else if (l == 0) {
- af::dim4 sdims = gauss_pyr[src_idx].dims();
- gauss_pyr[idx] = resize<T>(gauss_pyr[src_idx], sdims[0] / 2, sdims[1] / 2, AF_INTERP_BILINEAR);
- }
- else {
- Array<T> filter = gauss_filter<T>(sig_layers[l]);
-
- gauss_pyr[idx] = convolve2<T, convAccT, false>(gauss_pyr[src_idx], filter, filter);
- }
- }
- }
-
- return gauss_pyr;
-}
-
-template<typename T>
-std::vector< Array<T> > buildDoGPyr(
- std::vector< Array<T> >& gauss_pyr,
- const unsigned n_octaves,
- const unsigned n_layers)
-{
- // DoG Pyramid
- std::vector< Array<T> > dog_pyr(n_octaves * (n_layers+2), createEmptyArray<T>(af::dim4()));
- for (unsigned o = 0; o < n_octaves; o++) {
- for (unsigned l = 0; l < n_layers+2; l++) {
- unsigned idx = o*(n_layers+2) + l;
- unsigned bottom = o*(n_layers+3) + l;
- unsigned top = o*(n_layers+3) + l+1;
-
- dog_pyr[idx] = createEmptyArray<T>(gauss_pyr[bottom].dims());
-
- sub<T>(dog_pyr[idx], gauss_pyr[top], gauss_pyr[bottom]);
- }
- }
-
- return dog_pyr;
-}
-
-
template<typename T, typename convAccT>
unsigned sift(Array<float>& x, Array<float>& y, Array<float>& score,
Array<float>& ori, Array<float>& size, Array<float>& desc,
@@ -831,217 +38,13 @@ unsigned sift(Array<float>& x, Array<float>& y, Array<float>& score,
const float init_sigma, const bool double_input,
const float img_scale, const float feature_ratio)
{
- af::dim4 idims = in.dims();
-
- const unsigned min_dim = (double_input) ? min(idims[0]*2, idims[1]*2)
- : min(idims[0], idims[1]);
- const unsigned n_octaves = floor(log(min_dim) / log(2)) - 2;
-
- Array<T> init_img = createInitialImage<T, convAccT>(in, init_sigma, double_input);
-
- std::vector< Array<T> > gauss_pyr = buildGaussPyr<T, convAccT>(init_img, n_octaves, n_layers, init_sigma);
-
- std::vector< Array<T> > dog_pyr = buildDoGPyr<T>(gauss_pyr, n_octaves, n_layers);
-
- std::vector<float*> x_pyr(n_octaves, NULL);
- std::vector<float*> y_pyr(n_octaves, NULL);
- std::vector<float*> response_pyr(n_octaves, NULL);
- std::vector<float*> size_pyr(n_octaves, NULL);
- std::vector<float*> ori_pyr(n_octaves, NULL);
- std::vector<float*> desc_pyr(n_octaves, NULL);
- std::vector<unsigned> feat_pyr(n_octaves, 0);
- unsigned total_feat = 0;
-
- const unsigned d = DescrWidth;
- const unsigned n = DescrHistBins;
- const unsigned desc_len = d*d*n;
-
- for (unsigned i = 0; i < n_octaves; i++) {
- af::dim4 ddims = dog_pyr[i*(n_layers+2)].dims();
- if (ddims[0]-2*ImgBorder < 1 ||
- ddims[1]-2*ImgBorder < 1)
- continue;
-
- const unsigned imel = ddims[0] * ddims[1];
- const unsigned max_feat = ceil(imel * feature_ratio);
-
- float* extrema_x = memAlloc<float>(max_feat);
- float* extrema_y = memAlloc<float>(max_feat);
- unsigned* extrema_layer = memAlloc<unsigned>(max_feat);
- unsigned extrema_feat = 0;
-
- for (unsigned j = 1; j <= n_layers; j++) {
- unsigned prev = i*(n_layers+2) + j-1;
- unsigned center = i*(n_layers+2) + j;
- unsigned next = i*(n_layers+2) + j+1;
-
- unsigned layer = j;
-
- float extrema_thr = 0.5f * contrast_thr / n_layers;
- detectExtrema<T>(extrema_x, extrema_y, extrema_layer, &extrema_feat,
- dog_pyr[prev], dog_pyr[center], dog_pyr[next],
- layer, max_feat, extrema_thr);
- }
-
- extrema_feat = min(extrema_feat, max_feat);
-
- if (extrema_feat == 0) {
- memFree(extrema_x);
- memFree(extrema_y);
- memFree(extrema_layer);
-
- continue;
- }
-
- unsigned interp_feat = 0;
-
- float* interp_x = memAlloc<float>(extrema_feat);
- float* interp_y = memAlloc<float>(extrema_feat);
- unsigned* interp_layer = memAlloc<unsigned>(extrema_feat);
- float* interp_response = memAlloc<float>(extrema_feat);
- float* interp_size = memAlloc<float>(extrema_feat);
-
- interpolateExtrema<T>(interp_x, interp_y, interp_layer,
- interp_response, interp_size, &interp_feat,
- extrema_x, extrema_y, extrema_layer, extrema_feat,
- dog_pyr, max_feat, i, n_layers,
- contrast_thr, edge_thr, init_sigma, img_scale);
-
- interp_feat = min(interp_feat, max_feat);
-
- if (interp_feat == 0) {
- memFree(interp_x);
- memFree(interp_y);
- memFree(interp_layer);
- memFree(interp_response);
- memFree(interp_size);
-
- continue;
- }
-
- std::vector<feat_t> sorted_feat;
- array_to_feat(sorted_feat, interp_x, interp_y, interp_layer, interp_response, interp_size, interp_feat);
- std::sort(sorted_feat.begin(), sorted_feat.end(), feat_cmp);
-
- memFree(interp_x);
- memFree(interp_y);
- memFree(interp_layer);
- memFree(interp_response);
- memFree(interp_size);
-
- unsigned nodup_feat = 0;
-
- float* nodup_x = memAlloc<float>(interp_feat);
- float* nodup_y = memAlloc<float>(interp_feat);
- unsigned* nodup_layer = memAlloc<unsigned>(interp_feat);
- float* nodup_response = memAlloc<float>(interp_feat);
- float* nodup_size = memAlloc<float>(interp_feat);
-
- removeDuplicates(nodup_x, nodup_y, nodup_layer,
- nodup_response, nodup_size, &nodup_feat,
- sorted_feat);
-
- const unsigned max_oriented_feat = nodup_feat * 3;
-
- float* oriented_x = memAlloc<float>(max_oriented_feat);
- float* oriented_y = memAlloc<float>(max_oriented_feat);
- unsigned* oriented_layer = memAlloc<unsigned>(max_oriented_feat);
- float* oriented_response = memAlloc<float>(max_oriented_feat);
- float* oriented_size = memAlloc<float>(max_oriented_feat);
- float* oriented_ori = memAlloc<float>(max_oriented_feat);
-
- unsigned oriented_feat = 0;
-
- calcOrientation<T>(oriented_x, oriented_y, oriented_layer,
- oriented_response, oriented_size, oriented_ori, &oriented_feat,
- nodup_x, nodup_y, nodup_layer,
- nodup_response, nodup_size, nodup_feat,
- gauss_pyr, max_oriented_feat, i, n_layers, double_input);
-
- memFree(nodup_x);
- memFree(nodup_y);
- memFree(nodup_layer);
- memFree(nodup_response);
- memFree(nodup_size);
-
- if (oriented_feat == 0) {
- memFree(oriented_x);
- memFree(oriented_y);
- memFree(oriented_layer);
- memFree(oriented_response);
- memFree(oriented_size);
- memFree(oriented_ori);
-
- continue;
- }
-
- float* desc = memAlloc<float>(oriented_feat * desc_len);
-
- float scale = 1.f/(1 << i);
- if (double_input) scale *= 2.f;
-
- computeDescriptor<T>(desc, desc_len,
- oriented_x, oriented_y, oriented_layer,
- oriented_response, oriented_size, oriented_ori,
- oriented_feat, gauss_pyr, d, n, scale, init_sigma, i, n_layers);
-
- total_feat += oriented_feat;
- feat_pyr[i] = oriented_feat;
-
- if (oriented_feat > 0) {
- x_pyr[i] = oriented_x;
- y_pyr[i] = oriented_y;
- response_pyr[i] = oriented_response;
- ori_pyr[i] = oriented_ori;
- size_pyr[i] = oriented_size;
- desc_pyr[i] = desc;
- }
- }
-
- if (total_feat > 0) {
- const af::dim4 total_feat_dims(total_feat);
- const af::dim4 desc_dims(desc_len, total_feat);
-
- // Allocate output memory
- x = createEmptyArray<float>(total_feat_dims);
- y = createEmptyArray<float>(total_feat_dims);
- score = createEmptyArray<float>(total_feat_dims);
- ori = createEmptyArray<float>(total_feat_dims);
- size = createEmptyArray<float>(total_feat_dims);
- desc = createEmptyArray<float>(desc_dims);
-
- float* x_ptr = x.get();
- float* y_ptr = y.get();
- float* score_ptr = score.get();
- float* ori_ptr = ori.get();
- float* size_ptr = size.get();
- float* desc_ptr = desc.get();
-
- unsigned offset = 0;
- for (unsigned i = 0; i < n_octaves; i++) {
- if (feat_pyr[i] == 0)
- continue;
-
- memcpy(x_ptr+offset, x_pyr[i], feat_pyr[i] * sizeof(float));
- memcpy(y_ptr+offset, y_pyr[i], feat_pyr[i] * sizeof(float));
- memcpy(score_ptr+offset, response_pyr[i], feat_pyr[i] * sizeof(float));
- memcpy(ori_ptr+offset, ori_pyr[i], feat_pyr[i] * sizeof(float));
- memcpy(size_ptr+offset, size_pyr[i], feat_pyr[i] * sizeof(float));
-
- memcpy(desc_ptr+(offset*desc_len), desc_pyr[i], feat_pyr[i] * desc_len * sizeof(float));
-
- memFree(x_pyr[i]);
- memFree(y_pyr[i]);
- memFree(response_pyr[i]);
- memFree(ori_pyr[i]);
- memFree(size_pyr[i]);
- memFree(desc_pyr[i]);
-
- offset += feat_pyr[i];
- }
- }
-
- return total_feat;
+#ifdef AF_NONFREE
+ return sift_impl<T, convAcct>(x, y, score, ori, size, desc, in, n_layers,
+ contrast_thr, edge_thr, init_sigma, double_input,
+ img_scale, feature_ratio);
+#else
+ AF_ERROR("ArrayFire was not built with nonfree support, SIFT disabled\n", AFF_ERR_NONFREE);
+#endif
}
#define INSTANTIATE(T, convAccT)\
diff --git a/src/backend/cpu/sift_nonfree.hpp b/src/backend/cpu/sift_nonfree.hpp
new file mode 100644
index 0000000..9652f0a
--- /dev/null
+++ b/src/backend/cpu/sift_nonfree.hpp
@@ -0,0 +1,1033 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+// The source code contained in this file is based on the original code by
+// Rob Hess. Please note that SIFT is an algorithm patented and protected
+// by US law, before using this code or any binary forms generated from it,
+// verify that you have permission to do so. The original license by Rob Hess
+// can be read below:
+//
+// Copyright (c) 2006-2012, Rob Hess <rob at iqengines.com>
+// All rights reserved.
+//
+// The following patent has been issued for methods embodied in this
+// software: "Method and apparatus for identifying scale invariant features
+// in an image and use of same for locating an object in an image," David
+// G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application
+// filed March 8, 1999. Asignee: The University of British Columbia. For
+// further details, contact David Lowe (lowe at cs.ubc.ca) or the
+// University-Industry Liaison Office of the University of British
+// Columbia.
+//
+// Note that restrictions imposed by this patent (and possibly others)
+// exist independently of and may be in conflict with the freedoms granted
+// in this license, which refers to copyright of the program, not patents
+// for any methods that it implements. Both copyright and patent law must
+// be obeyed to legally use and redistribute this program and it is not the
+// purpose of this license to induce you to infringe any patents or other
+// property right claims or to contest validity of any such claims. If you
+// redistribute or use the program, then this license merely protects you
+// from committing copyright infringement. It does not protect you from
+// committing patent infringement. So, before you do anything with this
+// program, make sure that you have permission to do so not merely in terms
+// of copyright, but also in terms of patent law.
+//
+// Please note that this license is not to be understood as a guarantee
+// either. If you use the program according to this license, but in
+// conflict with patent law, it does not mean that the licensor will refund
+// you for any losses that you incur if you are sued for your patent
+// infringement.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// * Redistributions of source code must retain the above copyright and
+// patent notices, this list of conditions and the following
+// disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in
+// the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Oregon State University nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+using af::dim4;
+
+namespace cpu
+{
+
+ static const float PI_VAL = 3.14159265358979323846f;
+
+// default width of descriptor histogram array
+ static const int DescrWidth = 4;
+
+// default number of bins per histogram in descriptor array
+ static const int DescrHistBins = 8;
+
+// assumed gaussian blur for input image
+ static const float InitSigma = 0.5f;
+
+// width of border in which to ignore keypoints
+ static const int ImgBorder = 5;
+
+// maximum steps of keypoint interpolation before failure
+ static const int MaxInterpSteps = 5;
+
+// default number of bins in histogram for orientation assignment
+ static const int OriHistBins = 36;
+
+// determines gaussian sigma for orientation assignment
+ static const float OriSigFctr = 1.5f;
+
+// determines the radius of the region used in orientation assignment */
+ static const float OriRadius = 3.0f * OriSigFctr;
+
+// number of passes of orientation histogram smoothing
+ static const int SmoothOriPasses = 2;
+
+// orientation magnitude relative to max that results in new feature
+ static const float OriPeakRatio = 0.8f;
+
+// determines the size of a single descriptor orientation histogram
+ static const float DescrSclFctr = 3.f;
+
+// threshold on magnitude of elements of descriptor vector
+ static const float DescrMagThr = 0.2f;
+
+// factor used to convert floating-point descriptor to unsigned char
+ static const float IntDescrFctr = 512.f;
+
+ typedef struct
+ {
+ float f[4];
+ unsigned l;
+ } feat_t;
+
+ bool feat_cmp(feat_t i, feat_t j)
+ {
+ for (int k = 0; k < 4; k++)
+ if (i.f[k] != j.f[k])
+ return (i.f[k] < j.f[k]);
+ if (i.l != j.l)
+ return (i.l < j.l);
+
+ return true;
+ }
+
+ void array_to_feat(std::vector<feat_t>& feat, float *x, float *y, unsigned *layer, float *resp, float *size, unsigned nfeat)
+ {
+ feat.resize(nfeat);
+ for (unsigned i = 0; i < feat.size(); i++) {
+ feat[i].f[0] = x[i];
+ feat[i].f[1] = y[i];
+ feat[i].f[2] = resp[i];
+ feat[i].f[3] = size[i];
+ feat[i].l = layer[i];
+ }
+ }
+
+ template<typename T>
+ void gaussian1D(T* out, const int dim, double sigma=0.0)
+ {
+ if(!(sigma>0)) sigma = 0.25*dim;
+
+ T sum = (T)0;
+ for(int i=0;i<dim;i++)
+ {
+ int x = i-(dim-1)/2;
+ T el = 1. / sqrt(2 * PI_VAL * sigma*sigma) * exp(-((x*x)/(2*(sigma*sigma))));
+ out[i] = el;
+ sum += el;
+ }
+
+ for(int k=0;k<dim;k++)
+ out[k] /= sum;
+ }
+
+ template<typename T>
+ Array<T> gauss_filter(float sigma)
+ {
+ // Using 6-sigma rule
+ unsigned gauss_len = (unsigned)round(sigma * 6 + 1) | 1;
+
+ Array<T> filter = createEmptyArray<T>(gauss_len);
+ gaussian1D((T*)getDevicePtr(filter), gauss_len, sigma);
+
+ return filter;
+ }
+
+ template<int N>
+ void gaussianElimination(float* A, float* b, float* x)
+ {
+ // forward elimination
+ for (int i = 0; i < N-1; i++) {
+ for (int j = i+1; j < N; j++) {
+ float s = A[j*N+i] / A[i*N+i];
+
+ for (int k = i; k < N; k++)
+ A[j*N+k] -= s * A[i*N+k];
+
+ b[j] -= s * b[i];
+ }
+ }
+
+ for (int i = 0; i < N; i++)
+ x[i] = 0;
+
+ // backward substitution
+ float sum = 0;
+ for (int i = 0; i <= N-2; i++) {
+ sum = b[i];
+ for (int j = i+1; j < N; j++)
+ sum -= A[i*N+j] * x[j];
+ x[i] = sum / A[i*N+i];
+ }
+ }
+
+ template<typename T>
+ void sub(
+ Array<T>& out,
+ const Array<T>& in1,
+ const Array<T>& in2)
+ {
+ size_t nel = in1.elements();
+ T* out_ptr = out.get();
+ const T* in1_ptr = in1.get();
+ const T* in2_ptr = in2.get();
+
+ for (size_t i = 0; i < nel; i++) {
+ out_ptr[i] = in1_ptr[i] - in2_ptr[i];
+ }
+ }
+
+#define CPTR(Y, X) (center_ptr[(Y) * idims[0] + (X)])
+#define PPTR(Y, X) (prev_ptr[(Y) * idims[0] + (X)])
+#define NPTR(Y, X) (next_ptr[(Y) * idims[0] + (X)])
+
+// Determines whether a pixel is a scale-space extremum by comparing it to its
+// 3x3x3 pixel neighborhood.
+ template<typename T>
+ void detectExtrema(
+ float* x_out,
+ float* y_out,
+ unsigned* layer_out,
+ unsigned* counter,
+ const Array<T>& prev,
+ const Array<T>& center,
+ const Array<T>& next,
+ const unsigned layer,
+ const unsigned max_feat,
+ const float threshold)
+ {
+ const af::dim4 idims = center.dims();
+ const T* prev_ptr = prev.get();
+ const T* center_ptr = center.get();
+ const T* next_ptr = next.get();
+
+ for (int y = ImgBorder; y < idims[1]-ImgBorder; y++) {
+ for (int x = ImgBorder; x < idims[0]-ImgBorder; x++) {
+ float p = center_ptr[y*idims[0] + x];
+
+ // Find extrema
+ if (abs((float)p) > threshold &&
+ ((p > 0 && p > CPTR(y-1, x-1) && p > CPTR(y-1, x) &&
+ p > CPTR(y-1, x+1) && p > CPTR(y, x-1) && p > CPTR(y, x+1) &&
+ p > CPTR(y+1, x-1) && p > CPTR(y+1, x) && p > CPTR(y+1, x+1) &&
+ p > PPTR(y-1, x-1) && p > PPTR(y-1, x) && p > PPTR(y-1, x+1) &&
+ p > PPTR(y, x-1) && p > PPTR(y , x) && p > PPTR(y, x+1) &&
+ p > PPTR(y+1, x-1) && p > PPTR(y+1, x) && p > PPTR(y+1, x+1) &&
+ p > NPTR(y-1, x-1) && p > NPTR(y-1, x) && p > NPTR(y-1, x+1) &&
+ p > NPTR(y, x-1) && p > NPTR(y , x) && p > NPTR(y, x+1) &&
+ p > NPTR(y+1, x-1) && p > NPTR(y+1, x) && p > NPTR(y+1, x+1)) ||
+ (p < 0 && p < CPTR(y-1, x-1) && p < CPTR(y-1, x) &&
+ p < CPTR(y-1, x+1) && p < CPTR(y, x-1) && p < CPTR(y, x+1) &&
+ p < CPTR(y+1, x-1) && p < CPTR(y+1, x) && p < CPTR(y+1, x+1) &&
+ p < PPTR(y-1, x-1) && p < PPTR(y-1, x) && p < PPTR(y-1, x+1) &&
+ p < PPTR(y, x-1) && p < PPTR(y , x) && p < PPTR(y, x+1) &&
+ p < PPTR(y+1, x-1) && p < PPTR(y+1, x) && p < PPTR(y+1, x+1) &&
+ p < NPTR(y-1, x-1) && p < NPTR(y-1, x) && p < NPTR(y-1, x+1) &&
+ p < NPTR(y, x-1) && p < NPTR(y , x) && p < NPTR(y, x+1) &&
+ p < NPTR(y+1, x-1) && p < NPTR(y+1, x) && p < NPTR(y+1, x+1)))) {
+
+ if (*counter < max_feat)
+ {
+ x_out[*counter] = (float)y;
+ y_out[*counter] = (float)x;
+ layer_out[*counter] = layer;
+ (*counter)++;
+ }
+ }
+ }
+ }
+ }
+
+// Interpolates a scale-space extremum's location and scale to subpixel
+// accuracy to form an image feature. Rejects features with low contrast.
+// Based on Section 4 of Lowe's paper.
+ template<typename T>
+ void interpolateExtrema(
+ float* x_out,
+ float* y_out,
+ unsigned* layer_out,
+ float* response_out,
+ float* size_out,
+ unsigned* counter,
+ const float* x_in,
+ const float* y_in,
+ const unsigned* layer_in,
+ const unsigned extrema_feat,
+ std::vector< Array<T> >& dog_pyr,
+ const unsigned max_feat,
+ const unsigned octave,
+ const unsigned n_layers,
+ const float contrast_thr,
+ const float edge_thr,
+ const float sigma,
+ const float img_scale)
+ {
+ for (int f = 0; f < extrema_feat; f++) {
+ const float first_deriv_scale = img_scale*0.5f;
+ const float second_deriv_scale = img_scale;
+ const float cross_deriv_scale = img_scale*0.25f;
+
+ float xl = 0, xy = 0, xx = 0, contr = 0;
+ int i = 0;
+
+ unsigned x = x_in[f];
+ unsigned y = y_in[f];
+ unsigned layer = layer_in[f];
+
+ const T* prev_ptr = dog_pyr[octave*(n_layers+2) + layer-1].get();
+ const T* center_ptr = dog_pyr[octave*(n_layers+2) + layer].get();
+ const T* next_ptr = dog_pyr[octave*(n_layers+2) + layer+1].get();
+
+ af::dim4 idims = dog_pyr[octave*(n_layers+2)].dims();
+
+ bool converges = true;
+
+ for (i = 0; i < MaxInterpSteps; i++) {
+ float dD[3] = {(float)(CPTR(x+1, y) - CPTR(x-1, y)) * first_deriv_scale,
+ (float)(CPTR(x, y+1) - CPTR(x, y-1)) * first_deriv_scale,
+ (float)(NPTR(x, y) - PPTR(x, y)) * first_deriv_scale};
+
+ float d2 = CPTR(x, y) * 2.f;
+ float dxx = (CPTR(x+1, y) + CPTR(x-1, y) - d2) * second_deriv_scale;
+ float dyy = (CPTR(x, y+1) + CPTR(x, y-1) - d2) * second_deriv_scale;
+ float dss = (NPTR(x, y ) + PPTR(x, y ) - d2) * second_deriv_scale;
+ float dxy = (CPTR(x+1, y+1) - CPTR(x-1, y+1) -
+ CPTR(x+1, y-1) + CPTR(x-1, y-1)) * cross_deriv_scale;
+ float dxs = (NPTR(x+1, y) - NPTR(x-1, y) -
+ PPTR(x+1, y) + PPTR(x-1, y)) * cross_deriv_scale;
+ float dys = (NPTR(x, y+1) - NPTR(x-1, y-1) -
+ PPTR(x, y-1) + PPTR(x-1, y-1)) * cross_deriv_scale;
+
+ float H[9] = {dxx, dxy, dxs,
+ dxy, dyy, dys,
+ dxs, dys, dss};
+
+ float X[3];
+ gaussianElimination<3>(H, dD, X);
+
+ xl = -X[2];
+ xy = -X[1];
+ xx = -X[0];
+
+ if (fabs(xl) < 0.5f && fabs(xy) < 0.5f && fabs(xx) < 0.5f)
+ break;
+
+ x += round(xx);
+ y += round(xy);
+ layer += round(xl);
+
+ if (layer < 1 || layer > n_layers ||
+ x < ImgBorder || x >= idims[1] - ImgBorder ||
+ y < ImgBorder || y >= idims[0] - ImgBorder) {
+ converges = false;
+ break;
+ }
+ }
+
+ // ensure convergence of interpolation
+ if (i >= MaxInterpSteps || !converges)
+ continue;
+
+ float dD[3] = {(float)(CPTR(x+1, y) - CPTR(x-1, y)) * first_deriv_scale,
+ (float)(CPTR(x, y+1) - CPTR(x, y-1)) * first_deriv_scale,
+ (float)(NPTR(x, y) - PPTR(x, y)) * first_deriv_scale};
+ float X[3] = {xx, xy, xl};
+
+ float P = dD[0]*X[0] + dD[1]*X[1] + dD[2]*X[2];
+
+ contr = center_ptr[x*idims[0]+y]*img_scale + P * 0.5f;
+ if(abs(contr) < (contrast_thr / n_layers))
+ continue;
+
+ // principal curvatures are computed using the trace and det of Hessian
+ float d2 = CPTR(x, y) * 2.f;
+ float dxx = (CPTR(x+1, y) + CPTR(x-1, y) - d2) * second_deriv_scale;
+ float dyy = (CPTR(x, y+1) + CPTR(x, y-1) - d2) * second_deriv_scale;
+ float dxy = (CPTR(x+1, y+1) - CPTR(x-1, y+1) -
+ CPTR(x+1, y-1) + CPTR(x-1, y-1)) * cross_deriv_scale;
+
+ float tr = dxx + dyy;
+ float det = dxx * dyy - dxy * dxy;
+
+ // add FLT_EPSILON for double-precision compatibility
+ if (det <= 0 || tr*tr*edge_thr >= (edge_thr + 1)*(edge_thr + 1)*det+FLT_EPSILON)
+ continue;
+
+ if (*counter < max_feat)
+ {
+ x_out[*counter] = (x + xx) * (1 << octave);
+ y_out[*counter] = (y + xy) * (1 << octave);
+ layer_out[*counter] = layer;
+ response_out[*counter] = abs(contr);
+ size_out[*counter] = sigma*pow(2.f, octave + (layer + xl) / n_layers);
+ (*counter)++;
+ }
+ }
+ }
+
+#undef CPTR
+#undef PPTR
+#undef NPTR
+
+// Remove duplicate keypoints
+ void removeDuplicates(
+ float* x_out,
+ float* y_out,
+ unsigned* layer_out,
+ float* response_out,
+ float* size_out,
+ unsigned* counter,
+ const std::vector<feat_t>& sorted_feat)
+ {
+ size_t nfeat = sorted_feat.size();
+
+ for (size_t f = 0; f < nfeat; f++) {
+ float prec_fctr = 1e4f;
+
+ if (f < nfeat-1) {
+ if (round(sorted_feat[f].f[0]*prec_fctr) == round(sorted_feat[f+1].f[0]*prec_fctr) &&
+ round(sorted_feat[f].f[1]*prec_fctr) == round(sorted_feat[f+1].f[1]*prec_fctr) &&
+ round(sorted_feat[f].f[2]*prec_fctr) == round(sorted_feat[f+1].f[2]*prec_fctr) &&
+ round(sorted_feat[f].f[3]*prec_fctr) == round(sorted_feat[f+1].f[3]*prec_fctr) &&
+ sorted_feat[f].l == sorted_feat[f+1].l)
+ continue;
+ }
+
+ x_out[*counter] = sorted_feat[f].f[0];
+ y_out[*counter] = sorted_feat[f].f[1];
+ response_out[*counter] = sorted_feat[f].f[2];
+ size_out[*counter] = sorted_feat[f].f[3];
+ layer_out[*counter] = sorted_feat[f].l;
+ (*counter)++;
+ }
+ }
+
+#define IPTR(Y, X) (img_ptr[(Y) * idims[0] + (X)])
+
+// Computes a canonical orientation for each image feature in an array. Based
+// on Section 5 of Lowe's paper. This function adds features to the array when
+// there is more than one dominant orientation at a given feature location.
+ template<typename T>
+ void calcOrientation(
+ float* x_out,
+ float* y_out,
+ unsigned* layer_out,
+ float* response_out,
+ float* size_out,
+ float* ori_out,
+ unsigned* counter,
+ const float* x_in,
+ const float* y_in,
+ const unsigned* layer_in,
+ const float* response_in,
+ const float* size_in,
+ const unsigned total_feat,
+ const std::vector< Array<T> >& gauss_pyr,
+ const unsigned max_feat,
+ const unsigned octave,
+ const unsigned n_layers,
+ const bool double_input)
+ {
+ const int n = OriHistBins;
+
+ float hist[OriHistBins];
+ float temphist[OriHistBins];
+
+ for (unsigned f = 0; f < total_feat; f++) {
+ // Load keypoint information
+ const float real_x = x_in[f];
+ const float real_y = y_in[f];
+ const unsigned layer = layer_in[f];
+ const float response = response_in[f];
+ const float size = size_in[f];
+
+ const int pt_x = (int)round(real_x / (1 << octave));
+ const int pt_y = (int)round(real_y / (1 << octave));
+
+ // Calculate auxiliary parameters
+ const float scl_octv = size*0.5f / (1 << octave);
+ const int radius = (int)round(OriRadius * scl_octv);
+ const float sigma = OriSigFctr * scl_octv;
+ const int len = (radius*2+1);
+ const float exp_denom = 2.f * sigma * sigma;
+
+ // Points img to correct Gaussian pyramid layer
+ const Array<T> img = gauss_pyr[octave*(n_layers+3) + layer];
+ const T* img_ptr = img.get();
+
+ for (int i = 0; i < OriHistBins; i++)
+ hist[i] = 0.f;
+
+ af::dim4 idims = img.dims();
+
+ // Calculate orientation histogram
+ for (int l = 0; l < len*len; l++) {
+ int i = l / len - radius;
+ int j = l % len - radius;
+
+ int y = pt_y + i;
+ int x = pt_x + j;
+ if (y < 1 || y >= idims[0] - 1 ||
+ x < 1 || x >= idims[1] - 1)
+ continue;
+
+ float dx = (float)(IPTR(x+1, y) - IPTR(x-1, y));
+ float dy = (float)(IPTR(x, y-1) - IPTR(x, y+1));
+
+ float mag = sqrt(dx*dx+dy*dy);
+ float ori = atan2(dy,dx);
+ float w = exp(-(i*i + j*j)/exp_denom);
+
+ int bin = round(n*(ori+PI_VAL)/(2.f*PI_VAL));
+ bin = bin < n ? bin : 0;
+
+ hist[bin] += w*mag;
+ }
+
+ for (int i = 0; i < SmoothOriPasses; i++) {
+ for (int j = 0; j < n; j++) {
+ temphist[j] = hist[j];
+ }
+ for (int j = 0; j < n; j++) {
+ float prev = (j == 0) ? temphist[n-1] : temphist[j-1];
+ float next = (j+1 == n) ? temphist[0] : temphist[j+1];
+ hist[j] = 0.25f * prev + 0.5f * temphist[j] + 0.25f * next;
+ }
+ }
+
+ float omax = hist[0];
+ for (int i = 1; i < n; i++)
+ omax = max(omax, hist[i]);
+
+ float mag_thr = (float)(omax * OriPeakRatio);
+ int l, r;
+ for (int j = 0; j < n; j++) {
+ l = (j == 0) ? n - 1 : j - 1;
+ r = (j + 1) % n;
+ if (hist[j] > hist[l] &&
+ hist[j] > hist[r] &&
+ hist[j] >= mag_thr) {
+ if (*counter < max_feat) {
+ float bin = j + 0.5f * (hist[l] - hist[r]) /
+ (hist[l] - 2.0f*hist[j] + hist[r]);
+ bin = (bin < 0.0f) ? bin + n : (bin >= n) ? bin - n : bin;
+ float ori = 360.f - ((360.f/n) * bin);
+
+ float new_real_x = real_x;
+ float new_real_y = real_y;
+ float new_size = size;
+
+ if (double_input) {
+ float scale = 0.5f;
+ new_real_x *= scale;
+ new_real_y *= scale;
+ new_size *= scale;
+ }
+
+ x_out[*counter] = new_real_x;
+ y_out[*counter] = new_real_y;
+ layer_out[*counter] = layer;
+ response_out[*counter] = response;
+ size_out[*counter] = new_size;
+ ori_out[*counter] = ori;
+ (*counter)++;
+ }
+ }
+ }
+ }
+ }
+
+ void normalizeDesc(
+ float* desc,
+ const int histlen)
+ {
+ float len_sq = 0.0f;
+
+ for (int i = 0; i < histlen; i++)
+ len_sq += desc[i] * desc[i];
+
+ float len_inv = 1.0f / sqrt(len_sq);
+
+ for (int i = 0; i < histlen; i++) {
+ desc[i] *= len_inv;
+ }
+ }
+
+// Computes feature descriptors for features in an array. Based on Section 6
+// of Lowe's paper.
+ template<typename T>
+ void computeDescriptor(
+ float* desc_out,
+ const unsigned desc_len,
+ const float* x_in,
+ const float* y_in,
+ const unsigned* layer_in,
+ const float* response_in,
+ const float* size_in,
+ const float* ori_in,
+ const unsigned total_feat,
+ const std::vector< Array<T> >& gauss_pyr,
+ const int d,
+ const int n,
+ const float scale,
+ const float sigma,
+ const unsigned octave,
+ const unsigned n_layers)
+ {
+ float desc[128];
+
+ for (unsigned f = 0; f < total_feat; f++) {
+ const unsigned layer = layer_in[f];
+ float ori = (360.f - ori_in[f]) * PI_VAL / 180.f;
+ ori = (ori > PI_VAL) ? ori - PI_VAL*2 : ori;
+ //const float size = size_in[f];
+ const int fx = round(x_in[f] * scale);
+ const int fy = round(y_in[f] * scale);
+
+ // Points img to correct Gaussian pyramid layer
+ Array<T> img = gauss_pyr[octave*(n_layers+3) + layer];
+ const T* img_ptr = img.get();
+ af::dim4 idims = img.dims();
+
+ float cos_t = cos(ori);
+ float sin_t = sin(ori);
+ float bins_per_rad = n / (PI_VAL * 2.f);
+ float exp_denom = d * d * 0.5f;
+ float hist_width = DescrSclFctr * sigma * powf(2.f, layer/n_layers);
+ int radius = hist_width * sqrt(2.f) * (d + 1.f) * 0.5f + 0.5f;
+
+ int len = radius*2+1;
+ const int histlen = d*d*n;
+
+ for (int i = 0; i < histlen; i++)
+ desc[i] = 0.f;
+
+ // Calculate orientation histogram
+ for (int l = 0; l < len*len; l++) {
+ int i = l / len - radius;
+ int j = l % len - radius;
+
+ int y = fy + i;
+ int x = fx + j;
+
+ float x_rot = (j * cos_t - i * sin_t) / hist_width;
+ float y_rot = (j * sin_t + i * cos_t) / hist_width;
+ float xbin = x_rot + d/2 - 0.5f;
+ float ybin = y_rot + d/2 - 0.5f;
+
+ if (ybin > -1.0f && ybin < d && xbin > -1.0f && xbin < d &&
+ y > 0 && y < idims[0] - 1 && x > 0 && x < idims[1] - 1) {
+ float dx = (float)(IPTR(x+1, y) - IPTR(x-1, y));
+ float dy = (float)(IPTR(x, y-1) - IPTR(x, y+1));
+
+ float grad_mag = sqrt(dx*dx + dy*dy);
+ float grad_ori = atan2(dy, dx) - ori;
+ while (grad_ori < 0.0f)
+ grad_ori += PI_VAL*2;
+ while (grad_ori >= PI_VAL*2)
+ grad_ori -= PI_VAL*2;
+
+ float w = exp(-(x_rot*x_rot + y_rot*y_rot) / exp_denom);
+ float obin = grad_ori * bins_per_rad;
+ float mag = grad_mag*w;
+
+ int x0 = floor(xbin);
+ int y0 = floor(ybin);
+ int o0 = floor(obin);
+ xbin -= x0;
+ ybin -= y0;
+ obin -= o0;
+
+ for (int yl = 0; yl <= 1; yl++) {
+ int yb = y0 + yl;
+ if (yb >= 0 && yb < d) {
+ float v_y = mag * ((yl == 0) ? 1.0f - ybin : ybin);
+ for (int xl = 0; xl <= 1; xl++) {
+ int xb = x0 + xl;
+ if (xb >= 0 && xb < d) {
+ float v_x = v_y * ((xl == 0) ? 1.0f - xbin : xbin);
+ for (int ol = 0; ol <= 1; ol++) {
+ int ob = (o0 + ol) % n;
+ float v_o = v_x * ((ol == 0) ? 1.0f - obin : obin);
+ desc[(yb*d + xb)*n + ob] += v_o;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ normalizeDesc(desc, histlen);
+
+ for (int i = 0; i < d*d*n; i++)
+ desc[i] = min(desc[i], DescrMagThr);
+
+ normalizeDesc(desc, histlen);
+
+ // Calculate final descriptor values
+ for (int k = 0; k < d*d*n; k++) {
+ desc_out[f*desc_len+k] = round(min(255.f, desc[k] * IntDescrFctr));
+ }
+ }
+ }
+
+#undef IPTR
+
+ template<typename T, typename convAccT>
+ Array<T> createInitialImage(
+ const Array<T>& img,
+ const float init_sigma,
+ const bool double_input)
+ {
+ af::dim4 idims = img.dims();
+
+ Array<T> init_img = createEmptyArray<T>(af::dim4());
+
+ float s = (double_input) ? sqrt(init_sigma * init_sigma - InitSigma * InitSigma * 4)
+ : sqrt(init_sigma * init_sigma - InitSigma * InitSigma);
+
+ Array<T> filter = gauss_filter<T>(s);
+
+ if (double_input) {
+ Array<T> double_img = resize<T>(img, idims[0] * 2, idims[1] * 2, AF_INTERP_BILINEAR);
+ init_img = convolve2<T, convAccT, false>(double_img, filter, filter);
+ }
+ else {
+ init_img = convolve2<T, convAccT, false>(init_img, filter, filter);
+ }
+
+ return init_img;
+ }
+
+ template<typename T, typename convAccT>
+ std::vector< Array<T> > buildGaussPyr(
+ const Array<T>& init_img,
+ const unsigned n_octaves,
+ const unsigned n_layers,
+ const float init_sigma)
+ {
+ // Precompute Gaussian sigmas using the following formula:
+ // \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
+ std::vector<float> sig_layers(n_layers + 3);
+ sig_layers[0] = init_sigma;
+ float k = std::pow(2.0f, 1.0f / n_layers);
+ for (unsigned i = 1; i < n_layers + 3; i++) {
+ float sig_prev = std::pow(k, i-1) * init_sigma;
+ float sig_total = sig_prev * k;
+ sig_layers[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev);
+ }
+
+ // Gaussian Pyramid
+ std::vector< Array<T> > gauss_pyr(n_octaves * (n_layers+3), createEmptyArray<T>(af::dim4()));
+ for (unsigned o = 0; o < n_octaves; o++) {
+ for (unsigned l = 0; l < n_layers+3; l++) {
+ unsigned src_idx = (l == 0) ? (o-1)*(n_layers+3) + n_layers : o*(n_layers+3) + l-1;
+ unsigned idx = o*(n_layers+3) + l;
+
+ if (o == 0 && l == 0) {
+ gauss_pyr[idx] = init_img;
+ }
+ else if (l == 0) {
+ af::dim4 sdims = gauss_pyr[src_idx].dims();
+ gauss_pyr[idx] = resize<T>(gauss_pyr[src_idx], sdims[0] / 2, sdims[1] / 2, AF_INTERP_BILINEAR);
+ }
+ else {
+ Array<T> filter = gauss_filter<T>(sig_layers[l]);
+
+ gauss_pyr[idx] = convolve2<T, convAccT, false>(gauss_pyr[src_idx], filter, filter);
+ }
+ }
+ }
+
+ return gauss_pyr;
+ }
+
+ template<typename T>
+ std::vector< Array<T> > buildDoGPyr(
+ std::vector< Array<T> >& gauss_pyr,
+ const unsigned n_octaves,
+ const unsigned n_layers)
+ {
+ // DoG Pyramid
+ std::vector< Array<T> > dog_pyr(n_octaves * (n_layers+2), createEmptyArray<T>(af::dim4()));
+ for (unsigned o = 0; o < n_octaves; o++) {
+ for (unsigned l = 0; l < n_layers+2; l++) {
+ unsigned idx = o*(n_layers+2) + l;
+ unsigned bottom = o*(n_layers+3) + l;
+ unsigned top = o*(n_layers+3) + l+1;
+
+ dog_pyr[idx] = createEmptyArray<T>(gauss_pyr[bottom].dims());
+
+ sub<T>(dog_pyr[idx], gauss_pyr[top], gauss_pyr[bottom]);
+ }
+ }
+
+ return dog_pyr;
+ }
+
+
+ template<typename T, typename convAccT>
+ unsigned sift_impl(Array<float>& x, Array<float>& y, Array<float>& score,
+ Array<float>& ori, Array<float>& size, Array<float>& desc,
+ const Array<T>& in, const unsigned n_layers,
+ const float contrast_thr, const float edge_thr,
+ const float init_sigma, const bool double_input,
+ const float img_scale, const float feature_ratio)
+ {
+ af::dim4 idims = in.dims();
+
+ const unsigned min_dim = (double_input) ? min(idims[0]*2, idims[1]*2)
+ : min(idims[0], idims[1]);
+ const unsigned n_octaves = floor(log(min_dim) / log(2)) - 2;
+
+ Array<T> init_img = createInitialImage<T, convAccT>(in, init_sigma, double_input);
+
+ std::vector< Array<T> > gauss_pyr = buildGaussPyr<T, convAccT>(init_img, n_octaves, n_layers, init_sigma);
+
+ std::vector< Array<T> > dog_pyr = buildDoGPyr<T>(gauss_pyr, n_octaves, n_layers);
+
+ std::vector<float*> x_pyr(n_octaves, NULL);
+ std::vector<float*> y_pyr(n_octaves, NULL);
+ std::vector<float*> response_pyr(n_octaves, NULL);
+ std::vector<float*> size_pyr(n_octaves, NULL);
+ std::vector<float*> ori_pyr(n_octaves, NULL);
+ std::vector<float*> desc_pyr(n_octaves, NULL);
+ std::vector<unsigned> feat_pyr(n_octaves, 0);
+ unsigned total_feat = 0;
+
+ const unsigned d = DescrWidth;
+ const unsigned n = DescrHistBins;
+ const unsigned desc_len = d*d*n;
+
+ for (unsigned i = 0; i < n_octaves; i++) {
+ af::dim4 ddims = dog_pyr[i*(n_layers+2)].dims();
+ if (ddims[0]-2*ImgBorder < 1 ||
+ ddims[1]-2*ImgBorder < 1)
+ continue;
+
+ const unsigned imel = ddims[0] * ddims[1];
+ const unsigned max_feat = ceil(imel * feature_ratio);
+
+ float* extrema_x = memAlloc<float>(max_feat);
+ float* extrema_y = memAlloc<float>(max_feat);
+ unsigned* extrema_layer = memAlloc<unsigned>(max_feat);
+ unsigned extrema_feat = 0;
+
+ for (unsigned j = 1; j <= n_layers; j++) {
+ unsigned prev = i*(n_layers+2) + j-1;
+ unsigned center = i*(n_layers+2) + j;
+ unsigned next = i*(n_layers+2) + j+1;
+
+ unsigned layer = j;
+
+ float extrema_thr = 0.5f * contrast_thr / n_layers;
+ detectExtrema<T>(extrema_x, extrema_y, extrema_layer, &extrema_feat,
+ dog_pyr[prev], dog_pyr[center], dog_pyr[next],
+ layer, max_feat, extrema_thr);
+ }
+
+ extrema_feat = min(extrema_feat, max_feat);
+
+ if (extrema_feat == 0) {
+ memFree(extrema_x);
+ memFree(extrema_y);
+ memFree(extrema_layer);
+
+ continue;
+ }
+
+ unsigned interp_feat = 0;
+
+ float* interp_x = memAlloc<float>(extrema_feat);
+ float* interp_y = memAlloc<float>(extrema_feat);
+ unsigned* interp_layer = memAlloc<unsigned>(extrema_feat);
+ float* interp_response = memAlloc<float>(extrema_feat);
+ float* interp_size = memAlloc<float>(extrema_feat);
+
+ interpolateExtrema<T>(interp_x, interp_y, interp_layer,
+ interp_response, interp_size, &interp_feat,
+ extrema_x, extrema_y, extrema_layer, extrema_feat,
+ dog_pyr, max_feat, i, n_layers,
+ contrast_thr, edge_thr, init_sigma, img_scale);
+
+ interp_feat = min(interp_feat, max_feat);
+
+ if (interp_feat == 0) {
+ memFree(interp_x);
+ memFree(interp_y);
+ memFree(interp_layer);
+ memFree(interp_response);
+ memFree(interp_size);
+
+ continue;
+ }
+
+ std::vector<feat_t> sorted_feat;
+ array_to_feat(sorted_feat, interp_x, interp_y, interp_layer, interp_response, interp_size, interp_feat);
+ std::sort(sorted_feat.begin(), sorted_feat.end(), feat_cmp);
+
+ memFree(interp_x);
+ memFree(interp_y);
+ memFree(interp_layer);
+ memFree(interp_response);
+ memFree(interp_size);
+
+ unsigned nodup_feat = 0;
+
+ float* nodup_x = memAlloc<float>(interp_feat);
+ float* nodup_y = memAlloc<float>(interp_feat);
+ unsigned* nodup_layer = memAlloc<unsigned>(interp_feat);
+ float* nodup_response = memAlloc<float>(interp_feat);
+ float* nodup_size = memAlloc<float>(interp_feat);
+
+ removeDuplicates(nodup_x, nodup_y, nodup_layer,
+ nodup_response, nodup_size, &nodup_feat,
+ sorted_feat);
+
+ const unsigned max_oriented_feat = nodup_feat * 3;
+
+ float* oriented_x = memAlloc<float>(max_oriented_feat);
+ float* oriented_y = memAlloc<float>(max_oriented_feat);
+ unsigned* oriented_layer = memAlloc<unsigned>(max_oriented_feat);
+ float* oriented_response = memAlloc<float>(max_oriented_feat);
+ float* oriented_size = memAlloc<float>(max_oriented_feat);
+ float* oriented_ori = memAlloc<float>(max_oriented_feat);
+
+ unsigned oriented_feat = 0;
+
+ calcOrientation<T>(oriented_x, oriented_y, oriented_layer,
+ oriented_response, oriented_size, oriented_ori, &oriented_feat,
+ nodup_x, nodup_y, nodup_layer,
+ nodup_response, nodup_size, nodup_feat,
+ gauss_pyr, max_oriented_feat, i, n_layers, double_input);
+
+ memFree(nodup_x);
+ memFree(nodup_y);
+ memFree(nodup_layer);
+ memFree(nodup_response);
+ memFree(nodup_size);
+
+ if (oriented_feat == 0) {
+ memFree(oriented_x);
+ memFree(oriented_y);
+ memFree(oriented_layer);
+ memFree(oriented_response);
+ memFree(oriented_size);
+ memFree(oriented_ori);
+
+ continue;
+ }
+
+ float* desc = memAlloc<float>(oriented_feat * desc_len);
+
+ float scale = 1.f/(1 << i);
+ if (double_input) scale *= 2.f;
+
+ computeDescriptor<T>(desc, desc_len,
+ oriented_x, oriented_y, oriented_layer,
+ oriented_response, oriented_size, oriented_ori,
+ oriented_feat, gauss_pyr, d, n, scale, init_sigma, i, n_layers);
+
+ total_feat += oriented_feat;
+ feat_pyr[i] = oriented_feat;
+
+ if (oriented_feat > 0) {
+ x_pyr[i] = oriented_x;
+ y_pyr[i] = oriented_y;
+ response_pyr[i] = oriented_response;
+ ori_pyr[i] = oriented_ori;
+ size_pyr[i] = oriented_size;
+ desc_pyr[i] = desc;
+ }
+ }
+
+ if (total_feat > 0) {
+ const af::dim4 total_feat_dims(total_feat);
+ const af::dim4 desc_dims(desc_len, total_feat);
+
+ // Allocate output memory
+ x = createEmptyArray<float>(total_feat_dims);
+ y = createEmptyArray<float>(total_feat_dims);
+ score = createEmptyArray<float>(total_feat_dims);
+ ori = createEmptyArray<float>(total_feat_dims);
+ size = createEmptyArray<float>(total_feat_dims);
+ desc = createEmptyArray<float>(desc_dims);
+
+ float* x_ptr = x.get();
+ float* y_ptr = y.get();
+ float* score_ptr = score.get();
+ float* ori_ptr = ori.get();
+ float* size_ptr = size.get();
+ float* desc_ptr = desc.get();
+
+ unsigned offset = 0;
+ for (unsigned i = 0; i < n_octaves; i++) {
+ if (feat_pyr[i] == 0)
+ continue;
+
+ memcpy(x_ptr+offset, x_pyr[i], feat_pyr[i] * sizeof(float));
+ memcpy(y_ptr+offset, y_pyr[i], feat_pyr[i] * sizeof(float));
+ memcpy(score_ptr+offset, response_pyr[i], feat_pyr[i] * sizeof(float));
+ memcpy(ori_ptr+offset, ori_pyr[i], feat_pyr[i] * sizeof(float));
+ memcpy(size_ptr+offset, size_pyr[i], feat_pyr[i] * sizeof(float));
+
+ memcpy(desc_ptr+(offset*desc_len), desc_pyr[i], feat_pyr[i] * desc_len * sizeof(float));
+
+ memFree(x_pyr[i]);
+ memFree(y_pyr[i]);
+ memFree(response_pyr[i]);
+ memFree(ori_pyr[i]);
+ memFree(size_pyr[i]);
+ memFree(desc_pyr[i]);
+
+ offset += feat_pyr[i];
+ }
+ }
+
+ return total_feat;
+ }
+}
diff --git a/src/backend/cuda/kernel/sift.hpp b/src/backend/cuda/kernel/sift_nonfree.hpp
similarity index 100%
rename from src/backend/cuda/kernel/sift.hpp
rename to src/backend/cuda/kernel/sift_nonfree.hpp
diff --git a/src/backend/cuda/sift.cu b/src/backend/cuda/sift.cu
index d61ed4f..1fb6800 100644
--- a/src/backend/cuda/sift.cu
+++ b/src/backend/cuda/sift.cu
@@ -14,7 +14,10 @@
#include <Array.hpp>
#include <err_cuda.hpp>
#include <handle.hpp>
-#include <kernel/sift.hpp>
+
+#ifdef AF_NONFREE
+#include <kernel/sift_nonfree.hpp>
+#endif
using af::dim4;
using af::features;
@@ -30,6 +33,7 @@ unsigned sift(Array<float>& x, Array<float>& y, Array<float>& score,
const float init_sigma, const bool double_input,
const float img_scale, const float feature_ratio)
{
+#ifdef AF_NONFREE
const dim4 dims = in.dims();
unsigned nfeat_out;
@@ -65,6 +69,9 @@ unsigned sift(Array<float>& x, Array<float>& y, Array<float>& score,
}
return nfeat_out;
+#else
+ AF_ERROR("ArrayFire was not built with nonfree support, SIFT disabled\n", AFF_ERR_NONFREE);
+#endif
}
#define INSTANTIATE(T, convAccT)\
diff --git a/src/backend/opencl/kernel/sift.cl b/src/backend/opencl/kernel/sift_nonfree.cl
similarity index 100%
rename from src/backend/opencl/kernel/sift.cl
rename to src/backend/opencl/kernel/sift_nonfree.cl
diff --git a/src/backend/opencl/kernel/sift.hpp b/src/backend/opencl/kernel/sift_nonfree.hpp
similarity index 99%
rename from src/backend/opencl/kernel/sift.hpp
rename to src/backend/opencl/kernel/sift_nonfree.hpp
index 5fcead5..96b00eb 100644
--- a/src/backend/opencl/kernel/sift.hpp
+++ b/src/backend/opencl/kernel/sift_nonfree.hpp
@@ -85,7 +85,7 @@
#include <kernel/fast.hpp>
#include <kernel/resize.hpp>
#include <kernel/sort_index.hpp>
-#include <kernel_headers/sift.hpp>
+#include <kernel_headers/sift_nonfree.hpp>
#include <memory.hpp>
#include <vector>
@@ -422,7 +422,7 @@ void sift(unsigned* out_feat,
}
cl::Program prog;
- buildProgram(prog, sift_cl, sift_cl_len, options.str());
+ buildProgram(prog, sift_nonfree_cl, sift_nonfree_cl_len, options.str());
siftProgs[device] = new Program(prog);
suKernel[device] = new Kernel(*siftProgs[device], "sub");
diff --git a/src/backend/opencl/sift.cpp b/src/backend/opencl/sift.cpp
index 1973e3e..898f361 100644
--- a/src/backend/opencl/sift.cpp
+++ b/src/backend/opencl/sift.cpp
@@ -14,7 +14,10 @@
#include <Array.hpp>
#include <err_opencl.hpp>
#include <handle.hpp>
-#include <kernel/sift.hpp>
+
+#ifdef AF_NONFREE
+#include <kernel/sift_nonfree.hpp>
+#endif
using af::dim4;
using af::features;
@@ -30,6 +33,7 @@ unsigned sift(Array<float>& x_out, Array<float>& y_out, Array<float>& score_out,
const float init_sigma, const bool double_input,
const float img_scale, const float feature_ratio)
{
+#ifdef AF_NONFREE
unsigned nfeat_out;
unsigned desc_len;
@@ -57,16 +61,19 @@ unsigned sift(Array<float>& x_out, Array<float>& y_out, Array<float>& score_out,
}
return nfeat_out;
+#else
+ AF_ERROR("ArrayFire was not built with nonfree support, SIFT disabled\n", AFF_ERR_NONFREE);
+#endif
}
-#define INSTANTIATE(T, convAccT) \
+#define INSTANTIATE(T, convAccT) \
template unsigned sift<T, convAccT>(Array<float>& x_out, Array<float>& y_out, \
Array<float>& score_out, Array<float>& ori_out, \
- Array<float>& size_out, Array<float>& desc_out, \
- const Array<T>& in, const unsigned n_layers, \
- const float contrast_thr, const float edge_thr, \
- const float init_sigma, const bool double_input, \
+ Array<float>& size_out, Array<float>& desc_out, \
+ const Array<T>& in, const unsigned n_layers, \
+ const float contrast_thr, const float edge_thr, \
+ const float init_sigma, const bool double_input, \
const float img_scale, const float feature_ratio);
INSTANTIATE(float , float )
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 339bffd..662a74d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -12,7 +12,7 @@ MACRO(CREATE_TESTS BACKEND GTEST_LIBS OTHER_LIBS)
GET_FILENAME_COMPONENT(FNAME ${FILE} NAME_WE)
SET(TEST_NAME ${FNAME}_${BACKEND})
- IF ("${FILE}" MATCHES ".manual.")
+ IF ("${FILE}" MATCHES ".manual." OR "${FILE}" MATCHES ".nonfree.")
MESSAGE(STATUS "Removing ${FILE} from ctest")
ELSE()
ADD_TEST(Test_${TEST_NAME} ${TEST_NAME})
diff --git a/test/sift.cpp b/test/sift_nonfree.cpp
similarity index 100%
rename from test/sift.cpp
rename to test/sift_nonfree.cpp
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list