[arrayfire] 271/408: Fixed OpenCL SIFT bug causing segmentation faults on Intel

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:12 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 966a1303668a0a522f7964c02f8937dda3143f55
Author: Peter Andreas Entschev <peter at arrayfire.com>
Date:   Mon Aug 17 15:37:24 2015 -0400

    Fixed OpenCL SIFT bug causing segmentation faults on Intel
---
 src/backend/opencl/kernel/sift.cl | 91 +++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/src/backend/opencl/kernel/sift.cl b/src/backend/opencl/kernel/sift.cl
index b93a8c3..57610df 100644
--- a/src/backend/opencl/kernel/sift.cl
+++ b/src/backend/opencl/kernel/sift.cl
@@ -521,13 +521,22 @@ __kernel void calcOrientation(
     __local float* hist = l_mem;
     __local float* temphist = l_mem + n*8;
 
+    // Initialize temporary histogram
+    for (int i = lid_x; i < n; i += lsz_x) {
+        hist[lid_y*n + i] = 0.f;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float real_x, real_y, response, size;
+    unsigned layer;
+
     if (f < total_feat) {
         // Load keypoint information
-        const float real_x = x_in[f];
-        const float real_y = y_in[f];
-        const unsigned layer = layer_in[f];
-        const float response = response_in[f];
-        const float size = size_in[f];
+        real_x = x_in[f];
+        real_y = y_in[f];
+        layer = layer_in[f];
+        response = response_in[f];
+        size = size_in[f];
 
         const int pt_x = (int)round(real_x / (1 << octave));
         const int pt_y = (int)round(real_y / (1 << octave));
@@ -539,12 +548,6 @@ __kernel void calcOrientation(
         const int len = (radius*2+1);
         const float exp_denom = 2.f * sigma * sigma;
 
-        // Initialize temporary histogram
-        for (int i = lid_x; i < n; i += lsz_x) {
-            hist[lid_y*n + i] = 0.f;
-        }
-        barrier(CLK_LOCAL_MEM_FENCE);
-
         const int dim0 = iGauss.dims[0];
         const int dim1 = iGauss.dims[1];
 
@@ -576,44 +579,46 @@ __kernel void calcOrientation(
 
             fatomic_add(&hist[lid_y*n+bin], w*mag);
         }
-        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
 
-        for (int i = 0; i < SMOOTH_ORI_PASSES; i++) {
-            for (int j = lid_x; j < n; j += lsz_x) {
-                temphist[lid_y*n+j] = hist[lid_y*n+j];
-            }
-            barrier(CLK_LOCAL_MEM_FENCE);
-            for (int j = lid_x; j < n; j += lsz_x) {
-                float prev = (j == 0) ? temphist[lid_y*n+n-1] : temphist[lid_y*n+j-1];
-                float next = (j+1 == n) ? temphist[lid_y*n] : temphist[lid_y*n+j+1];
-                hist[lid_y*n+j] = 0.25f * prev + 0.5f * temphist[lid_y*n+j] + 0.25f * next;
-            }
-            barrier(CLK_LOCAL_MEM_FENCE);
+    for (int i = 0; i < SMOOTH_ORI_PASSES; i++) {
+        for (int j = lid_x; j < n; j += lsz_x) {
+            temphist[lid_y*n+j] = hist[lid_y*n+j];
         }
-
-        for (int i = lid_x; i < n; i += lsz_x)
-            temphist[lid_y*n+i] = hist[lid_y*n+i];
-        barrier(CLK_LOCAL_MEM_FENCE);
-
-        if (lid_x < 16)
-            temphist[lid_y*n+lid_x] = fmax(hist[lid_y*n+lid_x], hist[lid_y*n+lid_x+16]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if (lid_x < 8)
-            temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+8]);
         barrier(CLK_LOCAL_MEM_FENCE);
-        if (lid_x < 4) {
-            temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], hist[lid_y*n+lid_x+32]);
-            temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+4]);
+        for (int j = lid_x; j < n; j += lsz_x) {
+            float prev = (j == 0) ? temphist[lid_y*n+n-1] : temphist[lid_y*n+j-1];
+            float next = (j+1 == n) ? temphist[lid_y*n] : temphist[lid_y*n+j+1];
+            hist[lid_y*n+j] = 0.25f * prev + 0.5f * temphist[lid_y*n+j] + 0.25f * next;
         }
         barrier(CLK_LOCAL_MEM_FENCE);
-        if (lid_x < 2)
-            temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+2]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-        if (lid_x < 1)
-            temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+1]);
-        barrier(CLK_LOCAL_MEM_FENCE);
-        float omax = temphist[lid_y*n];
+    }
+
+    for (int i = lid_x; i < n; i += lsz_x)
+        temphist[lid_y*n+i] = hist[lid_y*n+i];
+    barrier(CLK_LOCAL_MEM_FENCE);
 
+    if (lid_x < 16)
+        temphist[lid_y*n+lid_x] = fmax(hist[lid_y*n+lid_x], hist[lid_y*n+lid_x+16]);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (lid_x < 8)
+        temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+8]);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (lid_x < 4) {
+        temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], hist[lid_y*n+lid_x+32]);
+        temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+4]);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (lid_x < 2)
+        temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+2]);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (lid_x < 1)
+        temphist[lid_y*n+lid_x] = fmax(temphist[lid_y*n+lid_x], temphist[lid_y*n+lid_x+1]);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    float omax = temphist[lid_y*n];
+
+    if (f < total_feat) {
         float mag_thr = (float)(omax * ORI_PEAK_RATIO);
         int l, r;
         float bin;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list