[arrayfire] 257/408: Templated SIFT gaussianElimination() in CPU and CUDA backends

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:09 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit cce0e73a6f783a1e7d2b14ddd36a5fbf6e322ad8
Author: Peter Andreas Entschev <peter at arrayfire.com>
Date:   Thu Aug 13 17:09:33 2015 -0400

    Templated SIFT gaussianElimination() in CPU and CUDA backends
---
 src/backend/cpu/sift.cpp         | 25 +++++++++++++------------
 src/backend/cuda/kernel/sift.hpp | 32 +++++++++++++++++++-------------
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/src/backend/cpu/sift.cpp b/src/backend/cpu/sift.cpp
index f687dc5..8f872fe 100644
--- a/src/backend/cpu/sift.cpp
+++ b/src/backend/cpu/sift.cpp
@@ -189,30 +189,31 @@ Array<T> gauss_filter(float sigma)
     return filter;
 }
 
-void gaussianElimination(float* A, float* b, float* x, const int n)
+template<int N>
+void gaussianElimination(float* A, float* b, float* x)
 {
     // forward elimination
-    for (int i = 0; i < n-1; i++) {
-        for (int j = i+1; j < n; j++) {
-            float s = A[j*n+i] / A[i*n+i];
+    for (int i = 0; i < N-1; i++) {
+        for (int j = i+1; j < N; j++) {
+            float s = A[j*N+i] / A[i*N+i];
 
-            for (int k = i; k < n; k++)
-                A[j*n+k] -= s * A[i*n+k];
+            for (int k = i; k < N; k++)
+                A[j*N+k] -= s * A[i*N+k];
 
             b[j] -= s * b[i];
         }
     }
 
-    for (int i = 0; i < n; i++)
+    for (int i = 0; i < N; i++)
         x[i] = 0;
 
     // backward substitution
     float sum = 0;
-    for (int i = 0; i <= n-2; i++) {
+    for (int i = 0; i <= N-2; i++) {
         sum = b[i];
-        for (int j = i+1; j < n; j++)
-            sum -= A[i*n+j] * x[j];
-        x[i] = sum / A[i*n+i];
+        for (int j = i+1; j < N; j++)
+            sum -= A[i*N+j] * x[j];
+        x[i] = sum / A[i*N+i];
     }
 }
 
@@ -358,7 +359,7 @@ void interpolateExtrema(
                           dxs, dys, dss};
 
             float X[3];
-            gaussianElimination(H, dD, X, 3);
+            gaussianElimination<3>(H, dD, X);
 
             xl = -X[2];
             xy = -X[1];
diff --git a/src/backend/cuda/kernel/sift.hpp b/src/backend/cuda/kernel/sift.hpp
index 3d67f03..f551a61 100644
--- a/src/backend/cuda/kernel/sift.hpp
+++ b/src/backend/cuda/kernel/sift.hpp
@@ -185,31 +185,37 @@ Param<T> gauss_filter(float sigma)
     return gauss_filter;
 }
 
-__inline__ __device__ void gaussianElimination(float* A, float* b, float* x, const int n)
+template<int N>
+__inline__ __device__ void gaussianElimination(float* A, float* b, float* x)
 {
     // forward elimination
-    for (int i = 0; i < n-1; i++) {
-        for (int j = i+1; j < n; j++) {
-            float s = A[j*n+i] / A[i*n+i];
+    #pragma unroll
+    for (int i = 0; i < N-1; i++) {
+        #pragma unroll
+        for (int j = i+1; j < N; j++) {
+            float s = A[j*N+i] / A[i*N+i];
 
-            //for (int k = i+1; k < n; k++)
-            for (int k = i; k < n; k++)
-                A[j*n+k] -= s * A[i*n+k];
+            #pragma unroll
+            for (int k = i; k < N; k++)
+                A[j*N+k] -= s * A[i*N+k];
 
             b[j] -= s * b[i];
         }
     }
 
-    for (int i = 0; i < n; i++)
+    #pragma unroll
+    for (int i = 0; i < N; i++)
         x[i] = 0;
 
     // backward substitution
     float sum = 0;
-    for (int i = 0; i <= n-2; i++) {
+    #pragma unroll
+    for (int i = 0; i <= N-2; i++) {
         sum = b[i];
-        for (int j = i+1; j < n; j++)
-            sum -= A[i*n+j] * x[j];
-        x[i] = sum / A[i*n+i];
+        #pragma unroll
+        for (int j = i+1; j < N; j++)
+            sum -= A[i*N+j] * x[j];
+        x[i] = sum / A[i*N+i];
     }
 }
 
@@ -435,7 +441,7 @@ __global__ void interpolateExtrema(
                           dxs, dys, dss};
 
             float X[3];
-            gaussianElimination(H, dD, X, 3);
+            gaussianElimination<3>(H, dD, X);
 
             xl = -X[2];
             xy = -X[1];

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list