[hamradio-commits] [gnss-sdr] 68/149: fix sse implementations

Carles Fernandez carles_fernandez-guest at moszumanska.debian.org
Sat Feb 6 19:43:04 UTC 2016


This is an automated email from the git hooks/post-receive script.

carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.

commit 38d4d8aa9aea3eb678fb052d5b4fe3dad544996f
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date:   Sat Jan 16 20:57:55 2016 +0100

    fix sse implementations
---
 .../kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h         |  8 ++++----
 .../kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h    | 12 ++++++------
 .../volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h     |  4 ++--
 .../volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h        | 10 ++++++----
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h
index cd11fd4..bf20d59 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h
@@ -70,7 +70,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
             cPtr += 16;
         }
 
-    for(unsigned int i = 0; i<(num_points % 16); ++i)
+    for(unsigned int i = sse_iters * 16; i < num_points; ++i)
         {
             *cPtr++ = (*aPtr++) + (*bPtr++);
         }
@@ -134,14 +134,14 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a
 
             cVal = _mm_add_epi8(aVal, bVal);
 
-            _mm_store_si128((__m128i*)cPtr,cVal); // Store the results back into the C container
+            _mm_store_si128((__m128i*)cPtr, cVal); // Store the results back into the C container
 
             aPtr += 16;
             bPtr += 16;
             cPtr += 16;
         }
 
-    for(unsigned int i = 0; i<(num_points % 16); ++i)
+    for(unsigned int i = sse_iters * 16; i < num_points; ++i)
         {
             *cPtr++ = (*aPtr++) + (*bPtr++);
         }
@@ -163,7 +163,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char
     const char* bPtr=  bVector;
     unsigned int number = 0;
 
-    for(number = 0; number < num_points; number++)
+    for(; number < num_points; number++)
         {
             *cPtr++ = (*aPtr++) + (*bPtr++);
         }
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
index 4f3a4b3..c8424e2 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
@@ -75,7 +75,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const
             c += 16;
         }
 
-    for (unsigned int i = 0; i<(num_points % 16); ++i)
+    for (unsigned int i = sse_iters * 16; i < num_points; ++i)
         {
             *c++ = lv_conj(*a++);
         }
@@ -109,7 +109,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con
             c += 8;
         }
 
-    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    for (unsigned int i = sse_iters * 8; i < num_points; ++i)
         {
             *c++ = lv_conj(*a++);
         }
@@ -146,7 +146,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons
             c += 8;
         }
 
-    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    for (unsigned int i = sse_iters * 8; i < num_points; ++i)
         {
             *c++ = lv_conj(*a++);
         }
@@ -220,7 +220,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const
             c += 16;
         }
 
-    for (unsigned int i = 0; i<(num_points % 16); ++i)
+    for (unsigned int i = sse_iters * 16; i < num_points; ++i)
         {
             *c++ = lv_conj(*a++);
         }
@@ -254,7 +254,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, con
             c += 8;
         }
 
-    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    for (unsigned int i = sse_iters * 8; i < num_points; ++i)
         {
             *c++ = lv_conj(*a++);
         }
@@ -291,7 +291,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
             c += 8;
         }
 
-    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    for (unsigned int i = sse_iters * 8; i < num_points; ++i)
         {
             *c++ = lv_conj(*a++);
         }
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
index 5e6c84d..c8114e8 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
@@ -90,7 +90,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV
             magnitudeVectorPtr += 16;
         }
 
-    for (unsigned int i = 0; i<(num_points % 16); ++i)
+    for (unsigned int i = sse_iters * 16; i < num_points; ++i)
         {
             const char valReal = *complexVectorPtr++;
             const char valImag = *complexVectorPtr++;
@@ -226,7 +226,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
             magnitudeVectorPtr += 16;
         }
 
-    for (unsigned int i = 0; i<(num_points % 16); ++i)
+    for (unsigned int i = sse_iters * 16; i < num_points; ++i)
         {
             const char valReal = *complexVectorPtr++;
             const char valImag = *complexVectorPtr++;
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h
index e1e3e81..89e41fd 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h
@@ -51,6 +51,7 @@
  */
 static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
 {
+    unsigned int number = 0;
     const unsigned int sse_iters = num_points / 8;
 
     __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
@@ -65,7 +66,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
     imagy = _mm_and_si128 (imagy, mult1);
     realy = _mm_and_si128 (y, mult1);
 
-    for(unsigned int number = 0;number < sse_iters; number++)
+    for(; number < sse_iters; number++)
         {
             x = _mm_lddqu_si128((__m128i*)a);
 
@@ -92,7 +93,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
             c += 8;
         }
 
-    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    for (number = sse_iters * 8; number < num_points; ++number)
         {
             *c++ = (*a++) * scalar;
         }
@@ -164,6 +165,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector,
  */
 static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
 {
+    unsigned int number = 0;
     const unsigned int sse_iters = num_points / 8;
 
     __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
@@ -178,7 +180,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
     imagy = _mm_and_si128 (imagy, mult1);
     realy = _mm_and_si128 (y, mult1);
 
-    for(unsigned int number = 0;number < sse_iters; number++)
+    for(; number < sse_iters; number++)
         {
             x = _mm_load_si128((__m128i*)a);
 
@@ -205,7 +207,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
             c += 8;
         }
 
-    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    for (number = sse_iters * 8; number < num_points; ++number)
         {
             *c++ = (*a++) * scalar;
         }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git



More information about the pkg-hamradio-commits mailing list