[hamradio-commits] [gnss-sdr] 89/149: Sout out the aligned/unaligned thing in old kernels
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Sat Feb 6 19:43:06 UTC 2016
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit 9bf8b174ba35aee46bdbccfe90706bc5bda5efb8
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date: Wed Jan 20 18:16:09 2016 +0100
Sout out the aligned/unaligned thing in old kernels
---
.../volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h | 35 +-
.../volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h | 35 +-
.../volk_gnsssdr_64f_accumulator_64f.h | 28 +-
.../volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h | 37 +-
.../volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h | 650 ++++++++++-----------
.../kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h | 39 +-
.../volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h | 36 +-
.../volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h | 35 +-
.../volk_gnsssdr_8ic_magnitude_squared_8i.h | 36 +-
.../volk_gnsssdr_8ic_s8ic_multiply_8ic.h | 57 +-
.../volk_gnsssdr_8ic_x2_dot_prod_8ic.h | 70 +--
.../volk_gnsssdr_8ic_x2_multiply_8ic.h | 37 +-
.../volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h | 35 +-
13 files changed, 358 insertions(+), 772 deletions(-)
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
index 40babae..1bcf919 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
@@ -34,8 +34,8 @@
#include <math.h>
#include "volk_gnsssdr/volk_gnsssdr_complex.h"
-#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
-#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
+#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
#ifdef LV_HAVE_SSE2
@@ -168,11 +168,6 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto
}
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
-#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
#ifdef LV_HAVE_SSE2
@@ -281,28 +276,4 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
}
#endif /* LV_HAVE_SSE */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
- \param inputVector The floating point input data buffer
- \param outputVector The 16 bit output data buffer
- \param num_points The number of data values to be converted
- */
-static inline void volk_gnsssdr_32fc_convert_16ic_a_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
-{
- float* inputVectorPtr = (float*)inputVector;
- int16_t* outputVectorPtr = (int16_t*)outputVector;
- float min_val = -32768;
- float max_val = 32767;
-
- for(unsigned int i = 0; i < num_points*2; i++)
- {
- if(inputVectorPtr[i] > max_val)
- inputVectorPtr[i] = max_val;
- else if(inputVectorPtr[i] < min_val)
- inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
- }
-}
-#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H */
+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
index 94cc2ec..da66f8d 100755
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
@@ -36,8 +36,8 @@
#include "volk_gnsssdr/volk_gnsssdr_complex.h"
-#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
-#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
+#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@@ -126,11 +126,6 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
}
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
-#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
#ifdef LV_HAVE_SSE2
@@ -195,28 +190,4 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
}
#endif /* LV_HAVE_SSE2 */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
- \param inputVector The floating point input data buffer
- \param outputVector The 16 bit output data buffer
- \param num_points The number of data values to be converted
- */
-static inline void volk_gnsssdr_32fc_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
-{
- float* inputVectorPtr = (float*)inputVector;
- int8_t* outputVectorPtr = (int8_t*)outputVector;
- float min_val = -128;
- float max_val = 127;
-
- for(unsigned int i = 0; i < num_points*2; i++)
- {
- if(inputVectorPtr[i] > max_val)
- inputVectorPtr[i] = max_val;
- else if(inputVectorPtr[i] < min_val)
- inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]);
- }
-}
-#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H */
+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h
index 4ccb5a7..a1efb94 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h
@@ -134,15 +134,6 @@ static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H
-#define INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H
-
-#include <volk_gnsssdr/volk_gnsssdr_common.h>
-#include <inttypes.h>
-#include <stdio.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
@@ -222,21 +213,4 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const
}
#endif /* LV_HAVE_SSE3 */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Accumulates the values in the input buffer
- \param result The accumulated result
- \param inputBuffer The buffer of data to be accumulated
- \param num_points The number of values in inputBuffer to be accumulated
- */
-static inline void volk_gnsssdr_64f_accumulator_64f_a_generic(double* result,const double* inputBuffer, unsigned int num_points){
- const double* aPtr = inputBuffer;
- double returnValue = 0;
-
- for(unsigned int number = 0;number < num_points; number++){
- returnValue += (*aPtr++);
- }
- *result = returnValue;
-}
-#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H */
+#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h
index 35c0dfa..942dc02 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h
@@ -32,11 +32,12 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
-#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
+#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
+#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
+#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@@ -99,16 +100,6 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const c
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
-#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
-
-#include <volk_gnsssdr/volk_gnsssdr_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
@@ -149,26 +140,6 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
}
#endif /* LV_HAVE_SSE3 */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Accumulates the values in the input buffer
- \param result The accumulated result
- \param inputBuffer The buffer of data to be accumulated
- \param num_points The number of values in inputBuffer to be accumulated
- */
-static inline void volk_gnsssdr_8i_accumulator_s8i_a_generic(char* result, const char* inputBuffer, unsigned int num_points)
-{
- const char* aPtr = inputBuffer;
- char returnValue = 0;
-
- for(unsigned int number = 0;number < num_points; number++)
- {
- returnValue += (*aPtr++);
- }
- *result = returnValue;
-}
-#endif /* LV_HAVE_GENERIC */
-
#ifdef LV_HAVE_ORC
/*!
\brief Accumulates the values in the input buffer
@@ -190,5 +161,5 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const cha
}
#endif /* LV_HAVE_ORC */
-#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H */
+#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h
index e00be59..4bad640 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h
@@ -32,11 +32,12 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
-#define INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
+#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_H
+#define INCLUDED_volk_gnsssdr_8i_index_max_16u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
+#include <stdio.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
@@ -46,62 +47,64 @@
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
-static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points) {
- if(num_points > 0){
- const unsigned int sse_iters = num_points / 32;
-
- char* basePtr = (char*)src0;
- char* inputPtr = (char*)src0;
- char max = src0[0];
- unsigned int index = 0;
- __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
- __m256i ones, compareResults, currentValues;
- __m128i compareResultslo, compareResultshi, maxValues, lo, hi;
-
- ones = _mm256_set1_epi8(0xFF);
- maxValues = _mm_set1_epi8(max);
-
- for(unsigned int number = 0; number < sse_iters; number++)
+static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points)
+{
+ if(num_points > 0)
{
- currentValues = _mm256_lddqu_si256((__m256i*)inputPtr);
-
- lo = _mm256_castsi256_si128(currentValues);
- hi = _mm256_extractf128_si256(currentValues,1);
-
- compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
- compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
-
- //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
- compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
-
- if (!_mm256_testc_si256(compareResults, ones))
- {
- _mm256_storeu_si256((__m256i*)&currentValuesBuffer, currentValues);
-
- for(unsigned int i = 0; i < 32; i++)
+ const unsigned int sse_iters = num_points / 32;
+
+ char* basePtr = (char*)src0;
+ char* inputPtr = (char*)src0;
+ char max = src0[0];
+ unsigned int index = 0;
+ __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
+ __m256i ones, compareResults, currentValues;
+ __m128i compareResultslo, compareResultshi, maxValues, lo, hi;
+
+ ones = _mm256_set1_epi8(0xFF);
+ maxValues = _mm_set1_epi8(max);
+
+ for(unsigned int number = 0; number < sse_iters; number++)
{
- if(currentValuesBuffer[i] > max)
- {
- index = inputPtr - basePtr + i;
- max = currentValuesBuffer[i];
- }
+ currentValues = _mm256_lddqu_si256((__m256i*)inputPtr);
+
+ lo = _mm256_castsi256_si128(currentValues);
+ hi = _mm256_extractf128_si256(currentValues,1);
+
+ compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
+ compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
+
+ //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
+ compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
+
+ if (!_mm256_testc_si256(compareResults, ones))
+ {
+ _mm256_storeu_si256((__m256i*)&currentValuesBuffer, currentValues);
+
+ for(unsigned int i = 0; i < 32; i++)
+ {
+ if(currentValuesBuffer[i] > max)
+ {
+ index = inputPtr - basePtr + i;
+ max = currentValuesBuffer[i];
+ }
+ }
+ maxValues = _mm_set1_epi8(max);
+ }
+
+ inputPtr += 32;
}
- maxValues = _mm_set1_epi8(max);
- }
-
- inputPtr += 32;
- }
-
- for(unsigned int i = 0; i<(num_points % 32); ++i)
- {
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
+
+ for(unsigned int i = 0; i<(num_points % 32); ++i)
+ {
+ if(src0[i] > max)
+ {
+ index = i;
+ max = src0[i];
+ }
+ }
+ target[0] = index;
}
- target[0] = index;
- }
}
#endif /*LV_HAVE_AVX*/
@@ -114,53 +117,55 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
-static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) {
- if(num_points > 0){
- const unsigned int sse_iters = num_points / 16;
-
- char* basePtr = (char*)src0;
- char* inputPtr = (char*)src0;
- char max = src0[0];
- unsigned int index = 0;
- __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
- __m128i maxValues, compareResults, currentValues;
-
- maxValues = _mm_set1_epi8(max);
-
- for(unsigned int number = 0; number < sse_iters; number++)
+static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
+{
+ if(num_points > 0)
{
- currentValues = _mm_lddqu_si128((__m128i*)inputPtr);
-
- compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
-
- if (!_mm_test_all_ones(compareResults))
- {
- _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
-
- for(unsigned int i = 0; i < 16; i++)
+ const unsigned int sse_iters = num_points / 16;
+
+ char* basePtr = (char*)src0;
+ char* inputPtr = (char*)src0;
+ char max = src0[0];
+ unsigned int index = 0;
+ __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
+ __m128i maxValues, compareResults, currentValues;
+
+ maxValues = _mm_set1_epi8(max);
+
+ for(unsigned int number = 0; number < sse_iters; number++)
+ {
+ currentValues = _mm_lddqu_si128((__m128i*)inputPtr);
+
+ compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
+
+ if (!_mm_test_all_ones(compareResults))
+ {
+ _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
+
+ for(unsigned int i = 0; i < 16; i++)
+ {
+ if(currentValuesBuffer[i] > max)
+ {
+ index = inputPtr - basePtr + i;
+ max = currentValuesBuffer[i];
+ }
+ }
+ maxValues = _mm_set1_epi8(max);
+ }
+
+ inputPtr += 16;
+ }
+
+ for(unsigned int i = 0; i<(num_points % 16); ++i)
{
- if(currentValuesBuffer[i] > max)
- {
- index = inputPtr - basePtr + i;
- max = currentValuesBuffer[i];
- }
+ if(src0[i] > max)
+ {
+ index = i;
+ max = src0[i];
+ }
}
- maxValues = _mm_set1_epi8(max);
- }
-
- inputPtr += 16;
- }
-
- for(unsigned int i = 0; i<(num_points % 16); ++i)
- {
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
+ target[0] = index;
}
- target[0] = index;
- }
}
#endif /*LV_HAVE_SSE4_1*/
@@ -173,59 +178,61 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target,
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
-static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points) {
- if(num_points > 0){
- const unsigned int sse_iters = num_points / 16;
-
- char* basePtr = (char*)src0;
- char* inputPtr = (char*)src0;
- char max = src0[0];
- unsigned int index = 0;
- unsigned short mask;
- __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
- __m128i maxValues, compareResults, currentValues;
-
- maxValues = _mm_set1_epi8(max);
-
- for(unsigned int number = 0; number < sse_iters; number++)
+static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points)
+{
+ if(num_points > 0)
{
- currentValues = _mm_loadu_si128((__m128i*)inputPtr);
- compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
- mask = _mm_movemask_epi8(compareResults);
-
- if (mask != 0xFFFF)
- {
- _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
- mask = ~mask;
- unsigned int i = 0;
- while (mask > 0)
+ const unsigned int sse_iters = num_points / 16;
+
+ char* basePtr = (char*)src0;
+ char* inputPtr = (char*)src0;
+ char max = src0[0];
+ unsigned int index = 0;
+ unsigned short mask;
+ __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
+ __m128i maxValues, compareResults, currentValues;
+
+ maxValues = _mm_set1_epi8(max);
+
+ for(unsigned int number = 0; number < sse_iters; number++)
{
- if ((mask & 1) == 1)
- {
- if(currentValuesBuffer[i] > max)
+ currentValues = _mm_loadu_si128((__m128i*)inputPtr);
+ compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
+ mask = _mm_movemask_epi8(compareResults);
+
+ if (mask != 0xFFFF)
{
- index = inputPtr - basePtr + i;
- max = currentValuesBuffer[i];
+ _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
+ mask = ~mask;
+ unsigned int i = 0;
+ while (mask > 0)
+ {
+ if ((mask & 1) == 1)
+ {
+ if(currentValuesBuffer[i] > max)
+ {
+ index = inputPtr - basePtr + i;
+ max = currentValuesBuffer[i];
+ }
+ }
+ i++;
+ mask >>= 1;
+ }
+ maxValues = _mm_set1_epi8(max);
}
- }
- i++;
- mask >>= 1;
+ inputPtr += 16;
}
- maxValues = _mm_set1_epi8(max);
- }
- inputPtr += 16;
- }
-
- for(unsigned int i = 0; i<(num_points % 16); ++i)
- {
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
+
+ for(unsigned int i = 0; i<(num_points % 16); ++i)
+ {
+ if(src0[i] > max)
+ {
+ index = i;
+ max = src0[i];
+ }
+ }
+ target[0] = index;
}
- target[0] = index;
- }
}
#endif /*LV_HAVE_SSE2*/
@@ -237,36 +244,27 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
-static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points) {
-
+static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points)
+{
if(num_points > 0)
- {
- char max = src0[0];
- unsigned int index = 0;
-
- for(unsigned int i = 1; i < num_points; ++i)
{
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
+ char max = src0[0];
+ unsigned int index = 0;
+
+ for(unsigned int i = 1; i < num_points; ++i)
+ {
+ if(src0[i] > max)
+ {
+ index = i;
+ max = src0[i];
+ }
+ }
+ target[0] = index;
}
- target[0] = index;
- }
}
#endif /*LV_HAVE_GENERIC*/
-#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H*/
-
-
-#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
-#define INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
-
-#include <volk_gnsssdr/volk_gnsssdr_common.h>
-#include <inttypes.h>
-#include <stdio.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
@@ -276,62 +274,64 @@ static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, c
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
-static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points) {
- if(num_points > 0){
- const unsigned int sse_iters = num_points / 32;
-
- char* basePtr = (char*)src0;
- char* inputPtr = (char*)src0;
- char max = src0[0];
- unsigned int index = 0;
- __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
- __m256i ones, compareResults, currentValues;
- __m128i compareResultslo, compareResultshi, maxValues, lo, hi;
-
- ones = _mm256_set1_epi8(0xFF);
- maxValues = _mm_set1_epi8(max);
-
- for(unsigned int number = 0; number < sse_iters; number++)
+static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points)
+{
+ if(num_points > 0)
{
- currentValues = _mm256_load_si256((__m256i*)inputPtr);
-
- lo = _mm256_castsi256_si128(currentValues);
- hi = _mm256_extractf128_si256(currentValues,1);
-
- compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
- compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
-
- //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
- compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
-
- if (!_mm256_testc_si256(compareResults, ones))
- {
- _mm256_store_si256((__m256i*)&currentValuesBuffer, currentValues);
-
- for(unsigned int i = 0; i < 32; i++)
+ const unsigned int sse_iters = num_points / 32;
+
+ char* basePtr = (char*)src0;
+ char* inputPtr = (char*)src0;
+ char max = src0[0];
+ unsigned int index = 0;
+ __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
+ __m256i ones, compareResults, currentValues;
+ __m128i compareResultslo, compareResultshi, maxValues, lo, hi;
+
+ ones = _mm256_set1_epi8(0xFF);
+ maxValues = _mm_set1_epi8(max);
+
+ for(unsigned int number = 0; number < sse_iters; number++)
{
- if(currentValuesBuffer[i] > max)
- {
- index = inputPtr - basePtr + i;
- max = currentValuesBuffer[i];
- }
+ currentValues = _mm256_load_si256((__m256i*)inputPtr);
+
+ lo = _mm256_castsi256_si128(currentValues);
+ hi = _mm256_extractf128_si256(currentValues,1);
+
+ compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
+ compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
+
+ //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
+ compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo), (compareResultshi), 1);
+
+ if (!_mm256_testc_si256(compareResults, ones))
+ {
+ _mm256_store_si256((__m256i*)&currentValuesBuffer, currentValues);
+
+ for(unsigned int i = 0; i < 32; i++)
+ {
+ if(currentValuesBuffer[i] > max)
+ {
+ index = inputPtr - basePtr + i;
+ max = currentValuesBuffer[i];
+ }
+ }
+ maxValues = _mm_set1_epi8(max);
+ }
+
+ inputPtr += 32;
}
- maxValues = _mm_set1_epi8(max);
- }
-
- inputPtr += 32;
- }
-
- for(unsigned int i = 0; i<(num_points % 32); ++i)
- {
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
+
+ for(unsigned int i = 0; i<(num_points % 32); ++i)
+ {
+ if(src0[i] > max)
+ {
+ index = i;
+ max = src0[i];
+ }
+ }
+ target[0] = index;
}
- target[0] = index;
- }
}
#endif /*LV_HAVE_AVX*/
@@ -344,53 +344,55 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
-static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) {
- if(num_points > 0){
- const unsigned int sse_iters = num_points / 16;
-
- char* basePtr = (char*)src0;
- char* inputPtr = (char*)src0;
- char max = src0[0];
- unsigned int index = 0;
- __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
- __m128i maxValues, compareResults, currentValues;
-
- maxValues = _mm_set1_epi8(max);
-
- for(unsigned int number = 0; number < sse_iters; number++)
+static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
+{
+ if(num_points > 0)
{
- currentValues = _mm_load_si128((__m128i*)inputPtr);
-
- compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
-
- if (!_mm_test_all_ones(compareResults))
- {
- _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
-
- for(unsigned int i = 0; i < 16; i++)
+ const unsigned int sse_iters = num_points / 16;
+
+ char* basePtr = (char*)src0;
+ char* inputPtr = (char*)src0;
+ char max = src0[0];
+ unsigned int index = 0;
+ __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
+ __m128i maxValues, compareResults, currentValues;
+
+ maxValues = _mm_set1_epi8(max);
+
+ for(unsigned int number = 0; number < sse_iters; number++)
{
- if(currentValuesBuffer[i] > max)
- {
- index = inputPtr - basePtr + i;
- max = currentValuesBuffer[i];
- }
+ currentValues = _mm_load_si128((__m128i*)inputPtr);
+
+ compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
+
+ if (!_mm_test_all_ones(compareResults))
+ {
+ _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
+
+ for(unsigned int i = 0; i < 16; i++)
+ {
+ if(currentValuesBuffer[i] > max)
+ {
+ index = inputPtr - basePtr + i;
+ max = currentValuesBuffer[i];
+ }
+ }
+ maxValues = _mm_set1_epi8(max);
+ }
+
+ inputPtr += 16;
}
- maxValues = _mm_set1_epi8(max);
- }
-
- inputPtr += 16;
- }
-
- for(unsigned int i = 0; i<(num_points % 16); ++i)
- {
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
+
+ for(unsigned int i = 0; i<(num_points % 16); ++i)
+ {
+ if(src0[i] > max)
+ {
+ index = i;
+ max = src0[i];
+ }
+ }
+ target[0] = index;
}
- target[0] = index;
- }
}
#endif /*LV_HAVE_SSE4_1*/
@@ -403,89 +405,65 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target,
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
-static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points) {
- if(num_points > 0){
- const unsigned int sse_iters = num_points / 16;
-
- char* basePtr = (char*)src0;
- char* inputPtr = (char*)src0;
- char max = src0[0];
- unsigned int index = 0;
- unsigned short mask;
- __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
- __m128i maxValues, compareResults, currentValues;
-
- maxValues = _mm_set1_epi8(max);
-
- for(unsigned int number = 0; number < sse_iters; number++)
+static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points)
+{
+ if(num_points > 0)
{
- currentValues = _mm_load_si128((__m128i*)inputPtr);
- compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
- mask = _mm_movemask_epi8(compareResults);
-
- if (mask != 0xFFFF)
- {
- _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
- mask = ~mask;
- unsigned int i = 0;
- while (mask > 0)
+ const unsigned int sse_iters = num_points / 16;
+
+ char* basePtr = (char*)src0;
+ char* inputPtr = (char*)src0;
+ char max = src0[0];
+ unsigned int index = 0;
+ unsigned short mask;
+ __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
+ __m128i maxValues, compareResults, currentValues;
+
+ maxValues = _mm_set1_epi8(max);
+
+ for(unsigned int number = 0; number < sse_iters; number++)
{
- if ((mask & 1) == 1)
- {
- if(currentValuesBuffer[i] > max)
+ currentValues = _mm_load_si128((__m128i*)inputPtr);
+ compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
+ mask = _mm_movemask_epi8(compareResults);
+
+ if (mask != 0xFFFF)
{
- index = inputPtr - basePtr + i;
- max = currentValuesBuffer[i];
+ _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
+ mask = ~mask;
+ unsigned int i = 0;
+ while (mask > 0)
+ {
+ if ((mask & 1) == 1)
+ {
+ if(currentValuesBuffer[i] > max)
+ {
+ index = inputPtr - basePtr + i;
+ max = currentValuesBuffer[i];
+ }
+ }
+ i++;
+ mask >>= 1;
+ }
+ maxValues = _mm_set1_epi8(max);
}
- }
- i++;
- mask >>= 1;
+ inputPtr += 16;
}
- maxValues = _mm_set1_epi8(max);
- }
- inputPtr += 16;
- }
-
- for(unsigned int i = 0; i<(num_points % 16); ++i)
- {
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
+
+ for(unsigned int i = 0; i<(num_points % 16); ++i)
+ {
+ if(src0[i] > max)
+ {
+ index = i;
+ max = src0[i];
+ }
+ }
+ target[0] = index;
}
- target[0] = index;
- }
}
#endif /*LV_HAVE_SSE2*/
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Returns the index of the max value in src0
- \param target The index of the max value in src0
- \param src0 The buffer of data to be analysed
- \param num_points The number of values in src0 to be analysed
- */
-static inline void volk_gnsssdr_8i_index_max_16u_a_generic(unsigned int* target, const char* src0, unsigned int num_points) {
-
- if(num_points > 0)
- {
- char max = src0[0];
- unsigned int index = 0;
-
- for(unsigned int i = 1; i < num_points; ++i)
- {
- if(src0[i] > max)
- {
- index = i;
- max = src0[i];
- }
- }
- target[0] = index;
- }
-}
-#endif /*LV_HAVE_GENERIC*/
-#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H*/
+#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_H*/
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h
index 7b5d939..156fe6a 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h
@@ -32,11 +32,13 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
-#define INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
+#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_H
+#define INCLUDED_volk_gnsssdr_8i_max_s8i_H
+
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
+#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
@@ -179,15 +181,8 @@ static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src
#endif /*LV_HAVE_GENERIC*/
-#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_u_H*/
-#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
-#define INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
-
-#include <volk_gnsssdr/volk_gnsssdr_common.h>
-#include <inttypes.h>
-#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
@@ -304,29 +299,5 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0
#endif /*LV_HAVE_SSE2*/
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Returns the max value in src0
- \param target The max value in src0
- \param src0 The buffer of data to be analysed
- \param num_points The number of values in src0 to be analysed
- */
-static inline void volk_gnsssdr_8i_max_s8i_a_generic(char* target, const char* src0, unsigned int num_points)
-{
- if(num_points > 0)
- {
- char max = src0[0];
- for(unsigned int i = 1; i < num_points; ++i)
- {
- if(src0[i] > max)
- {
- max = src0[i];
- }
- }
- target[0] = max;
- }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_a_H*/
+#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_H*/
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h
index bf20d59..b23dd3a 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h
@@ -32,10 +32,11 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
-#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
+#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
+#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
#include <inttypes.h>
+#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@@ -99,14 +100,6 @@ static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char*
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
-#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@@ -148,27 +141,6 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a
}
#endif /* LV_HAVE_SSE2 */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Adds the two input vectors and store their results in the third vector
- \param cVector The vector where the results will be stored
- \param aVector One of the vectors to be added
- \param bVector One of the vectors to be added
- \param num_points The number of values in aVector and bVector to be added together and stored into cVector
- */
-static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
-{
- char* cPtr = cVector;
- const char* aPtr = aVector;
- const char* bPtr= bVector;
- unsigned int number = 0;
-
- for(; number < num_points; number++)
- {
- *cPtr++ = (*aPtr++) + (*bPtr++);
- }
-}
-#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@@ -185,4 +157,4 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aV
}
#endif /* LV_HAVE_ORC */
-#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H */
+#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
index c8424e2..57faf5e 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
@@ -33,10 +33,11 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
-#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
+#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
+#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
#include <inttypes.h>
+#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_AVX
@@ -174,15 +175,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, con
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
-#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
@@ -299,25 +291,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
}
#endif /* LV_HAVE_SSE3 */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Takes the conjugate of an unsigned char vector.
- \param cVector The vector where the results will be stored
- \param aVector Vector to be conjugated
- \param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
- */
-static inline void volk_gnsssdr_8ic_conjugate_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
-{
- lv_8sc_t* cPtr = cVector;
- const lv_8sc_t* aPtr = aVector;
- unsigned int number = 0;
-
- for(number = 0; number < num_points; number++)
- {
- *cPtr++ = lv_conj(*aPtr++);
- }
-}
-#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@@ -333,4 +306,4 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const
}
#endif /* LV_HAVE_ORC */
-#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H */
+#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
index c8114e8..63b034c 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
@@ -34,10 +34,11 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
-#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
+#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
+#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
#include <inttypes.h>
+#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSSE3
@@ -166,15 +167,6 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitude
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
-#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>
@@ -281,26 +273,6 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
//}
//#endif /* LV_HAVE_SSE */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
- \param complexVector The vector containing the complex input values
- \param magnitudeVector The vector containing the real output values
- \param num_points The number of complex values in complexVector to be calculated and stored into cVector
- */
-static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
-{
- const char* complexVectorPtr = (char*)complexVector;
- char* magnitudeVectorPtr = magnitudeVector;
-
- for(unsigned int number = 0; number < num_points; number++)
- {
- const char real = *complexVectorPtr++;
- const char imag = *complexVectorPtr++;
- *magnitudeVectorPtr++ = (real*real) + (imag*imag);
- }
-}
-#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@@ -316,4 +288,4 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVe
}
#endif /* LV_HAVE_ORC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_a_H */
+#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h
index 89e41fd..4f38a9f 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h
@@ -33,10 +33,11 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
-#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
+#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
+#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
#include <inttypes.h>
+#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
@@ -143,16 +144,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
-#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk_gnsssdr/volk_gnsssdr_complex.h>
-#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@@ -215,46 +206,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_SSE3 */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Multiplies the input vector by a scalar and stores the results in the third vector
- \param cVector The vector where the results will be stored
- \param aVector The vector to be multiplied
- \param scalar The complex scalar to multiply aVector
- \param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
- */
-static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
-{
- /*lv_8sc_t* cPtr = cVector;
- const lv_8sc_t* aPtr = aVector;
-
- for (int i = 0; i<num_points; ++i)
- {
- *cPtr++ = (*aPtr++) * scalar;
- }*/
-
- lv_8sc_t* cPtr = cVector;
- const lv_8sc_t* aPtr = aVector;
- unsigned int number = num_points;
-
- // unwrap loop
- while (number >= 8){
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- number -= 8;
- }
-
- // clean up any remaining
- while (number-- > 0)
- *cPtr++ = *aPtr++ * scalar;
-}
-#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@@ -271,4 +222,4 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, c
}
#endif /* LV_HAVE_ORC */
-#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */
+#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h
index a753a25..e8d21ba 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h
@@ -33,9 +33,10 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
-#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
+#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
+#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
+#include <stdio.h>
#include <string.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
@@ -251,69 +252,6 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
#endif /*LV_HAVE_SSE4_1*/
-#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H*/
-
-
-#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
-#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
-
-#include <volk_gnsssdr/volk_gnsssdr_common.h>
-#include <volk_gnsssdr/volk_gnsssdr_complex.h>
-#include <stdio.h>
-#include <string.h>
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
- \param cVector The vector where the accumulated result will be stored
- \param aVector One of the vectors to be multiplied and accumulated
- \param bVector One of the vectors to be multiplied and accumulated
- \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
- */
-static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
-{
- // lv_8sc_t* cPtr = result;
- // const lv_8sc_t* aPtr = input;
- // const lv_8sc_t* bPtr = taps;
- //
- // for(int number = 0; number < num_points; number++)
- // {
- // *cPtr += (*aPtr++) * (*bPtr++);
- // }
-
- char * res = (char*) result;
- char * in = (char*) input;
- char * tp = (char*) taps;
- unsigned int n_2_ccomplex_blocks = num_points/2;
- unsigned int isodd = num_points & 1;
-
- char sum0[2] = {0,0};
- char sum1[2] = {0,0};
- unsigned int i = 0;
-
- for(i = 0; i < n_2_ccomplex_blocks; ++i)
- {
- sum0[0] += in[0] * tp[0] - in[1] * tp[1];
- sum0[1] += in[0] * tp[1] + in[1] * tp[0];
- sum1[0] += in[2] * tp[2] - in[3] * tp[3];
- sum1[1] += in[2] * tp[3] + in[3] * tp[2];
-
- in += 4;
- tp += 4;
- }
-
- res[0] = sum0[0] + sum1[0];
- res[1] = sum0[1] + sum1[1];
-
- // Cleanup if we had an odd number of points
- for(i = 0; i < isodd; ++i)
- {
- *result += input[num_points - 1] * taps[num_points - 1];
- }
-}
-
-#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@@ -500,4 +438,4 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
}
#endif /* LV_HAVE_ORC */
-#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H*/
+#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H*/
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h
index 4e2971d..a5cb3a1 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h
@@ -33,10 +33,11 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
-#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
+#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
+#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
#include <inttypes.h>
+#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_SSE2
@@ -180,15 +181,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
-#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@@ -310,27 +302,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_SSE4_1 */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Multiplies the two input complex vectors and stores their results in the third vector
- \param cVector The vector where the results will be stored
- \param aVector One of the vectors to be multiplied
- \param bVector One of the vectors to be multiplied
- \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
- */
-static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
-{
- lv_8sc_t* cPtr = cVector;
- const lv_8sc_t* aPtr = aVector;
- const lv_8sc_t* bPtr = bVector;
-
- for(unsigned int number = 0; number < num_points; number++)
- {
- *cPtr++ = (*aPtr++) * (*bPtr++);
- }
-
-}
-#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@@ -347,4 +318,4 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, con
}
#endif /* LV_HAVE_ORC */
-#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H */
+#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h
index a9e7831..cb58d55 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h
@@ -32,10 +32,11 @@
* -------------------------------------------------------------------------
*/
-#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
-#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
+#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
+#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
#include <inttypes.h>
+#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@@ -112,14 +113,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar,
}
#endif /* LV_HAVE_GENERIC */
-#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H */
-
-
-#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
-#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@@ -176,26 +169,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
}
#endif /* LV_HAVE_SSE */
-#ifdef LV_HAVE_GENERIC
-/*!
- \brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
- \param cChar The unsigned char where the results will be stored
- \param aChar One of the unsigned char to be multiplied
- \param bChar One of the unsigned char to be multiplied
- \param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
- */
-static inline void volk_gnsssdr_8u_x2_multiply_8u_a_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
-{
- unsigned char* cPtr = cChar;
- const unsigned char* aPtr = aChar;
- const unsigned char* bPtr = bChar;
-
- for(unsigned int number = 0; number < num_points; number++)
- {
- *cPtr++ = (*aPtr++) * (*bPtr++);
- }
-}
-#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@@ -212,4 +185,4 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector,
}
#endif /* LV_HAVE_ORC */
-#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H */
+#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H */
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits mailing list