[hamradio-commits] [gnss-sdr] 69/149: fix sse implementations
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Sat Feb 6 19:43:04 UTC 2016
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit 46e3ce5ec24563e3679c0354421be943c04b325e
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date: Sat Jan 16 22:39:15 2016 +0100
fix sse implementations
---
.../volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h | 48 ++++++++++++----------
.../volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h | 19 +++++----
2 files changed, 37 insertions(+), 30 deletions(-)
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
index a7c1798..97bb2de 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
@@ -46,8 +46,9 @@
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
-static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
- const unsigned int sse_iters = num_points/4;
+static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 4;
float* inputVectorPtr = (float*)inputVector;
int16_t* outputVectorPtr = (int16_t*)outputVector;
@@ -61,7 +62,8 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
__m128 vmin_val = _mm_set_ps1(min_val);
__m128 vmax_val = _mm_set_ps1(max_val);
- for(unsigned int i = 0;i < sse_iters; i++){
+ for(unsigned int i = 0; i < sse_iters; i++)
+ {
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
@@ -76,29 +78,30 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
_mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 8;
- }
+ }
- for(unsigned int i = 0; i < (num_points%4)*2; i++)
+ for(unsigned int i = sse_iters * 8; i < num_points * 2; i++)
{
if(inputVectorPtr[i] > max_val)
inputVectorPtr[i] = max_val;
else if(inputVectorPtr[i] < min_val)
inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ *outputVectorPtr++ = (int16_t)rintf(*inputVectorPtr++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
+#include <xmmintrin.h> // __m64, __m128 ??
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
-static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
- const unsigned int sse_iters = num_points/4;
+static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 4;
float* inputVectorPtr = (float*)inputVector;
int16_t* outputVectorPtr = (int16_t*)outputVector;
@@ -112,7 +115,8 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
__m128 vmin_val = _mm_set_ps1(min_val);
__m128 vmax_val = _mm_set_ps1(max_val);
- for(unsigned int i = 0;i < sse_iters; i++){
+ for(unsigned int i = 0;i < sse_iters; i++)
+ {
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
@@ -127,15 +131,15 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
_mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
outputVectorPtr += 8;
- }
+ }
- for(unsigned int i = 0; i < (num_points%4)*2; i++)
+ for(unsigned int i = sse_iters * 8; i < num_points*2; i++)
{
if(inputVectorPtr[i] > max_val)
inputVectorPtr[i] = max_val;
else if(inputVectorPtr[i] < min_val)
inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ *outputVectorPtr++ = (int16_t)rintf(*inputVectorPtr++);
}
}
#endif /* LV_HAVE_SSE */
@@ -147,7 +151,8 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
-static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
+{
float* inputVectorPtr = (float*)inputVector;
int16_t* outputVectorPtr = (int16_t*)outputVector;
float min_val = -32768;
@@ -178,8 +183,9 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
-static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
- const unsigned int sse_iters = num_points/4;
+static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 4;
float* inputVectorPtr = (float*)inputVector;
int16_t* outputVectorPtr = (int16_t*)outputVector;
@@ -193,7 +199,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
__m128 vmin_val = _mm_set_ps1(min_val);
__m128 vmax_val = _mm_set_ps1(max_val);
- for(unsigned int i = 0;i < sse_iters; i++)
+ for(unsigned int i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
@@ -211,13 +217,13 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
outputVectorPtr += 8;
}
- for(unsigned int i = 0; i < (num_points%4)*2; i++)
+ for(unsigned int i = sse_iters * 8; i < num_points * 2; i++)
{
if(inputVectorPtr[i] > max_val)
inputVectorPtr[i] = max_val;
else if(inputVectorPtr[i] < min_val)
inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ *outputVectorPtr++ = (int16_t)rintf(*inputVectorPtr++);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -264,13 +270,13 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
outputVectorPtr += 8;
}
- for(unsigned int i = 0; i < (num_points%4)*2; i++)
+ for(unsigned int i = sse_iters * 8; i < num_points * 2; i++)
{
if(inputVectorPtr[i] > max_val)
inputVectorPtr[i] = max_val;
else if(inputVectorPtr[i] < min_val)
inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ *outputVectorPtr++ = (int16_t)rintf(*inputVectorPtr++);
}
}
#endif /* LV_HAVE_SSE */
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
index 1bea025..94cc2ec 100755
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
@@ -49,7 +49,8 @@
*/
static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
- const unsigned int sse_iters = num_points/8;
+ unsigned i = 0;
+ const unsigned int sse_iters = num_points * 2 / 16;
float* inputVectorPtr = (float*)inputVector;
int8_t* outputVectorPtr = (int8_t*)outputVector;
@@ -64,7 +65,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
__m128 vmin_val = _mm_set_ps1(min_val);
__m128 vmax_val = _mm_set_ps1(max_val);
- for(unsigned int i = 0;i < sse_iters; i++)
+ for(;i < sse_iters; i++)
{
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
@@ -90,13 +91,13 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
outputVectorPtr += 16;
}
- for(unsigned int i = 0; i < (num_points%8)*2; i++)
+ for(i = sse_iters * 16; i < num_points * 2; i++)
{
if(inputVectorPtr[i] > max_val)
inputVectorPtr[i] = max_val;
else if(inputVectorPtr[i] < min_val)
inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]);
+ *outputVectorPtr++ = (int8_t)rintf(*inputVectorPtr++);
}
}
#endif /* LV_HAVE_SSE2 */
@@ -115,7 +116,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
float min_val = -128;
float max_val = 127;
- for(unsigned int i = 0; i < num_points*2; i++)
+ for(unsigned int i = 0; i < num_points * 2; i++)
{
if(inputVectorPtr[i] > max_val)
inputVectorPtr[i] = max_val;
@@ -142,7 +143,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
*/
static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
- const unsigned int sse_iters = num_points/8;
+ const unsigned int sse_iters = num_points / 8;
float* inputVectorPtr = (float*)inputVector;
int8_t* outputVectorPtr = (int8_t*)outputVector;
@@ -157,7 +158,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
__m128 vmin_val = _mm_set_ps1(min_val);
__m128 vmax_val = _mm_set_ps1(max_val);
- for(unsigned int i = 0;i < sse_iters; i++)
+ for(unsigned int i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
@@ -183,13 +184,13 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
outputVectorPtr += 16;
}
- for(unsigned int i = 0; i < (num_points%8)*2; i++)
+ for(unsigned int i = sse_iters * 16; i < num_points * 2; i++)
{
if(inputVectorPtr[i] > max_val)
inputVectorPtr[i] = max_val;
else if(inputVectorPtr[i] < min_val)
inputVectorPtr[i] = min_val;
- outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]);
+ *outputVectorPtr++ = (int8_t)rintf(*inputVectorPtr++);
}
}
#endif /* LV_HAVE_SSE2 */
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits
mailing list