[hamradio-commits] [gnss-sdr] 128/149: adding neon implementation
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Sat Feb 6 19:43:12 UTC 2016
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit d69e8e34f60de8247d5d4c0fb92165519abdfff7
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date: Thu Jan 28 19:45:31 2016 +0100
adding neon implementation
---
.../volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h | 86 ++++++++++++++++++++++
1 file changed, 86 insertions(+)
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
index 86be0c7..773694e 100755
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
@@ -190,4 +190,90 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
}
#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+/*!
+ \brief Converts a float vector of 64 bits (32 bits each part) into a 16 bit integer vector (8 bits each part), saturating to [SCHAR_MIN, SCHAR_MAX]
+ \param outputVector The 16 bit output data buffer (written as num_points * 2 int8_t values)
+ \param inputVector The floating point input data buffer (read as num_points * 2 floats, never modified)
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
+{
+    /* One NEON iteration converts 16 floats (8 data values) into 16 int8_t outputs. */
+    const unsigned int neon_iters = num_points / 8;
+
+    const float32_t* inputVectorPtr = (const float32_t*)inputVector;
+    int8_t* outputVectorPtr = (int8_t*)outputVector;
+
+    const float32x4_t min_val = vmovq_n_f32((float32_t)SCHAR_MIN);
+    const float32x4_t max_val = vmovq_n_f32((float32_t)SCHAR_MAX);
+    const float32x4_t half = vdupq_n_f32(0.5f);
+    float32x4_t sign, PlusHalf, Round, ret1, a;
+    int32x4_t toint_a;
+    int16x4_t intInputVal1, intInputVal2;
+    int16x8_t pack16_8_1;
+    int8x8_t res8_1, res8_2;
+    int8x16_t outputVal;
+
+    for(unsigned int i = 0; i < neon_iters; i++)
+    {
+        /* Clamp, add 0.5, subtract 1.0 on negative lanes (sign bit as 0/1); the truncating convert then rounds half away from zero. */
+        a = vld1q_f32(inputVectorPtr); inputVectorPtr += 4;
+        ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
+        sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
+        PlusHalf = vaddq_f32(ret1, half);
+        Round = vsubq_f32(PlusHalf, sign);
+        toint_a = vcvtq_s32_f32(Round);
+        intInputVal1 = vqmovn_s32(toint_a);
+
+        a = vld1q_f32(inputVectorPtr); inputVectorPtr += 4;
+        ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
+        sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
+        PlusHalf = vaddq_f32(ret1, half);
+        Round = vsubq_f32(PlusHalf, sign);
+        toint_a = vcvtq_s32_f32(Round);
+        intInputVal2 = vqmovn_s32(toint_a);
+
+        pack16_8_1 = vcombine_s16(intInputVal1, intInputVal2);
+        res8_1 = vqmovn_s16(pack16_8_1); /* saturating narrow to 8 bits */
+
+        a = vld1q_f32(inputVectorPtr); inputVectorPtr += 4;
+        ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
+        sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
+        PlusHalf = vaddq_f32(ret1, half);
+        Round = vsubq_f32(PlusHalf, sign);
+        toint_a = vcvtq_s32_f32(Round);
+        intInputVal1 = vqmovn_s32(toint_a);
+
+        a = vld1q_f32(inputVectorPtr); inputVectorPtr += 4;
+        ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
+        sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
+        PlusHalf = vaddq_f32(ret1, half);
+        Round = vsubq_f32(PlusHalf, sign);
+        toint_a = vcvtq_s32_f32(Round);
+        intInputVal2 = vqmovn_s32(toint_a);
+
+        pack16_8_1 = vcombine_s16(intInputVal1, intInputVal2);
+        res8_2 = vqmovn_s16(pack16_8_1);
+        outputVal = vcombine_s8(res8_1, res8_2);
+        vst1q_s8(outputVectorPtr, outputVal);
+        outputVectorPtr += 16;
+    }
+
+    /* Scalar tail: both pointers already sit past the NEON-processed data, so read through the pointer (not [i]) and clamp a local copy. */
+    for(unsigned int i = neon_iters * 16; i < num_points * 2; i++)
+    {
+        float32_t aux = *inputVectorPtr++;
+        if(aux > (float32_t)SCHAR_MAX)
+            aux = (float32_t)SCHAR_MAX;
+        else if(aux < (float32_t)SCHAR_MIN)
+            aux = (float32_t)SCHAR_MIN;
+        *outputVectorPtr++ = (int8_t)rintf(aux);
+    }
+}
+
+#endif /* LV_HAVE_NEON */
+
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_H */
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits
mailing list