[hamradio-commits] [gnss-sdr] 93/149: add unaligned version
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Sat Feb 6 19:43:06 UTC 2016
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit 497c856437fdbd9402e6410f9c2265276e64922a
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date: Wed Jan 20 18:38:33 2016 +0100
add unaligned version
---
.../volk_gnsssdr_16ic_x2_multiply_16ic.h | 51 ++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h
index 1da3ccb..0c6374a 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h
@@ -77,6 +77,57 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
//std::complex<T> memory structure: real part -> reinterpret_cast<cv T*>(a)[2*i]
//imaginery part -> reinterpret_cast<cv T*>(a)[2*i + 1]
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
+ a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
+ b = _mm_load_si128((__m128i*)_in_b);
+ c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, ....
+
+ c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
+ real = _mm_subs_epi16 (c, c_sr);
+ real = _mm_and_si128 (real, mask_real); // a3.r*b3.r-a3.i*b3.i , 0, a3.r*b3.r- a3.i*b3.i
+
+ b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
+ a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
+
+ imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
+ imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
+
+ imag = _mm_adds_epi16(imag1, imag2);
+ imag = _mm_and_si128 (imag, mask_imag); // a3.i*b3.r+b3.i*a3.r, 0, ...
+
+ result = _mm_or_si128 (real, imag);
+
+ _mm_store_si128((__m128i*)_out, result);
+
+ _in_a += 4;
+ _in_b += 4;
+ _out += 4;
+ }
+
+ for (unsigned int i = sse_iters * 4; i < num_points; ++i)
+ {
+ *_out++ = (*_in_a++) * (*_in_b++);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 4;
+ __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
+
+ mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
+ mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255);
+
+ const lv_16sc_t* _in_a = in_a;
+ const lv_16sc_t* _in_b = in_b;
+ lv_16sc_t* _out = out;
+ for(unsigned int number = 0; number < sse_iters; number++)
+ {
+ //std::complex<T> memory structure: real part -> reinterpret_cast<cv T*>(a)[2*i]
+ //imaginery part -> reinterpret_cast<cv T*>(a)[2*i + 1]
+ // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
b = _mm_loadu_si128((__m128i*)_in_b);
c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, ....
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits
mailing list