[hamradio-commits] [gnss-sdr] 61/149: Moving two kernels to volk_gnsssdr. Still no testing
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Sat Feb 6 19:43:03 UTC 2016
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit ae2b594c3b05402ad98faadd7eac986fd3642aa5
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date: Wed Jan 13 19:38:07 2016 +0100
Moving two kernels to volk_gnsssdr. Still no testing
---
.../include/volk_gnsssdr/saturated_arithmetic.h | 36 ++--
.../volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h} | 14 +-
.../volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | 171 +++++++++++++++++++
.../volk_gnsssdr/lib/kernel_tests.h | 15 +-
.../volk_gnsssdr/lib/qa_utils.cc | 183 +++++++++++++++------
.../volk_gnsssdr/lib/qa_utils.h | 6 +-
.../tracking/libs/cpu_multicorrelator_16sc.cc | 11 +-
.../libs/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | 172 -------------------
8 files changed, 340 insertions(+), 268 deletions(-)
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturated_arithmetic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturated_arithmetic.h
index b9f95d4..11f3f1c 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturated_arithmetic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturated_arithmetic.h
@@ -5,27 +5,27 @@
//#include <types.h>
static inline int16_t sat_adds16b(int16_t x, int16_t y)
{
-// int16_t ux = x;
-// int16_t uy = y;
-// int16_t res = ux + uy;
-//
-// /* Calculate overflowed result. (Don't change the sign bit of ux) */
-// ux = (ux >> 15) + SHRT_MAX;
-//
-// /* Force compiler to use cmovns instruction */
-// if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
-// {
-// res = ux;
-// }
-//
-// return res;
+ // int16_t ux = x;
+ // int16_t uy = y;
+ // int16_t res = ux + uy;
+ //
+ // /* Calculate overflowed result. (Don't change the sign bit of ux) */
+ // ux = (ux >> 15) + SHRT_MAX;
+ //
+ // /* Force compiler to use cmovns instruction */
+ // if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
+ // {
+ // res = ux;
+ // }
+ //
+ // return res;
- int32_t res = (int32_t) x + (int32_t) y;
+ int32_t res = (int32_t) x + (int32_t) y;
- if (res < SHRT_MIN) res = SHRT_MIN;
- if (res > SHRT_MAX) res = SHRT_MAX;
+ if (res < SHRT_MIN) res = SHRT_MIN;
+ if (res > SHRT_MAX) res = SHRT_MAX;
- return res;
+ return res;
}
#endif /*SATURATED_ARITHMETIC_H_*/
diff --git a/src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
similarity index 94%
rename from src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_dot_prod_16ic_xn.h
rename to src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
index b133bac..7f9986e 100644
--- a/src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_dot_prod_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
@@ -48,7 +48,7 @@
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
-static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors)
+static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors)
{
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
@@ -57,7 +57,7 @@ static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_generic(lv_16sc_t* resu
{
//r*a.r - i*a.i, i*a.r + r*a.i
//result[n_vec]+=in_common[n]*in_a[n_vec][n];
- lv_16sc_t tmp = in_common[n]*in_a[n_vec][n];
+ lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];
result[n_vec] = lv_cmake(sat_adds16b(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16b(lv_cimag(result[n_vec]), lv_cimag(tmp)));
}
}
@@ -68,7 +68,7 @@ static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_generic(lv_16sc_t* resu
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
-static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors)
+static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@@ -148,13 +148,13 @@ static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
- for(unsigned int n = sse_iters * 4;n < num_points; n++){
-
- lv_16sc_t tmp = in_common[n]*in_a[n_vec][n];
+ for(unsigned int n = sse_iters * 4; n < num_points; n++)
+ {
+ lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];
_out[n_vec] = lv_cmake(sat_adds16b(lv_creal(_out[n_vec]), lv_creal(tmp)),
sat_adds16b(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
- }
+ }
}
}
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
new file mode 100644
index 0000000..c53075a
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
@@ -0,0 +1,171 @@
+/*!
+ * \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
+ * \brief Volk protokernel: resample a 16 bits complex vector
+ * \authors <ul>
+ * <li> Javier Arribas, 2015. jarribas(at)cttc.es
+ * </ul>
+ *
+ * Volk protokernel that resamples a local code made of 16-bit complex samples
+ * into several output vectors, one per code phase offset
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
+#define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
+
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+//#pragma STDC FENV_ACCESS ON
+
+#ifdef LV_HAVE_GENERIC
+
+//int round_int( float r ) {
+// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
+//}
+/*!
+ \brief Resamples a local code into several output vectors, one per code phase offset (nearest-chip lookup, no interpolation)
+ \param result Array of num_out_vectors output vectors, each receiving num_output_samples samples
+ \param local_code Local code samples; the computed chip index is wrapped once against code_length_chips before the lookup
+ \param rem_code_phase_chips Initial code phase, in chips, of each output vector (num_out_vectors entries)
+ \param code_phase_step_chips Code phase increment, in chips, between consecutive output samples
+ \param num_output_samples Number of samples to write into each output vector
+ \param code_length_chips Length of the local code, in chips
+ \param num_out_vectors Number of output vectors
+ */
+static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
+{
+    // Signed copy of the code length: comparing the int chip index against an unsigned length would convert a negative index to a huge unsigned value
+    const int code_length = (int)code_length_chips;
+    for (int current_vector = 0; current_vector < num_out_vectors; current_vector++)
+        {
+            for (unsigned int n = 0; n < num_output_samples; n++)
+                {
+                    // chip for this output sample; the -0.5 offset puts the chip transition at phase 0. The index is wrapped at most once in each direction
+                    int local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector] - 0.5f);
+                    if (local_code_chip_index < 0) local_code_chip_index += code_length;
+                    if (local_code_chip_index > (code_length - 1)) local_code_chip_index -= code_length;
+                    result[current_vector][n] = local_code[local_code_chip_index];
+                }
+        }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief SSE2 version of the multi-output code resampler: nearest-chip lookup into local_code for every output vector
+ \param result Array of num_out_vectors output vectors, each receiving num_output_samples samples
+ \param local_code Local code samples; the computed chip index is wrapped once against code_length_chips before the lookup
+ \param rem_code_phase_chips Initial code phase, in chips, of each output vector (num_out_vectors entries)
+ \param code_phase_step_chips Code phase increment, in chips, between consecutive output samples
+ \param num_output_samples Number of samples to write into each output vector
+ \param code_length_chips Length of the local code, in chips
+ \param num_out_vectors Number of output vectors
+ */
+static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
+{
+    _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST); //_mm_cvtps_epi32 below converts using this rounding mode
+    unsigned int number;
+    const unsigned int quarterPoints = num_output_samples / 4;
+
+    lv_16sc_t** _result = result;
+    __attribute__((aligned(16))) int local_code_chip_index[4];
+    float tmp_rem_code_phase_chips;
+    __m128 _rem_code_phase, _code_phase_step_chips;
+    __m128i _code_length_chips, _code_length_chips_minus1;
+    __m128 _code_phase_out, _code_phase_out_with_offset;
+
+    _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
+    __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
+    four_times_code_length_chips_minus1[0] = code_length_chips - 1;
+    four_times_code_length_chips_minus1[1] = code_length_chips - 1;
+    four_times_code_length_chips_minus1[2] = code_length_chips - 1;
+    four_times_code_length_chips_minus1[3] = code_length_chips - 1;
+    __attribute__((aligned(16))) int four_times_code_length_chips[4];
+    four_times_code_length_chips[0] = code_length_chips;
+    four_times_code_length_chips[1] = code_length_chips;
+    four_times_code_length_chips[2] = code_length_chips;
+    four_times_code_length_chips[3] = code_length_chips;
+    _code_length_chips = _mm_loadu_si128((__m128i*)&four_times_code_length_chips); //code length in all four 32-bit integer lanes
+    _code_length_chips_minus1 = _mm_loadu_si128((__m128i*)&four_times_code_length_chips_minus1); //code length - 1 in all four 32-bit integer lanes
+    __m128i negative_indexes, overflow_indexes, _code_phase_out_int, _code_phase_out_int_neg, _code_phase_out_int_over;
+    __m128i zero = _mm_setzero_si128();
+    __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
+    __m128 _4output_index = _mm_load_ps(init_idx_float);
+    __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
+    __m128 _4constant_float = _mm_load_ps(init_4constant_float);
+
+    int current_vector = 0;
+    int sample_idx = 0;
+    for(number = 0; number < quarterPoints; number++)
+        {
+            //common to all outputs
+            _code_phase_out = _mm_mul_ps(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step
+            //output vector dependent (different code phase offset)
+            for(current_vector = 0; current_vector < num_out_vectors; current_vector++)
+                {
+                    tmp_rem_code_phase_chips = rem_code_phase_chips[current_vector] - 0.5f; // adjust offset to perform correct rounding (chip transition at 0)
+                    _rem_code_phase = _mm_load1_ps(&tmp_rem_code_phase_chips); //load float to all four float values in m128 register
+                    _code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase); //add the phase offset
+                    _code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer
+
+                    negative_indexes = _mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values
+                    _code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _code_length_chips); //the negative values branch
+                    _code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes, _mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int ))); //per lane: wrapped value where the mask is set, original value elsewhere
+
+                    overflow_indexes = _mm_cmpgt_epi32 (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
+                    _code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _code_length_chips); //the overflow values branch
+                    _code_phase_out_int_over = _mm_xor_si128(_code_phase_out_int_neg, _mm_and_si128( overflow_indexes, _mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg ))); //same masked select as above
+                    _mm_storeu_si128((__m128i*)local_code_chip_index, _code_phase_out_int_over); // Store the results back
+                    //todo: optimize the local code lookup table with intrinsics, if possible
+                    _result[current_vector][sample_idx] = local_code[local_code_chip_index[0]];
+                    _result[current_vector][sample_idx + 1] = local_code[local_code_chip_index[1]];
+                    _result[current_vector][sample_idx + 2] = local_code[local_code_chip_index[2]];
+                    _result[current_vector][sample_idx + 3] = local_code[local_code_chip_index[3]];
+                }
+            _4output_index = _mm_add_ps(_4output_index, _4constant_float);
+            sample_idx += 4;
+        }
+
+    // Remaining (num_output_samples % 4) samples: same index computation as the generic kernel (-0.5 offset, round, wrap by the full code length)
+    for(number = quarterPoints * 4; number < num_output_samples; number++)
+        {
+            for(current_vector = 0; current_vector < num_out_vectors; current_vector++)
+                {
+                    local_code_chip_index[0] = (int)round(code_phase_step_chips * (float)(number) + rem_code_phase_chips[current_vector] - 0.5f);
+                    if (local_code_chip_index[0] < 0) local_code_chip_index[0] += (int)code_length_chips;
+                    if (local_code_chip_index[0] > ((int)code_length_chips - 1)) local_code_chip_index[0] -= (int)code_length_chips;
+                    _result[current_vector][number] = local_code[local_code_chip_index[0]];
+                }
+        }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H*/
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h
index 298d45c..c06b7f0 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h
@@ -57,16 +57,7 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex());
std::vector<volk_gnsssdr_test_case_t> test_cases = boost::assign::list_of
- // no one uses these, so don't test them
- //VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
- // we need a puppet for this one
- //(VOLK_INIT_TEST(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, test_params))
- //(VOLK_INIT_TEST(volk_gnsssdr_32f_null_32f, test_params))
+
(VOLK_INIT_TEST(volk_gnsssdr_8i_accumulator_s8i, test_params))
(VOLK_INIT_TEST(volk_gnsssdr_8i_index_max_16u, test_params))
(VOLK_INIT_TEST(volk_gnsssdr_8i_max_s8i, test_params))
@@ -77,11 +68,13 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
(VOLK_INIT_TEST(volk_gnsssdr_8ic_x2_multiply_8ic, test_params))
(VOLK_INIT_TEST(volk_gnsssdr_8u_x2_multiply_8u, test_params))
(VOLK_INIT_TEST(volk_gnsssdr_64f_accumulator_64f, test_params))
- (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params))
+ (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params_int1))
(VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_16ic, test_params))
(VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params))
(VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_multiply_16ic, test_params))
+ (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, volk_gnsssdr_test_params_t(1e-2, test_params.scalar(), test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex())))
//(VOLK_INIT_TEST(volk_gnsssdr_16ic_resampler_16ic, test_params))
+ //(VOLK_INIT_TEST(volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params))
;
return test_cases;
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.cc b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.cc
index 3bf8141..5eac77f 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.cc
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.cc
@@ -49,39 +49,47 @@ void random_floats (t *buf, unsigned n)
buf[i] = uniform ();
}
-void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) {
+void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
+{
if(type.is_complex) n *= 2;
- if(type.is_float) {
- if(type.size == 8) random_floats<double>((double *)data, n);
- else random_floats<float>((float *)data, n);
- } else {
- float int_max = float(uint64_t(2) << (type.size*8));
- if(type.is_signed) int_max /= 2.0;
- for(unsigned int i=0; i<n; i++) {
- float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
- //man i really don't know how to do this in a more clever way, you have to cast down at some point
- switch(type.size) {
- case 8:
- if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
- else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
- break;
- case 4:
- if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
- else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
- break;
- case 2:
- if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
- else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
- break;
- case 1:
- if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
- else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
- break;
- default:
- throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
- }
+ if(type.is_float)
+ {
+ if(type.size == 8) random_floats<double>((double *)data, n);
+ else random_floats<float>((float *)data, n);
+ }
+ else
+ {
+ float int_max = float(uint64_t(2) << (type.size*8));
+ if(type.is_signed) int_max /= 2.0;
+ for(unsigned int i = 0; i < n; i++)
+ {
+ float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
+ //man i really don't know how to do this in a more clever way, you have to cast down at some point
+ switch(type.size)
+ {
+ case 8:
+ if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
+ else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
+ break;
+ case 4:
+
+ if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
+ else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
+
+ break;
+ case 2:
+ if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand / 11; //sqrt(std::abs(scaled_rand / 2.0)); //// std::cout << "222222222222" << std::endl;}
+ else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
+ break;
+ case 1:
+ if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
+ else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
+ break;
+ default:
+ throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
+ }
+ }
}
- }
}
static std::vector<std::string> get_arch_list(volk_gnsssdr_func_desc_t desc) {
@@ -172,15 +180,21 @@ static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig,
if(side == SIDE_INPUT) inputsig.push_back(type);
else outputsig.push_back(type);
} catch (...){
- if(token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9')) { //it's a multiplier
+ if(token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9')) { std::cout << "multiplier normsl" << std::endl;//it's a multiplier
if(side == SIDE_INPUT) assert(inputsig.size() > 0);
else assert(outputsig.size() > 0);
- int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid
+ int multiplier = 1;
+ try {
+ multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid ///////////
+ } catch(...) {
+ multiplier = 1; std::cout << "multiplier 333333333" << std::endl;
+ }
for(int i=1; i<multiplier; i++) {
- if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
- else outputsig.push_back(outputsig.back());
+ if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
+ else outputsig.push_back(outputsig.back());
}
}
+
else if(side == SIDE_INPUT) { //it's the function name, at least it better be
side = SIDE_NAME;
fn_name.append("_");
@@ -268,6 +282,24 @@ inline void run_cast_test3_s8ic(volk_gnsssdr_fn_3arg_s8ic func, std::vector<void
}
+// new
+inline void run_cast_test1_s16ic(volk_gnsssdr_fn_1arg_s16ic func, std::vector<void *> &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
+{   // Calls func iter times on buffs[0] with the lv_16sc_t scalar; vlen and arch are forwarded unchanged on every call.
+    for(; iter != 0; --iter) func(buffs[0], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test2_s16ic(volk_gnsssdr_fn_2arg_s16ic func, std::vector<void *> &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
+{   // Calls func iter times on buffs[0..1] with the lv_16sc_t scalar; vlen and arch are forwarded unchanged on every call.
+    for(; iter != 0; --iter) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
+}
+
+inline void run_cast_test3_s16ic(volk_gnsssdr_fn_3arg_s16ic func, std::vector<void *> &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
+{   // Calls func iter times on buffs[0..2] with the lv_16sc_t scalar; vlen and arch are forwarded unchanged on every call.
+    for(; iter != 0; --iter) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
+}
+
+// end new
+
inline void run_cast_test8(volk_gnsssdr_fn_8arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
{
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], vlen, arch.c_str());
@@ -439,14 +471,14 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
unsigned int iter,
std::vector<volk_gnsssdr_test_results_t> *results,
std::string puppet_master_name,
- bool benchmark_mode
-) {
+ bool benchmark_mode)
+{
// Initialize this entry in results vector
results->push_back(volk_gnsssdr_test_results_t());
results->back().name = name;
results->back().vlen = vlen;
results->back().iter = iter;
- std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl;
+ std::cout << "RUN_VOLK_GNSSSDR_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl;
// vlen_twiddle will increase vlen for malloc and data generation
// but kernels will still be called with the user provided vlen.
@@ -546,7 +578,14 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
{
if(inputsc[0].is_complex)
{
- run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ if(inputsc[0].size == 2)
+ {
+ run_cast_test1_s16ic((volk_gnsssdr_fn_1arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ }
+ else
+ {
+ run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ }
}
else
{
@@ -557,6 +596,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
else throw "unsupported 1 arg function >1 scalars";
break;
case 2:
+ //std::cout << "case 2 " << inputsc.size() << std::endl;
if(inputsc.size() == 0)
{
run_cast_test2((volk_gnsssdr_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
@@ -577,7 +617,14 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
{
if(inputsc[0].is_complex)
{
- run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ if(inputsc[0].size == 2)
+ {
+ run_cast_test2_s16ic((volk_gnsssdr_fn_2arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ }
+ else
+ {
+ run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ }
}
else
{
@@ -590,6 +637,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
case 3:
if(inputsc.size() == 0)
{
+ // multipliers are here!
run_cast_test3((volk_gnsssdr_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
}
else if(inputsc.size() == 1 && inputsc[0].is_float)
@@ -608,7 +656,16 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
{
if(inputsc[0].is_complex)
{
- run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ {
+ if(inputsc[0].size == 4)
+ {
+ run_cast_test3_s16ic((volk_gnsssdr_fn_3arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ }
+ else
+ {
+ run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
+ }
+ }
}
else
{
@@ -650,7 +707,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
bool fail;
bool fail_global = false;
std::vector<bool> arch_results;
- for(size_t i=0; i<arch_list.size(); i++)
+ for(size_t i = 0; i < arch_list.size(); i++)
{
fail = false;
if(i != generic_offset)
@@ -698,29 +755,57 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
}
break;
case 4:
- if(both_sigs[j].is_signed)
+ if(both_sigs[j].is_complex)
{
- fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ if(both_sigs[j].is_signed)
+ {
+ fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ }
+ else
+ {
+ fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ }
}
else
{
- fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ if(both_sigs[j].is_signed)
+ {
+ fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ }
+ else
+ {
+ fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ }
}
break;
case 2:
- if(both_sigs[j].is_signed)
+ if(both_sigs[j].is_complex)
{
- fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ if(both_sigs[j].is_signed)
+ {
+ fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ }
+ else
+ {
+ fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ }
}
else
{
- fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ if(both_sigs[j].is_signed)
+ {
+ fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); //
+ }
+ else
+ {
+ fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ }
}
break;
case 1:
if(both_sigs[j].is_signed)
{
- fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+ fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), 3); // check volk_gnsssdr_32fc_convert_8ic !
}
else
{
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.h
index 971d1c1..c7a0e76 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.h
@@ -173,9 +173,9 @@ typedef void (*volk_gnsssdr_fn_3arg_s8ic)(void *, void *, void *, lv_8sc_t, unsi
//typedef void (*volk_gnsssdr_fn_1arg_s16i)(void *, int16_t, unsigned int, const char*); //one input vector, one scalar int16_t input
//typedef void (*volk_gnsssdr_fn_2arg_s16i)(void *, void *, int16_t, unsigned int, const char*);
//typedef void (*volk_gnsssdr_fn_3arg_s16i)(void *, void *, void *, int16_t, unsigned int, const char*);
-//typedef void (*volk_gnsssdr_fn_1arg_s16ic)(void *, lv_16sc_t, unsigned int, const char*); //one input vector, one scalar lv_16sc_t vector input
-//typedef void (*volk_gnsssdr_fn_2arg_s16ic)(void *, void *, lv_16sc_t, unsigned int, const char*);
-//typedef void (*volk_gnsssdr_fn_3arg_s16ic)(void *, void *, void *, lv_16sc_t, unsigned int, const char*);
+typedef void (*volk_gnsssdr_fn_1arg_s16ic)(void *, lv_16sc_t, unsigned int, const char*); //one input vector, one scalar lv_16sc_t vector input
+typedef void (*volk_gnsssdr_fn_2arg_s16ic)(void *, void *, lv_16sc_t, unsigned int, const char*);
+typedef void (*volk_gnsssdr_fn_3arg_s16ic)(void *, void *, void *, lv_16sc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_6arg_s16ic)(void *, void *, void *, void *, void *, void *, lv_16sc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_8arg)(void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*);
diff --git a/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.cc b/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.cc
index b0f8f74..a82977d 100644
--- a/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.cc
+++ b/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.cc
@@ -31,18 +31,13 @@
*
* -------------------------------------------------------------------------
*/
+
#include "cpu_multicorrelator_16sc.h"
#include <cmath>
#include <iostream>
#include <gnuradio/fxpt.h> // fixed point sine and cosine
-
#include "volk_gnsssdr/volk_gnsssdr.h"
-#define LV_HAVE_GENERIC
-#define LV_HAVE_SSE2
-
-#include "volk_gnsssdr_16ic_xn_resampler_16ic_xn.h"
-#include "volk_gnsssdr_16ic_xn_dot_prod_16ic_xn.h"
bool cpu_multicorrelator_16sc::init(
int max_signal_length_samples,
@@ -99,7 +94,7 @@ void cpu_multicorrelator_16sc::update_local_code(int correlator_length_samples,f
tmp_code_phases_chips[n] = d_shifts_chips[n] - rem_code_phase_chips;
}
- volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(d_local_codes_resampled,
+ volk_gnsssdr_16ic_xn_resampler_16ic_xn(d_local_codes_resampled,
d_local_code_in,
tmp_code_phases_chips,
code_phase_step_chips,
@@ -153,7 +148,7 @@ bool cpu_multicorrelator_16sc::Carrier_wipeoff_multicorrelator_resampler(
//std::cout<<"d_sig_doppler_wiped 16sc="<<d_sig_doppler_wiped[23]<<std::endl;
update_local_code(signal_length_samples, rem_code_phase_chips, code_phase_step_chips);
- volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_a_sse2(d_corr_out, d_sig_doppler_wiped, (const lv_16sc_t**)d_local_codes_resampled, signal_length_samples, d_n_correlators);
+ volk_gnsssdr_16ic_x2_dot_prod_16ic_xn(d_corr_out, d_sig_doppler_wiped, (const lv_16sc_t**)d_local_codes_resampled, signal_length_samples, d_n_correlators);
//for (int current_correlator_tap = 0; current_correlator_tap < d_n_correlators; current_correlator_tap++)
// {
diff --git a/src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h b/src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
deleted file mode 100644
index 69c70f3..0000000
--- a/src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*!
- * \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
- * \brief Volk protokernel: resample a 16 bits complex vector
- * \authors <ul>
- * <li> Javier Arribas, 2015. jarribas(at)cttc.es
- * </ul>
- *
- * Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
- * and 8 bits the imaginary part) and accumulates them
- *
- * -------------------------------------------------------------------------
- *
- * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
- *
- * GNSS-SDR is a software defined Global Navigation
- * Satellite Systems receiver
- *
- * This file is part of GNSS-SDR.
- *
- * GNSS-SDR is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * GNSS-SDR is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
- *
- * -------------------------------------------------------------------------
- */
-
-#ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
-#define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
-
-#include <volk_gnsssdr/volk_gnsssdr_common.h>
-#include <volk_gnsssdr/volk_gnsssdr_complex.h>
-#include <cmath>
-//#pragma STDC FENV_ACCESS ON
-
-#ifdef LV_HAVE_GENERIC
-
-//int round_int( float r ) {
-// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
-//}
-/*!
- \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
- \param cVector The vector where the result will be stored
- \param aVector One of the vectors to be multiplied
- \param bVector One of the vectors to be multiplied
- \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
- */
-
-static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
-{
- int local_code_chip_index;
- //fesetround(FE_TONEAREST);
- for (int current_vector = 0; current_vector < num_out_vectors; current_vector++)
- {
- for (unsigned int n = 0; n < num_output_samples; n++)
- {
- // resample code for current tap
- local_code_chip_index = round(code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips[current_vector]-0.5f);
- if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips;
- if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips;
- //std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" ";
- result[current_vector][n] = local_code[local_code_chip_index];
- }
- }
- //std::cout<<std::endl;
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
-{
-
- _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
- unsigned int number;
- const unsigned int quarterPoints = num_output_samples / 4;
-
- lv_16sc_t** _result = result;
- __attribute__((aligned(16))) int local_code_chip_index[4];
- float tmp_rem_code_phase_chips;
- __m128 _rem_code_phase,_code_phase_step_chips;
- __m128i _code_length_chips,_code_length_chips_minus1;
- __m128 _code_phase_out,_code_phase_out_with_offset;
-
- _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
- __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
- four_times_code_length_chips_minus1[0]=code_length_chips-1;
- four_times_code_length_chips_minus1[1]=code_length_chips-1;
- four_times_code_length_chips_minus1[2]=code_length_chips-1;
- four_times_code_length_chips_minus1[3]=code_length_chips-1;
-
- __attribute__((aligned(16))) int four_times_code_length_chips[4];
- four_times_code_length_chips[0]=code_length_chips;
- four_times_code_length_chips[1]=code_length_chips;
- four_times_code_length_chips[2]=code_length_chips;
- four_times_code_length_chips[3]=code_length_chips;
-
- _code_length_chips = _mm_loadu_si128((__m128i*)&four_times_code_length_chips); //load float to all four float values in m128 register
- _code_length_chips_minus1 = _mm_loadu_si128((__m128i*)&four_times_code_length_chips_minus1); //load float to all four float values in m128 register
-
- __m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over;
-
- __m128i zero=_mm_setzero_si128();
-
- __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
- __m128 _4output_index=_mm_load_ps(init_idx_float);
- __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
- __m128 _4constant_float=_mm_load_ps(init_4constant_float);
-
- int current_vector=0;
- int sample_idx=0;
- for(number=0;number < quarterPoints; number++){
- //common to all outputs
- _code_phase_out = _mm_mul_ps(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step
-
- //output vector dependant (different code phase offset)
- for(current_vector=0;current_vector<num_out_vectors;current_vector++)
- {
- tmp_rem_code_phase_chips=rem_code_phase_chips[current_vector]-0.5f; // adjust offset to perform correct rounding (chip transition at 0)
- _rem_code_phase = _mm_load1_ps(&tmp_rem_code_phase_chips); //load float to all four float values in m128 register
-
- _code_phase_out_with_offset = _mm_add_ps(_code_phase_out,_rem_code_phase); //add the phase offset
- _code_phase_out_int=_mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer
-
- negative_indexes=_mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values
- _code_phase_out_int_neg=_mm_add_epi32(_code_phase_out_int,_code_length_chips); //the negative values branch
- _code_phase_out_int_neg=_mm_xor_si128(_code_phase_out_int,_mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
-
- overflow_indexes=_mm_cmpgt_epi32 (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
- _code_phase_out_int_over=_mm_sub_epi32(_code_phase_out_int_neg,_code_length_chips); //the negative values branch
- _code_phase_out_int_over=_mm_xor_si128(_code_phase_out_int_neg,_mm_and_si128( overflow_indexes,_mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg )));
-
- _mm_storeu_si128((__m128i*)local_code_chip_index,_code_phase_out_int_over); // Store the results back
-
- //todo: optimize the local code lookup table with intrinsics, if possible
- _result[current_vector][sample_idx]=local_code[local_code_chip_index[0]];
- _result[current_vector][sample_idx+1]=local_code[local_code_chip_index[1]];
- _result[current_vector][sample_idx+2]=local_code[local_code_chip_index[2]];
- _result[current_vector][sample_idx+3]=local_code[local_code_chip_index[3]];
-
-
- }
- _4output_index = _mm_add_ps(_4output_index,_4constant_float);
- sample_idx+=4;
- }
-
- for(number = quarterPoints * 4;number < num_output_samples; number++){
-
- for(current_vector=0;current_vector<num_out_vectors;current_vector++)
- {
- local_code_chip_index[0]=static_cast<int>(code_phase_step_chips*static_cast<float>(number) + rem_code_phase_chips[current_vector]);
- if (local_code_chip_index[0] < 0.0) local_code_chip_index[0] += code_length_chips-1;
- if (local_code_chip_index[0] > (code_length_chips-1)) local_code_chip_index[0] -= code_length_chips;
- _result[current_vector][number]=local_code[local_code_chip_index[0]];
- }
-
- }
-
-}
-#endif /* LV_HAVE_SSE2 */
-
-#endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H*/
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits
mailing list