[hamradio-commits] [gnss-sdr] 62/149: Merge branch 'new_volk_module' of git+ssh://github.com/gnss-sdr/gnss-sdr into new_volk_module
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Sat Feb 6 19:43:03 UTC 2016
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit e57d02321df91369b3c8fa49a175e60da22ef286
Merge: ae2b594 5d0186e
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date: Wed Jan 13 20:04:18 2016 +0100
Merge branch 'new_volk_module' of git+ssh://github.com/gnss-sdr/gnss-sdr
into new_volk_module
# Conflicts:
# src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
# src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
...urated_arithmetic.h => saturation_arithmetic.h} | 8 ++++----
.../volk_gnsssdr_16ic_resampler_16ic.h | 1 -
.../volk_gnsssdr_16ic_x2_dot_prod_16ic.h | 22 ++++++++++------------
.../volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h | 22 +++++++++++-----------
.../volk_gnsssdr_16ic_x2_multiply_16ic.h | 2 --
.../volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | 7 ++++---
.../volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h | 1 -
.../volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h | 1 -
.../volk_gnsssdr_64f_accumulator_64f.h | 1 -
.../volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h | 1 -
.../volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h | 1 -
.../kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h | 1 -
.../volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h | 1 -
.../volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h | 1 -
.../volk_gnsssdr_8ic_magnitude_squared_8i.h | 1 -
.../volk_gnsssdr_8ic_s8ic_multiply_8ic.h | 1 -
.../volk_gnsssdr_8ic_x2_dot_prod_8ic.h | 2 --
.../volk_gnsssdr_8ic_x2_multiply_8ic.h | 1 -
.../volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h | 1 -
19 files changed, 29 insertions(+), 47 deletions(-)
diff --cc src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturation_arithmetic.h
index 11f3f1c,15b3609..bd7e267
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturation_arithmetic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturation_arithmetic.h
@@@ -3,29 -3,29 +3,29 @@@
#include <limits.h>
//#include <types.h>
- static inline int16_t sat_adds16b(int16_t x, int16_t y)
+ static inline int16_t sat_adds16i(int16_t x, int16_t y)
{
-// int16_t ux = x;
-// int16_t uy = y;
-// int16_t res = ux + uy;
-//
-// /* Calculate overflowed result. (Don't change the sign bit of ux) */
-// ux = (ux >> 15) + SHRT_MAX;
-//
-// /* Force compiler to use cmovns instruction */
-// if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
-// {
-// res = ux;
-// }
-//
-// return res;
+ // int16_t ux = x;
+ // int16_t uy = y;
+ // int16_t res = ux + uy;
+ //
+ // /* Calculate overflowed result. (Don't change the sign bit of ux) */
+ // ux = (ux >> 15) + SHRT_MAX;
+ //
+ // /* Force compiler to use cmovns instruction */
+ // if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
+ // {
+ // res = ux;
+ // }
+ //
+ // return res;
- int32_t res = (int32_t) x + (int32_t) y;
+ int32_t res = (int32_t) x + (int32_t) y;
- if (res < SHRT_MIN) res = SHRT_MIN;
- if (res > SHRT_MAX) res = SHRT_MAX;
+ if (res < SHRT_MIN) res = SHRT_MIN;
+ if (res > SHRT_MAX) res = SHRT_MAX;
- return res;
+ return res;
}
- #endif /*SATURATED_ARITHMETIC_H_*/
+ #endif /*SATURATION_ARITHMETIC_H_*/
diff --cc src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
index 7f9986e,c256bdc..375b8aa
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
@@@ -57,8 -57,8 +57,8 @@@ static inline void volk_gnsssdr_16ic_x2
{
//r*a.r - i*a.i, i*a.r + r*a.i
//result[n_vec]+=in_common[n]*in_a[n_vec][n];
- lv_16sc_t tmp = in_common[n]*in_a[n_vec][n];
+ lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];
- result[n_vec] = lv_cmake(sat_adds16b(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16b(lv_cimag(result[n_vec]), lv_cimag(tmp)));
+ result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
}
}
}
@@@ -148,13 -148,13 +148,13 @@@ static inline void volk_gnsssdr_16ic_x2
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
- for(unsigned int n = sse_iters * 4;n < num_points; n++){
-
- lv_16sc_t tmp = in_common[n]*in_a[n_vec][n];
+ for(unsigned int n = sse_iters * 4; n < num_points; n++)
+ {
+ lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];
- _out[n_vec] = lv_cmake(sat_adds16b(lv_creal(_out[n_vec]), lv_creal(tmp)),
- sat_adds16b(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
- }
+ _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
+ sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
+ }
}
}
diff --cc src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
index c53075a,0000000..7173308
mode 100644,000000..100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
@@@ -1,171 -1,0 +1,172 @@@
+/*!
+ * \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
- * \brief Volk protokernel: resample a 16 bits complex vector
++ * \brief Volk protokernel: Resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
+ * \authors <ul>
+ * <li> Javier Arribas, 2015. jarribas(at)cttc.es
+ * </ul>
+ *
- * Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
- * and 8 bits the imaginary part) and accumulates them
++ * Volk protokernel that resamples N 16-bit integer (short) complex vectors using a zero-hold resampling algorithm.
++ * It is optimized to resample a single GNSS local code signal replica into N fractionally-resampled and fractionally-delayed vectors
++ * (i.e. it creates the Early, Prompt, and Late code replicas)
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
+#define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
+
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+//#pragma STDC FENV_ACCESS ON
+
+#ifdef LV_HAVE_GENERIC
+
+//int round_int( float r ) {
+// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
+//}
+/*!
+ \brief Generic implementation: builds num_out_vectors resampled copies of local_code, one per code phase offset
+
+ Output sample n of vector v is local_code[k], where k is
+ code_phase_step_chips * n + rem_code_phase_chips[v] - 0.5 rounded to the nearest integer
+ and then wrapped at most once by adding or subtracting code_length_chips.
+ \param result Array of num_out_vectors output vectors; num_output_samples samples are written to each one
+ \param local_code Input code vector the output samples are read from
+ \param rem_code_phase_chips Code phase offset of each output vector (one entry per output vector)
+ \param code_phase_step_chips Code phase increment between consecutive output samples
+ \param num_output_samples Number of samples written to each output vector
+ \param code_length_chips Amount added to (or subtracted from) an index that falls below zero (or above code_length_chips - 1)
+ \param num_out_vectors Number of output vectors
+ */
+
+static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
+{
+    for (int vec = 0; vec < num_out_vectors; vec++)
+        {
+            unsigned int sample = 0;
+            while (sample < num_output_samples)
+                {
+                    // nearest chip for this output sample; the -0.5 offset places the chip transition at 0
+                    int chip = (int)round(code_phase_step_chips * (float)(sample) + rem_code_phase_chips[vec] - 0.5f);
+
+                    // single wrap-around in each direction (the upper test is done in unsigned arithmetic)
+                    if (chip < 0)
+                        {
+                            chip += code_length_chips;
+                        }
+                    if ((unsigned int)chip > (code_length_chips - 1))
+                        {
+                            chip -= code_length_chips;
+                        }
+
+                    result[vec][sample] = local_code[chip];
+                    sample++;
+                }
+        }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief SSE2 implementation: builds num_out_vectors resampled copies of local_code, four output samples per iteration
+
+ Output sample n of vector v is local_code[k], where k is
+ code_phase_step_chips * n + rem_code_phase_chips[v] - 0.5 rounded to the nearest integer
+ and then wrapped at most once by adding or subtracting code_length_chips.
+ Samples that do not fill a group of four are computed one at a time with the same
+ index computation as the generic kernel.
+ \param result Array of num_out_vectors output vectors; num_output_samples samples are written to each one
+ \param local_code Input code vector the output samples are read from
+ \param rem_code_phase_chips Code phase offset of each output vector (one entry per output vector)
+ \param code_phase_step_chips Code phase increment between consecutive output samples
+ \param num_output_samples Number of samples written to each output vector
+ \param code_length_chips Amount added to (or subtracted from) an index that falls below zero (or above code_length_chips - 1)
+ \param num_out_vectors Number of output vectors
+ */
+static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
+{
+    _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST); // _mm_cvtps_epi32 below converts using this rounding mode
+    const unsigned int quarterPoints = num_output_samples / 4;
+
+    __attribute__((aligned(16))) int local_code_chip_index[4];
+
+    // constants replicated in the four lanes of a register
+    const __m128 _code_phase_step_chips = _mm_set1_ps(code_phase_step_chips);
+    const __m128i _code_length_chips = _mm_set1_epi32((int)code_length_chips);
+    const __m128i _code_length_chips_minus1 = _mm_set1_epi32((int)(code_length_chips - 1));
+    const __m128i zero = _mm_setzero_si128();
+    const __m128 _4constant_float = _mm_set1_ps(4.0f);
+
+    // indexes of the four output samples handled in the current iteration
+    __m128 _4output_index = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f);
+
+    unsigned int sample_idx = 0;
+    for (unsigned int number = 0; number < quarterPoints; number++)
+        {
+            // common to all outputs: code phase of the four samples, without offset
+            const __m128 _code_phase_out = _mm_mul_ps(_code_phase_step_chips, _4output_index);
+
+            // output vector dependent (different code phase offset)
+            for (int current_vector = 0; current_vector < num_out_vectors; current_vector++)
+                {
+                    // adjust offset to perform correct rounding (chip transition at 0)
+                    const float tmp_rem_code_phase_chips = rem_code_phase_chips[current_vector] - 0.5f;
+                    const __m128 _rem_code_phase = _mm_set1_ps(tmp_rem_code_phase_chips);
+
+                    const __m128 _code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase);
+                    const __m128i _code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset);
+
+                    // lanes with a negative index: add the code length (mask is all-ones in those lanes)
+                    const __m128i negative_indexes = _mm_cmplt_epi32(_code_phase_out_int, zero);
+                    const __m128i _code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _mm_and_si128(negative_indexes, _code_length_chips));
+
+                    // lanes with an index above code_length_chips - 1: subtract the code length
+                    const __m128i overflow_indexes = _mm_cmpgt_epi32(_code_phase_out_int_neg, _code_length_chips_minus1);
+                    const __m128i _code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _mm_and_si128(overflow_indexes, _code_length_chips));
+
+                    _mm_store_si128((__m128i*)local_code_chip_index, _code_phase_out_int_over);
+
+                    //todo: optimize the local code lookup table with intrinsics, if possible
+                    result[current_vector][sample_idx] = local_code[local_code_chip_index[0]];
+                    result[current_vector][sample_idx + 1] = local_code[local_code_chip_index[1]];
+                    result[current_vector][sample_idx + 2] = local_code[local_code_chip_index[2]];
+                    result[current_vector][sample_idx + 3] = local_code[local_code_chip_index[3]];
+                }
+            _4output_index = _mm_add_ps(_4output_index, _4constant_float);
+            sample_idx += 4;
+        }
+
+    // remaining samples (fewer than four): same index computation as the generic kernel.
+    // The -0.5 offset and the rounding must be applied here too, and negative indexes
+    // wrap by the full code length, otherwise the tail selects different chips than the
+    // vectorized part above.
+    for (unsigned int number = quarterPoints * 4; number < num_output_samples; number++)
+        {
+            for (int current_vector = 0; current_vector < num_out_vectors; current_vector++)
+                {
+                    int chip_index = (int)round(code_phase_step_chips * (float)(number) + rem_code_phase_chips[current_vector] - 0.5f);
+                    if (chip_index < 0) chip_index += code_length_chips;
+                    if ((unsigned int)chip_index > (code_length_chips - 1)) chip_index -= code_length_chips;
+                    result[current_vector][number] = local_code[chip_index];
+                }
+        }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H*/
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits
mailing list