[hamradio-commits] [gnss-sdr] 62/149: Merge branch 'new_volk_module' of git+ssh://github.com/gnss-sdr/gnss-sdr into new_volk_module

Sat Feb 6 19:43:03 UTC 2016

This is an automated email from the git hooks/post-receive script.

carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.

commit e57d02321df91369b3c8fa49a175e60da22ef286
Merge: ae2b594 5d0186e
Author: Carles Fernandez <carles.fernandez at gmail.com>
Date:   Wed Jan 13 20:04:18 2016 +0100

    Merge branch 'new_volk_module' of git+ssh://github.com/gnss-sdr/gnss-sdr
    into new_volk_module
    
    # Conflicts:
    #	src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
    #	src/algorithms/tracking/libs/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h

 ...urated_arithmetic.h => saturation_arithmetic.h} |  8 ++++----
 .../volk_gnsssdr_16ic_resampler_16ic.h             |  1 -
 .../volk_gnsssdr_16ic_x2_dot_prod_16ic.h           | 22 ++++++++++------------
 .../volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h        | 22 +++++++++++-----------
 .../volk_gnsssdr_16ic_x2_multiply_16ic.h           |  2 --
 .../volk_gnsssdr_16ic_xn_resampler_16ic_xn.h       |  7 ++++---
 .../volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h  |  1 -
 .../volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h   |  1 -
 .../volk_gnsssdr_64f_accumulator_64f.h             |  1 -
 .../volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h |  1 -
 .../volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h   |  1 -
 .../kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h |  1 -
 .../volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h       |  1 -
 .../volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h  |  1 -
 .../volk_gnsssdr_8ic_magnitude_squared_8i.h        |  1 -
 .../volk_gnsssdr_8ic_s8ic_multiply_8ic.h           |  1 -
 .../volk_gnsssdr_8ic_x2_dot_prod_8ic.h             |  2 --
 .../volk_gnsssdr_8ic_x2_multiply_8ic.h             |  1 -
 .../volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h  |  1 -
 19 files changed, 29 insertions(+), 47 deletions(-)

diff --cc src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturation_arithmetic.h
index 11f3f1c,15b3609..bd7e267

--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturation_arithmetic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/saturation_arithmetic.h
@@@ -3,29 -3,29 +3,29 @@@
  
  #include <limits.h>
  //#include <types.h>
- static inline int16_t sat_adds16b(int16_t x, int16_t y)
+ static inline int16_t sat_adds16i(int16_t x, int16_t y)
  {
 -//	int16_t ux = x;
 -//	int16_t uy = y;
 -//	int16_t res = ux + uy;
 -//
 -//	/* Calculate overflowed result. (Don't change the sign bit of ux) */
 -//	ux = (ux >> 15) + SHRT_MAX;
 -//
 -//	/* Force compiler to use cmovns instruction */
 -//	if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
 -//	{
 -//		res = ux;
 -//	}
 -//
 -//	return res;
 +    //	int16_t ux = x;
 +    //	int16_t uy = y;
 +    //	int16_t res = ux + uy;
 +    //
 +    //	/* Calculate overflowed result. (Don't change the sign bit of ux) */
 +    //	ux = (ux >> 15) + SHRT_MAX;
 +    //
 +    //	/* Force compiler to use cmovns instruction */
 +    //	if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
 +    //	{
 +    //		res = ux;
 +    //	}
 +    //
 +    //	return res;
  
 -	int32_t res = (int32_t) x + (int32_t) y;
 +    int32_t res = (int32_t) x + (int32_t) y;
  
 -	if (res < SHRT_MIN) res = SHRT_MIN;
 -	if (res > SHRT_MAX) res = SHRT_MAX;
 +    if (res < SHRT_MIN) res = SHRT_MIN;
 +    if (res > SHRT_MAX) res = SHRT_MAX;
  
 -	return res;
 +    return res;
  }
  
- #endif /*SATURATED_ARITHMETIC_H_*/
+ #endif /*SATURATION_ARITHMETIC_H_*/
diff --cc src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
index 7f9986e,c256bdc..375b8aa
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
@@@ -57,8 -57,8 +57,8 @@@ static inline void volk_gnsssdr_16ic_x2
                  {
                      //r*a.r - i*a.i, i*a.r + r*a.i
                      //result[n_vec]+=in_common[n]*in_a[n_vec][n];
 -                    lv_16sc_t tmp = in_common[n]*in_a[n_vec][n];
 +                    lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];
-                     result[n_vec] = lv_cmake(sat_adds16b(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16b(lv_cimag(result[n_vec]), lv_cimag(tmp)));
+                     result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
                  }
          }
  }
@@@ -148,13 -148,13 +148,13 @@@ static inline void volk_gnsssdr_16ic_x2
  
      for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
          {
 -            for(unsigned int n  = sse_iters * 4;n < num_points; n++){
 -
 -                    lv_16sc_t tmp = in_common[n]*in_a[n_vec][n];
 +            for(unsigned int n  = sse_iters * 4; n < num_points; n++)
 +                {
 +                    lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];
  
-                     _out[n_vec] = lv_cmake(sat_adds16b(lv_creal(_out[n_vec]), lv_creal(tmp)),
-                             sat_adds16b(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
-                 }
+                     _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
+                             sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
+             }
          }
  
  }
diff --cc src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
index c53075a,0000000..7173308
mode 100644,000000..100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
@@@ -1,171 -1,0 +1,172 @@@
 +/*!
 + * \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
-  * \brief Volk protokernel: resample a 16 bits complex vector
++ * \brief Volk protokernel: Resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
 + * \authors <ul>
 + *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
 + *          </ul>
 + *
-  * Volk protokernel that multiplies two 16 bits vectors (8 bits the real part 
-  * and 8 bits the imaginary part) and accumulates them
++ * Volk protokernel that resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
++ * It is optimized to resample a single GNSS local code signal replica into N vectors fractional-resampled and fractional-delayed
++ * (i.e. it creates the Early, Prompt, and Late code replicas)
 + *
 + * -------------------------------------------------------------------------
 + *
 + * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors)
 + *
 + * GNSS-SDR is a software defined Global Navigation
 + *          Satellite Systems receiver
 + *
 + * This file is part of GNSS-SDR.
 + *
 + * GNSS-SDR is free software: you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation, either version 3 of the License, or
 + * (at your option) any later version.
 + *
 + * GNSS-SDR is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
 + *
 + * -------------------------------------------------------------------------
 + */
 +
 +#ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
 +#define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H
 +
 +#include <math.h>
 +#include <volk_gnsssdr/volk_gnsssdr_common.h>
 +#include <volk_gnsssdr/volk_gnsssdr_complex.h>
 +
 +//#pragma STDC FENV_ACCESS ON
 +
 +#ifdef LV_HAVE_GENERIC
 +
 +//int round_int( float r ) {
 +//    return (r > 0.0) ? (r + 0.5) : (r - 0.5);
 +//}
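 +/*!
 + \brief Resamples local_code into num_out_vectors output vectors by nearest-chip lookup (zero hold), with a single wrap-around correction of the chip index by code_length_chips
 + \param result Array of num_out_vectors output vectors; num_output_samples samples are written to each
 + \param local_code The code replica to be resampled, read at the computed chip index
 + \param rem_code_phase_chips Code phase offset applied to each output vector (one value per vector)
 + \param code_phase_step_chips Code phase increment per output sample; the remaining arguments give the samples per output vector, the length of local_code and the number of output vectors
 + */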
 +
 +static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
 +{
 +    int local_code_chip_index;
 +    //fesetround(FE_TONEAREST);
 +    for (int current_vector = 0; current_vector < num_out_vectors; current_vector++)
 +        {
 +            for (unsigned int n = 0; n < num_output_samples; n++)
 +                {
 +                    // resample code for current tap
 +                    local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector]-0.5f);
 +                    if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips;
 +                    if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips;
 +                    //std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" ";
 +                    result[current_vector][n] = local_code[local_code_chip_index];
 +                }
 +        }
 +    //std::cout<<std::endl;
 +}
 +
 +#endif /*LV_HAVE_GENERIC*/
 +
 +
 +#ifdef LV_HAVE_SSE2
 +#include <emmintrin.h>
 +static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors)
 +{
 +    _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
 +    unsigned int number;
 +    const unsigned int quarterPoints = num_output_samples / 4;
 +
 +    lv_16sc_t** _result = result;
 +    __attribute__((aligned(16))) int local_code_chip_index[4];
 +    float tmp_rem_code_phase_chips;
 +    __m128 _rem_code_phase,_code_phase_step_chips;
 +    __m128i _code_length_chips,_code_length_chips_minus1;
 +    __m128 _code_phase_out,_code_phase_out_with_offset;
 +
 +    _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
 +    __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
 +    four_times_code_length_chips_minus1[0] = code_length_chips - 1;
 +    four_times_code_length_chips_minus1[1] = code_length_chips - 1;
 +    four_times_code_length_chips_minus1[2] = code_length_chips - 1;
 +    four_times_code_length_chips_minus1[3] = code_length_chips - 1;
 +
 +    __attribute__((aligned(16))) int four_times_code_length_chips[4];
 +    four_times_code_length_chips[0] = code_length_chips;
 +    four_times_code_length_chips[1] = code_length_chips;
 +    four_times_code_length_chips[2] = code_length_chips;
 +    four_times_code_length_chips[3] = code_length_chips;
 +
 +    _code_length_chips = _mm_loadu_si128((__m128i*)&four_times_code_length_chips); //load the four integer copies of code_length_chips into the m128i register
 +    _code_length_chips_minus1 = _mm_loadu_si128((__m128i*)&four_times_code_length_chips_minus1); //load the four integer copies of (code_length_chips - 1) into the m128i register
 +
 +    __m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over;
 +
 +    __m128i zero=_mm_setzero_si128();
 +
 +    __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
 +    __m128 _4output_index = _mm_load_ps(init_idx_float);
 +    __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
 +    __m128 _4constant_float = _mm_load_ps(init_4constant_float);
 +
 +    int current_vector = 0;
 +    int sample_idx = 0;
 +    for(number = 0; number < quarterPoints; number++)
 +        {
 +            //common to all outputs
 +            _code_phase_out = _mm_mul_ps(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step
 +
 +            //output vector dependent (different code phase offset)
 +            for(current_vector = 0; current_vector < num_out_vectors; current_vector++)
 +                {
 +                    tmp_rem_code_phase_chips = rem_code_phase_chips[current_vector] - 0.5f; // adjust offset to perform correct rounding (chip transition at 0)
 +                    _rem_code_phase = _mm_load1_ps(&tmp_rem_code_phase_chips); //load float to all four float values in m128 register
 +
 +                    _code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase); //add the phase offset
 +                    _code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer
 +
 +                    negative_indexes = _mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values
 +                    _code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _code_length_chips); //the negative values branch
 +                    _code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
 +
 +                    overflow_indexes = _mm_cmpgt_epi32  (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
 +                    _code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _code_length_chips); //the overflow values branch
 +                    _code_phase_out_int_over = _mm_xor_si128(_code_phase_out_int_neg, _mm_and_si128( overflow_indexes, _mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg )));
 +
 +                    _mm_storeu_si128((__m128i*)local_code_chip_index, _code_phase_out_int_over); // Store the results back
 +
 +                    //todo: optimize the local code lookup table with intrinsics, if possible
 +                    _result[current_vector][sample_idx] = local_code[local_code_chip_index[0]];
 +                    _result[current_vector][sample_idx + 1] = local_code[local_code_chip_index[1]];
 +                    _result[current_vector][sample_idx + 2] = local_code[local_code_chip_index[2]];
 +                    _result[current_vector][sample_idx + 3] = local_code[local_code_chip_index[3]];
 +                }
 +            _4output_index = _mm_add_ps(_4output_index, _4constant_float);
 +            sample_idx += 4;
 +        }
 +
 +    for(number = quarterPoints * 4; number < num_output_samples; number++)
 +        {
 +            for(current_vector = 0; current_vector < num_out_vectors; current_vector++)
 +                {
 +                    local_code_chip_index[0] = (int)(code_phase_step_chips * (float)(number) + rem_code_phase_chips[current_vector]);
 +                    if (local_code_chip_index[0] < 0.0) local_code_chip_index[0] += code_length_chips - 1;
 +                    if (local_code_chip_index[0] > (code_length_chips - 1)) local_code_chip_index[0] -= code_length_chips;
 +                    _result[current_vector][number] = local_code[local_code_chip_index[0]];
 +                }
 +
 +        }
 +
 +}
 +#endif /* LV_HAVE_SSE2 */
 +
 +#endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H*/

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git