[hamradio-commits] [gnss-sdr] 100/126: Updated GPU tracking implementation. Bug fixed in cuda correlator and performance improvements
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Sat Dec 26 18:38:06 UTC 2015
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit 847716428e1e9d32c5f2c2e97064306ea4580558
Author: Javier Arribas <javiarribas at gmail.com>
Date: Fri Dec 11 13:53:43 2015 +0100
Updated GPU tracking implementation. Bug fixed in cuda correlator and
performance improvements
---
conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf | 4 +-
.../gps_l1_ca_dll_pll_tracking_gpu_cc.cc | 347 +++++++++++----------
.../gps_l1_ca_dll_pll_tracking_gpu_cc.h | 20 +-
.../tracking/libs/cuda_multicorrelator.cu | 121 +++----
.../tracking/libs/cuda_multicorrelator.h | 2 +-
5 files changed, 260 insertions(+), 234 deletions(-)
diff --git a/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf b/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf
index 52b1781..98359bc 100644
--- a/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf
+++ b/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf
@@ -159,7 +159,7 @@ Resampler.sample_freq_out=4000000
;######### CHANNELS GLOBAL CONFIG ############
;#count: Number of available GPS satellite channels.
-Channels_GPS.count=8
+Channels_GPS.count=12
;#count: Number of available Galileo satellite channels.
Channels_Galileo.count=0
;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
@@ -204,7 +204,7 @@ Acquisition_GPS.sampled_ms=1
;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
Acquisition_GPS.implementation=GPS_L1_CA_PCPS_Acquisition
;#threshold: Acquisition threshold
-Acquisition_GPS.threshold=0.06
+Acquisition_GPS.threshold=0.01
;#pfa: Acquisition false alarm probability. This option overrides the threshold option. Only use with implementations: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
;Acquisition_GPS.pfa=0.01
;#doppler_max: Maximum expected Doppler shift [Hz]
diff --git a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
index d4fffeb..8a4c142 100644
--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
@@ -1,13 +1,8 @@
/*!
* \file gps_l1_ca_dll_pll_tracking_gpu_cc.cc
- * \brief Implementation of a code DLL + carrier PLL tracking block, GPU ACCELERATED
+ * \brief Implementation of a code DLL + carrier PLL tracking block GPU ACCELERATED
* \author Javier Arribas, 2015. jarribas(at)cttc.es
*
- * Code DLL + carrier PLL according to the algorithms described in:
- * [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
- * A Software-Defined GPS and Galileo Receiver. A Single-Frequency
- * Approach, Birkhauser, 2007
- *
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
@@ -40,6 +35,7 @@
#include <sstream>
#include <boost/lexical_cast.hpp>
#include <gnuradio/io_signature.h>
+#include <volk/volk.h>
#include <glog/logging.h>
#include "gnss_synchro.h"
#include "gps_sdr_signal_processing.h"
@@ -47,7 +43,6 @@
#include "lock_detectors.h"
#include "GPS_L1_CA.h"
#include "control_message_factory.h"
-#include <volk/volk.h> //volk_alignement
// includes
#include <cuda_profiler_api.h>
@@ -80,10 +75,14 @@ gps_l1_ca_dll_pll_make_tracking_gpu_cc(
}
+
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::forecast (int noutput_items,
gr_vector_int &ninput_items_required)
{
- ninput_items_required[0] = static_cast<int>(d_vector_length) * 2; //set the required available samples in each call
+ if (noutput_items != 0)
+ {
+ ninput_items_required[0] = static_cast<int>(d_vector_length) * 2; //set the required available samples in each call
+ }
}
@@ -108,10 +107,11 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
d_fs_in = fs_in;
d_vector_length = vector_length;
d_dump_filename = dump_filename;
+ d_correlation_length_samples = static_cast<int>(d_vector_length);
// Initialize tracking ==========================================
d_code_loop_filter.set_DLL_BW(dll_bw_hz);
- d_carrier_loop_filter.set_PLL_BW(pll_bw_hz);
+ d_carrier_loop_filter.set_params(10.0, pll_bw_hz,2);
//--- DLL variables --------------------------------------------------------
d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips)
@@ -120,32 +120,33 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
cudaSetDeviceFlags(cudaDeviceMapHost);
//allocate host memory
//pinned memory mode - use special function to get OS-pinned memory
- int N_CORRELATORS = 3;
+ d_n_correlator_taps = 3; // Early, Prompt, and Late
// Get space for a vector with the C/A code replica sampled 1x/chip
- cudaHostAlloc((void**)&d_ca_code, (GPS_L1_CA_CODE_LENGTH_CHIPS* sizeof(gr_complex)), cudaHostAllocMapped || cudaHostAllocWriteCombined);
+ cudaHostAlloc((void**)&d_ca_code, (static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS)* sizeof(gr_complex)), cudaHostAllocMapped || cudaHostAllocWriteCombined);
// Get space for the resampled early / prompt / late local replicas
- cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float), cudaHostAllocMapped || cudaHostAllocWriteCombined);
+ cudaHostAlloc((void**)&d_local_code_shift_chips, d_n_correlator_taps * sizeof(float), cudaHostAllocMapped || cudaHostAllocWriteCombined);
cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped || cudaHostAllocWriteCombined);
// correlator outputs (scalar)
- cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS, cudaHostAllocMapped || cudaHostAllocWriteCombined );
+ cudaHostAlloc((void**)&d_correlator_outs ,sizeof(gr_complex)*d_n_correlator_taps, cudaHostAllocMapped || cudaHostAllocWriteCombined );
+
+ // Set TAPs delay values [chips]
+ d_local_code_shift_chips[0] = - d_early_late_spc_chips;
+ d_local_code_shift_chips[1] = 0.0;
+ d_local_code_shift_chips[2] = d_early_late_spc_chips;
- //map to EPL pointers
- d_Early = &d_corr_outs_gpu[0];
- d_Prompt = &d_corr_outs_gpu[1];
- d_Late = &d_corr_outs_gpu[2];
//--- Perform initializations ------------------------------
multicorrelator_gpu = new cuda_multicorrelator();
//local code resampler on GPU
- multicorrelator_gpu->init_cuda_integrated_resampler(2 * d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, 3);
- multicorrelator_gpu->set_input_output_vectors(d_corr_outs_gpu, in_gpu);
+ multicorrelator_gpu->init_cuda_integrated_resampler(2 * d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, d_n_correlator_taps);
+ multicorrelator_gpu->set_input_output_vectors(d_correlator_outs, in_gpu);
// define initial code frequency basis of NCO
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ;
// define residual code phase (in chips)
d_rem_code_phase_samples = 0.0;
// define residual carrier phase
- d_rem_carr_phase_rad = 0.0;
+ d_rem_carrier_phase_rad = 0.0;
// sample synchronization
d_sample_counter = 0;
@@ -156,8 +157,6 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
d_pull_in = false;
d_last_seg = 0;
- d_current_prn_length_samples = static_cast<int>(d_vector_length);
-
// CN0 estimation and lock detector buffers
d_cn0_estimation_counter = 0;
d_Prompt_buffer = new gr_complex[CN0_ESTIMATION_SAMPLES];
@@ -169,8 +168,7 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
systemName["G"] = std::string("GPS");
systemName["S"] = std::string("SBAS");
-
- set_relative_rate(1.0/((double)d_vector_length*2));
+ set_relative_rate(1.0 / (static_cast<double>(d_vector_length) * 2.0));
d_channel_internal_queue = 0;
d_acquisition_gnss_synchro = 0;
@@ -178,9 +176,13 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
d_acq_code_phase_samples = 0.0;
d_acq_carrier_doppler_hz = 0.0;
d_carrier_doppler_hz = 0.0;
- d_acc_carrier_phase_rad = 0.0;
+ d_acc_carrier_phase_cycles = 0.0;
d_code_phase_samples = 0.0;
- d_acc_code_phase_secs = 0.0;
+
+ d_pll_to_dll_assist_secs_Ti = 0.0;
+ d_rem_code_phase_chips = 0.0;
+ d_code_phase_step_chips = 0.0;
+ d_carrier_phase_step_rad = 0.0;
//set_min_output_buffer((long int)300);
}
@@ -192,7 +194,7 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
*/
d_acq_code_phase_samples = d_acquisition_gnss_synchro->Acq_delay_samples;
d_acq_carrier_doppler_hz = d_acquisition_gnss_synchro->Acq_doppler_hz;
- d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples;
+ d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples;
long int acq_trk_diff_samples;
double acq_trk_diff_seconds;
@@ -207,15 +209,16 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
double T_prn_mod_seconds;
double T_prn_mod_samples;
d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
- T_chip_mod_seconds = 1.0/d_code_freq_chips;
+ d_code_phase_step_chips = static_cast<double>(d_code_freq_chips) / static_cast<double>(d_fs_in);
+ T_chip_mod_seconds = 1/d_code_freq_chips;
T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_mod_samples = T_prn_mod_seconds * static_cast<double>(d_fs_in);
- d_current_prn_length_samples = round(T_prn_mod_samples);
+ d_correlation_length_samples = round(T_prn_mod_samples);
double T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
double T_prn_true_samples = T_prn_true_seconds * static_cast<double>(d_fs_in);
- double T_prn_diff_seconds= T_prn_true_seconds - T_prn_mod_seconds;
+ double T_prn_diff_seconds = T_prn_true_seconds - T_prn_mod_seconds;
double N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds;
double corrected_acq_phase_samples, delay_correction_samples;
corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<double>(d_fs_in)), T_prn_true_samples);
@@ -229,25 +232,28 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
d_carrier_doppler_hz = d_acq_carrier_doppler_hz;
+ d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
+
// DLL/PLL filter initialization
- d_carrier_loop_filter.initialize(); // initialize the carrier filter
+ d_carrier_loop_filter.initialize(d_acq_carrier_doppler_hz); //The carrier loop filter implements the Doppler accumulator
d_code_loop_filter.initialize(); // initialize the code filter
// generate local reference ALWAYS starting at chip 1 (1 sample per chip)
gps_l1_ca_code_gen_complex(d_ca_code, d_acquisition_gnss_synchro->PRN, 0);
- d_local_code_shift_chips[0] = - d_early_late_spc_chips;
- d_local_code_shift_chips[1] = 0.0;
- d_local_code_shift_chips[2] = d_early_late_spc_chips;
+ multicorrelator_gpu->set_local_code_and_taps(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips, d_n_correlator_taps);
- multicorrelator_gpu->set_local_code_and_taps(GPS_L1_CA_CODE_LENGTH_CHIPS, d_ca_code, d_local_code_shift_chips, 3);
+ for (int n = 0; n < d_n_correlator_taps; n++)
+ {
+ d_correlator_outs[n] = gr_complex(0,0);
+ }
d_carrier_lock_fail_counter = 0;
- d_rem_code_phase_samples = 0;
- d_rem_carr_phase_rad = 0;
- d_acc_carrier_phase_rad = 0;
- d_acc_code_phase_secs = 0;
-
+ d_rem_code_phase_samples = 0.0;
+ d_rem_carrier_phase_rad = 0.0;
+ d_rem_code_phase_chips = 0.0;
+ d_acc_carrier_phase_cycles = 0.0;
+ d_pll_to_dll_assist_secs_Ti = 0.0;
d_code_phase_samples = d_acq_code_phase_samples;
std::string sys_ = &d_acquisition_gnss_synchro->System;
@@ -270,14 +276,15 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc()
{
+
d_dump_file.close();
cudaFreeHost(in_gpu);
- cudaFreeHost(d_corr_outs_gpu);
+ cudaFreeHost(d_correlator_outs);
cudaFreeHost(d_local_code_shift_chips);
cudaFreeHost(d_ca_code);
multicorrelator_gpu->free_cuda();
- delete(multicorrelator_gpu);
delete[] d_Prompt_buffer;
+ delete(multicorrelator_gpu);
}
@@ -285,29 +292,34 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc()
int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vector_int &ninput_items,
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
{
- // process vars
- double carr_error_hz=0.0;
- double carr_error_filt_hz=0.0;
- double code_error_chips=0.0;
- double code_error_filt_chips=0.0;
-
// Block input data and block output stream pointers
- const gr_complex* in = (gr_complex*) input_items[0];
+ const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignment
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
Gnss_Synchro current_synchro_data = Gnss_Synchro();
+ // process vars
+ double code_error_chips_Ti = 0.0;
+ double code_error_filt_chips = 0.0;
+ double code_error_filt_secs_Ti = 0.0;
+ double CURRENT_INTEGRATION_TIME_S;
+ double CORRECTED_INTEGRATION_TIME_S;
+ double dll_code_error_secs_Ti = 0.0;
+ double carr_phase_error_secs_Ti = 0.0;
+ double old_d_rem_code_phase_samples;
if (d_enable_tracking == true)
{
// Receiver signal alignment
if (d_pull_in == true)
{
int samples_offset;
+ double acq_trk_shif_correction_samples;
int acq_to_trk_delay_samples;
acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
- samples_offset = round(d_acq_code_phase_samples)+d_current_prn_length_samples - acq_to_trk_delay_samples%d_current_prn_length_samples;
- d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
+ acq_trk_shif_correction_samples = d_correlation_length_samples - fmod(static_cast<double>(acq_to_trk_delay_samples), static_cast<double>(d_correlation_length_samples));
+ samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
+ d_sample_counter += samples_offset; //count for the processed samples
d_pull_in = false;
// Fill the acquisition data
current_synchro_data = *d_acquisition_gnss_synchro;
@@ -319,46 +331,44 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
// Fill the acquisition data
current_synchro_data = *d_acquisition_gnss_synchro;
- // UPDATE NCO COMMAND
- double phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
+ // ################# CARRIER WIPEOFF AND CORRELATORS ##############################
+ // perform carrier wipe-off and compute Early, Prompt and Late correlation
- //code resampler on GPU (new)
- double code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in);
- double rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
-
- memcpy(in_gpu, in, sizeof(gr_complex) * d_current_prn_length_samples);
+ memcpy(in_gpu, in, sizeof(gr_complex) * d_correlation_length_samples);
cudaProfilerStart();
- multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda( static_cast<float>(d_rem_carr_phase_rad),
- static_cast<float>(phase_step_rad),
- static_cast<float>(code_phase_step_chips),
- static_cast<float>(rem_code_phase_chips),
- d_current_prn_length_samples, 3);
+ multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda( static_cast<float>(d_rem_carrier_phase_rad),
+ static_cast<float>(d_carrier_phase_step_rad),
+ static_cast<float>(d_code_phase_step_chips),
+ static_cast<float>(d_rem_code_phase_chips),
+ d_correlation_length_samples, d_n_correlator_taps);
cudaProfilerStop();
+ //std::cout<<"c_out[0]="<<d_correlator_outs[0]<<"c_out[1]="<<d_correlator_outs[1]<<"c_out[2]="<<d_correlator_outs[2]<<std::endl;
+
+ // UPDATE INTEGRATION TIME
+ CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in);
// ################## PLL ##########################################################
- // PLL discriminator
- carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / GPS_TWO_PI;
+ // Update PLL discriminator [rads/Ti -> Secs/Ti]
+ carr_phase_error_secs_Ti = pll_cloop_two_quadrant_atan(d_correlator_outs[1]) / GPS_TWO_PI; //prompt output
// Carrier discriminator filter
- carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz);
- // New carrier Doppler frequency estimation
- d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz;
- // New code Doppler frequency estimation
+ // NOTICE: The carrier loop filter includes the Carrier Doppler accumulator, as described in Kaplan
+ //d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_phase_error_filt_secs_ti/INTEGRATION_TIME;
+ // Input [s/Ti] -> output [Hz]
+ d_carrier_doppler_hz = d_carrier_loop_filter.get_carrier_error(0.0, carr_phase_error_secs_Ti, CURRENT_INTEGRATION_TIME_S);
+ // PLL to DLL assistance [Secs/Ti]
+ d_pll_to_dll_assist_secs_Ti = (d_carrier_doppler_hz * CURRENT_INTEGRATION_TIME_S) / GPS_L1_FREQ_HZ;
+ // code Doppler frequency update
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
- //carrier phase accumulator for (K) doppler estimation
- d_acc_carrier_phase_rad -= GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
- //remanent carrier phase to prevent overflow in the code NCO
- d_rem_carr_phase_rad = d_rem_carr_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
- d_rem_carr_phase_rad = fmod(d_rem_carr_phase_rad, GPS_TWO_PI);
// ################## DLL ##########################################################
// DLL discriminator
- code_error_chips = dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti]
+ code_error_chips_Ti = dll_nc_e_minus_l_normalized(d_correlator_outs[0], d_correlator_outs[2]); //[chips/Ti] //early and late
// Code discriminator filter
- code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second]
- //Code phase accumulator
- double code_error_filt_secs;
- code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds]
- d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs;
+ code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips_Ti); //input [chips/Ti] -> output [chips/second]
+ code_error_filt_secs_Ti = code_error_filt_chips*CURRENT_INTEGRATION_TIME_S/d_code_freq_chips; // [s/Ti]
+ // DLL code error estimation [s/Ti]
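+ // TODO: this comment previously stated that the PLL carrier aid to the DLL is disabled, but d_pll_to_dll_assist_secs_Ti is added in the next line; confirm the intended behavior and measure performance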
+ dll_code_error_secs_Ti = - code_error_filt_secs_Ti + d_pll_to_dll_assist_secs_Ti;
// ################## CARRIER AND CODE NCO BUFFER ALIGNMENT #######################
// keep alignment parameters for the next input buffer
@@ -367,17 +377,38 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
double T_prn_samples;
double K_blk_samples;
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
- T_chip_seconds = 1.0 / d_code_freq_chips;
+ T_chip_seconds = 1 / d_code_freq_chips;
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
- K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast<double>(d_fs_in);
- //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
-
- // ####### CN0 ESTIMATION AND LOCK DETECTORS ######
+ K_blk_samples = T_prn_samples + d_rem_code_phase_samples - dll_code_error_secs_Ti * static_cast<double>(d_fs_in);
+
+ d_correlation_length_samples = round(K_blk_samples); //round to a whole number of samples
+ old_d_rem_code_phase_samples=d_rem_code_phase_samples;
+ d_rem_code_phase_samples = K_blk_samples - static_cast<double>(d_correlation_length_samples); //rounding error < 1 sample
+
+ // UPDATE REMNANT CARRIER PHASE
+ CORRECTED_INTEGRATION_TIME_S=(static_cast<double>(d_correlation_length_samples)/static_cast<double>(d_fs_in));
+ //remnant carrier phase [rad]
+ d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S, GPS_TWO_PI);
+ // UPDATE CARRIER PHASE ACCUMULATOR
+ //carrier phase accumulator prior to updating the PLL estimators (accumulated carrier in this loop depends on the old estimations!)
+ d_acc_carrier_phase_cycles -= d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S;
+
+ //################### PLL COMMANDS #################################################
+ //carrier phase step (NCO phase increment per sample) [rads/sample]
+ d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
+
+ //################### DLL COMMANDS #################################################
+ //code phase step (Code resampler phase increment per sample) [chips/sample]
+ d_code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in);
+ //remnant code phase [chips]
+ d_rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / static_cast<double>(d_fs_in));
+
+ // ####### CN0 ESTIMATION AND LOCK DETECTORS #######################################
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
{
// fill buffer with prompt correlator output values
- d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
+ d_Prompt_buffer[d_cn0_estimation_counter] = d_correlator_outs[1]; //prompt
d_cn0_estimation_counter++;
}
else
@@ -409,24 +440,15 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
}
}
- // ########### Output the tracking data to navigation and PVT ##########
- current_synchro_data.Prompt_I = static_cast<double>((*d_Prompt).real());
- current_synchro_data.Prompt_Q = static_cast<double>((*d_Prompt).imag());
-
- // Tracking_timestamp_secs is aligned with the NEXT PRN start sample (Hybridization problem!)
- //compute remnant code phase samples BEFORE the Tracking timestamp
- //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
- //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples)/static_cast<double>(d_fs_in);
-
- // Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!, but some glitches??)
- current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
- //compute remnant code phase samples AFTER the Tracking timestamp
- d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
- //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter)/static_cast<double>(d_fs_in);
+ // ########### Output the tracking data to navigation and PVT ##########
+ current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real());
+ current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag());
+ // Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!)
+ current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + old_d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
current_synchro_data.Code_phase_secs = 0;
- current_synchro_data.Carrier_phase_rads = d_acc_carrier_phase_rad;
+ current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles;
current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;
current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
current_synchro_data.Flag_valid_pseudorange = false;
@@ -444,7 +466,7 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
d_last_seg = floor(d_sample_counter / d_fs_in);
std::cout << "Current input signal time = " << d_last_seg << " [s]" << std::endl;
DLOG(INFO) << "GPS L1 C/A Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN)
- << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl;
+ << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl;
//if (d_last_seg==5) d_carrier_lock_fail_counter=500; //DEBUG: force unlock!
}
}
@@ -454,7 +476,7 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
{
d_last_seg = floor(d_sample_counter / d_fs_in);
DLOG(INFO) << "Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN)
- << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]";
+ << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]";
}
}
}
@@ -477,9 +499,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
std::cout << tmp_str_stream.rdbuf() << std::flush;
}
}
- *d_Early = gr_complex(0,0);
- *d_Prompt = gr_complex(0,0);
- *d_Late = gr_complex(0,0);
+ for (int n = 0; n < d_n_correlator_taps; n++)
+ {
+ d_correlator_outs[n] = gr_complex(0,0);
+ }
current_synchro_data.System = {'G'};
current_synchro_data.Flag_valid_pseudorange = false;
@@ -492,74 +515,65 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
float prompt_I;
float prompt_Q;
float tmp_E, tmp_P, tmp_L;
- float tmp_float;
double tmp_double;
- prompt_I = (*d_Prompt).real();
- prompt_Q = (*d_Prompt).imag();
- tmp_E = std::abs<float>(*d_Early);
- tmp_P = std::abs<float>(*d_Prompt);
- tmp_L = std::abs<float>(*d_Late);
+ prompt_I = d_correlator_outs[1].real();
+ prompt_Q = d_correlator_outs[1].imag();
+ tmp_E = std::abs<float>(d_correlator_outs[0]);
+ tmp_P = std::abs<float>(d_correlator_outs[1]);
+ tmp_L = std::abs<float>(d_correlator_outs[2]);
try
{
-
- // EPR
- d_dump_file.write((char*)&tmp_E, sizeof(float));
- d_dump_file.write((char*)&tmp_P, sizeof(float));
- d_dump_file.write((char*)&tmp_L, sizeof(float));
- // PROMPT I and Q (to analyze navigation symbols)
- d_dump_file.write((char*)&prompt_I, sizeof(float));
- d_dump_file.write((char*)&prompt_Q, sizeof(float));
- // PRN start sample stamp
- //tmp_float=(float)d_sample_counter;
- d_dump_file.write((char*)&d_sample_counter, sizeof(unsigned long int));
- // accumulated carrier phase
- tmp_float = d_acc_carrier_phase_rad;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
-
- // carrier and code frequency
- tmp_float = d_carrier_doppler_hz;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
- tmp_float = d_code_freq_chips;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
-
- //PLL commands
- tmp_float = carr_error_hz;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
- tmp_float = carr_error_filt_hz;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
-
- //DLL commands
- tmp_float = code_error_chips;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
- tmp_float = code_error_filt_chips;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
-
- // CN0 and carrier lock test
- tmp_float = d_CN0_SNV_dB_Hz;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
- tmp_float = d_carrier_lock_test;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
-
- // AUX vars (for debug purposes)
- tmp_float = d_rem_code_phase_samples;
- d_dump_file.write((char*)&tmp_float, sizeof(float));
- tmp_double = (double)(d_sample_counter + d_current_prn_length_samples);
- d_dump_file.write((char*)&tmp_double, sizeof(double));
+ // EPR
+ d_dump_file.write(reinterpret_cast<char*>(&tmp_E), sizeof(float));
+ d_dump_file.write(reinterpret_cast<char*>(&tmp_P), sizeof(float));
+ d_dump_file.write(reinterpret_cast<char*>(&tmp_L), sizeof(float));
+ // PROMPT I and Q (to analyze navigation symbols)
+ d_dump_file.write(reinterpret_cast<char*>(&prompt_I), sizeof(float));
+ d_dump_file.write(reinterpret_cast<char*>(&prompt_Q), sizeof(float));
+ // PRN start sample stamp
+ //tmp_float=(float)d_sample_counter;
+ d_dump_file.write(reinterpret_cast<char*>(&d_sample_counter), sizeof(unsigned long int));
+ // accumulated carrier phase
+ d_dump_file.write(reinterpret_cast<char*>(&d_acc_carrier_phase_cycles), sizeof(double));
+
+ // carrier and code frequency
+ d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(double));
+ d_dump_file.write(reinterpret_cast<char*>(&d_code_freq_chips), sizeof(double));
+
+ //PLL commands
+ d_dump_file.write(reinterpret_cast<char*>(&carr_phase_error_secs_Ti), sizeof(double));
+ d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(double));
+
+ //DLL commands
+ d_dump_file.write(reinterpret_cast<char*>(&code_error_chips_Ti), sizeof(double));
+ d_dump_file.write(reinterpret_cast<char*>(&code_error_filt_chips), sizeof(double));
+
+ // CN0 and carrier lock test
+ d_dump_file.write(reinterpret_cast<char*>(&d_CN0_SNV_dB_Hz), sizeof(double));
+ d_dump_file.write(reinterpret_cast<char*>(&d_carrier_lock_test), sizeof(double));
+
+ // AUX vars (for debug purposes)
+ tmp_double = d_rem_code_phase_samples;
+ d_dump_file.write(reinterpret_cast<char*>(&tmp_double), sizeof(double));
+ tmp_double = static_cast<double>(d_sample_counter + d_correlation_length_samples);
+ d_dump_file.write(reinterpret_cast<char*>(&tmp_double), sizeof(double));
}
- catch (std::ifstream::failure e)
+ catch (const std::ifstream::failure* e)
{
- LOG(WARNING) << "Exception writing trk dump file " << e.what();
+ LOG(WARNING) << "Exception writing trk dump file " << e->what();
}
}
- consume_each(d_current_prn_length_samples); // this is necessary in gr::block derivates
- d_sample_counter += d_current_prn_length_samples; //count for the processed samples
- //LOG(INFO)<<"GPS tracking output end on CH="<<this->d_channel << " SAMPLE STAMP="<<d_sample_counter<<std::endl;
+ consume_each(d_correlation_length_samples); // this is necessary in gr::block derivatives
+ d_sample_counter += d_correlation_length_samples; //count for the processed samples
+
+ if((noutput_items == 0) || (ninput_items[0] == 0))
+ {
+ LOG(WARNING) << "noutput_items = 0";
+ }
return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
}
-
-
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel(unsigned int channel)
{
d_channel = channel;
@@ -577,22 +591,19 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel(unsigned int channel)
d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary);
LOG(INFO) << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str() << std::endl;
}
- catch (std::ifstream::failure e)
+ catch (const std::ifstream::failure* e)
{
- LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e.what() << std::endl;
+ LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e->what() << std::endl;
}
}
}
}
-
-
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue)
{
d_channel_internal_queue = channel_internal_queue;
}
-
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
{
d_acquisition_gnss_synchro = p_gnss_synchro;
diff --git a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h
index b6842f4..7c08e12 100644
--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h
@@ -48,7 +48,7 @@
#include "gps_sdr_signal_processing.h"
#include "gnss_synchro.h"
#include "tracking_2nd_DLL_filter.h"
-#include "tracking_2nd_PLL_filter.h"
+#include "tracking_FLL_PLL_filter.h"
#include "cuda_multicorrelator.h"
class Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc;
@@ -124,12 +124,13 @@ private:
long d_fs_in;
double d_early_late_spc_chips;
+ int d_n_correlator_taps;
//GPU HOST PINNED MEMORY IN/OUT VECTORS
gr_complex* in_gpu;
float* d_local_code_shift_chips;
- gr_complex* d_corr_outs_gpu;
+ gr_complex* d_correlator_outs;
cuda_multicorrelator *multicorrelator_gpu;
gr_complex* d_ca_code;
@@ -140,11 +141,12 @@ private:
// remaining code phase and carrier phase between tracking loops
double d_rem_code_phase_samples;
- double d_rem_carr_phase_rad;
+ double d_rem_code_phase_chips;
+ double d_rem_carrier_phase_rad;
// PLL and DLL filter library
Tracking_2nd_DLL_filter d_code_loop_filter;
- Tracking_2nd_PLL_filter d_carrier_loop_filter;
+ Tracking_FLL_PLL_filter d_carrier_loop_filter;
// acquisition
double d_acq_code_phase_samples;
@@ -152,13 +154,15 @@ private:
// tracking vars
double d_code_freq_chips;
+ double d_code_phase_step_chips;
double d_carrier_doppler_hz;
- double d_acc_carrier_phase_rad;
+ double d_carrier_phase_step_rad;
+ double d_acc_carrier_phase_cycles;
double d_code_phase_samples;
- double d_acc_code_phase_secs;
+ double d_pll_to_dll_assist_secs_Ti;
- //PRN period in samples
- int d_current_prn_length_samples;
+ //Integration period in samples
+ int d_correlation_length_samples;
//processing samples counters
unsigned long int d_sample_counter;
diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.cu b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
index 6ebec80..61dc305 100644
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
@@ -47,7 +47,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
GPU_Complex *d_sig_wiped,
GPU_Complex *d_local_code_in,
float *d_shifts_chips,
- float code_length_chips,
+ int code_length_chips,
float code_phase_step_chips,
float rem_code_phase_chips,
int vectorN,
@@ -90,7 +90,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
{
GPU_Complex sum = GPU_Complex(0,0);
- float local_code_chip_index;
+ float local_code_chip_index=0.0;;
//float code_phase;
for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
{
@@ -105,7 +105,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
local_code_chip_index= fmodf(code_phase_step_chips*__int2float_rd(pos)+ d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips);
//Take into account that in multitap correlators, the shifts can be negative!
- if (local_code_chip_index<0.0) local_code_chip_index+=code_length_chips;
+ if (local_code_chip_index<0.0) local_code_chip_index+=(code_length_chips-1);
//printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index);
// 2.correlate
sum.multiply_acc(d_sig_wiped[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]);
@@ -143,52 +143,52 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
{
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
// findCudaDevice(argc, (const char **)argv);
-// cudaDeviceProp prop;
-// int num_devices, device;
-// cudaGetDeviceCount(&num_devices);
-// if (num_devices > 1) {
-// int max_multiprocessors = 0, max_device = 0;
-// for (device = 0; device < num_devices; device++) {
-// cudaDeviceProp properties;
-// cudaGetDeviceProperties(&properties, device);
-// if (max_multiprocessors < properties.multiProcessorCount) {
-// max_multiprocessors = properties.multiProcessorCount;
-// max_device = device;
-// }
-// printf("Found GPU device # %i\n",device);
-// }
-// //cudaSetDevice(max_device);
-//
-// //set random device!
-// cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs
-//
-// cudaGetDeviceProperties( &prop, max_device );
-// //debug code
-// if (prop.canMapHostMemory != 1) {
-// printf( "Device can not map memory.\n" );
-// }
-// printf("L2 Cache size= %u \n",prop.l2CacheSize);
-// printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
-// printf("maxGridSize= %i \n",prop.maxGridSize[0]);
-// printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
-// printf("deviceOverlap= %i \n",prop.deviceOverlap);
-// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
-// }else{
-// int whichDevice;
-// cudaGetDevice( &whichDevice );
-// cudaGetDeviceProperties( &prop, whichDevice );
-// //debug code
-// if (prop.canMapHostMemory != 1) {
-// printf( "Device can not map memory.\n" );
-// }
-//
-// printf("L2 Cache size= %u \n",prop.l2CacheSize);
-// printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
-// printf("maxGridSize= %i \n",prop.maxGridSize[0]);
-// printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
-// printf("deviceOverlap= %i \n",prop.deviceOverlap);
-// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
-// }
+ cudaDeviceProp prop;
+ int num_devices, device;
+ cudaGetDeviceCount(&num_devices);
+ if (num_devices > 1) {
+ int max_multiprocessors = 0, max_device = 0;
+ for (device = 0; device < num_devices; device++) {
+ cudaDeviceProp properties;
+ cudaGetDeviceProperties(&properties, device);
+ if (max_multiprocessors < properties.multiProcessorCount) {
+ max_multiprocessors = properties.multiProcessorCount;
+ max_device = device;
+ }
+ printf("Found GPU device # %i\n",device);
+ }
+ //cudaSetDevice(max_device);
+
+ //set random device!
+ cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs
+
+ cudaGetDeviceProperties( &prop, max_device );
+ //debug code
+ if (prop.canMapHostMemory != 1) {
+ printf( "Device can not map memory.\n" );
+ }
+ printf("L2 Cache size= %u \n",prop.l2CacheSize);
+ printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
+ printf("maxGridSize= %i \n",prop.maxGridSize[0]);
+ printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
+ printf("deviceOverlap= %i \n",prop.deviceOverlap);
+ printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
+ }else{
+ int whichDevice;
+ cudaGetDevice( &whichDevice );
+ cudaGetDeviceProperties( &prop, whichDevice );
+ //debug code
+ if (prop.canMapHostMemory != 1) {
+ printf( "Device can not map memory.\n" );
+ }
+
+ printf("L2 Cache size= %u \n",prop.l2CacheSize);
+ printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
+ printf("maxGridSize= %i \n",prop.maxGridSize[0]);
+ printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
+ printf("deviceOverlap= %i \n",prop.deviceOverlap);
+ printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
+ }
// (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
@@ -228,7 +228,7 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
// Launch the Vector Add CUDA Kernel
// TODO: write a smart load balance using device info!
threadsPerBlock = 64;
- blocksPerGrid =(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;
+ blocksPerGrid = 128;//(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;
cudaStreamCreate (&stream1) ;
//cudaStreamCreate (&stream2) ;
@@ -261,7 +261,7 @@ bool cuda_multicorrelator::set_local_code_and_taps(
//******** CudaMalloc version ***********
//local code CPU -> GPU copy memory
cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1);
- d_code_length_chips=(float)code_length_chips;
+ d_code_length_chips=code_length_chips;
//Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
@@ -292,6 +292,17 @@ bool cuda_multicorrelator::set_input_output_vectors(
return true;
}
+
+#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
+{
+ if (code != cudaSuccess)
+ {
+ fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+ if (abort) exit(code);
+ }
+}
+
bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
float rem_carrier_phase_in_rad,
float phase_step_rad,
@@ -325,14 +336,14 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
phase_step_rad
);
- //cudaGetLastError();
- //wait for correlators end...
- cudaStreamSynchronize(stream1);
+ gpuErrchk( cudaPeekAtLastError() );
+ gpuErrchk( cudaStreamSynchronize(stream1));
+
// cudaMemCpy version
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
- //cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
+ //cudaMemcpyAsync(d_corr_out_cpu, d_corr_out, sizeof(std::complex<float>)*n_correlators,
// cudaMemcpyDeviceToHost,stream1);
return true;
}
diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.h b/src/algorithms/tracking/libs/cuda_multicorrelator.h
index 161cbfe..4b11f85 100644
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.h
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.h
@@ -155,7 +155,7 @@ private:
int *d_shifts_samples;
float *d_shifts_chips;
- float d_code_length_chips;
+ int d_code_length_chips;
int threadsPerBlock;
int blocksPerGrid;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits mailing list