[hamradio-commits] [gnss-sdr] 85/126: Migrating cuda tracking internal DLL/PLL vars from float to double

Carles Fernandez carles_fernandez-guest at moszumanska.debian.org
Sat Dec 26 18:38:04 UTC 2015


This is an automated email from the git hooks/post-receive script.

carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.

commit 424e7abe68a063c87cda7ad06814083a82b5c291
Author: Javier Arribas <javiarribas at gmail.com>
Date:   Wed Dec 2 19:00:29 2015 +0100

    Migrating cuda tracking internal DLL/PLL vars from float to double
---
 conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf           |  22 ++--
 conf/gnss-sdr_Hybrid_byte_sim.conf                 |   2 +-
 .../gps_l1_ca_dll_pll_tracking_gpu_cc.cc           | 143 +++++++++++----------
 .../gps_l1_ca_dll_pll_tracking_gpu_cc.h            |  20 +--
 .../tracking/libs/cuda_multicorrelator.cu          | 134 +------------------
 5 files changed, 99 insertions(+), 222 deletions(-)

diff --git a/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf b/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf
index 8787ef5..52b1781 100644
--- a/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf
+++ b/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf
@@ -17,10 +17,10 @@ ControlThread.wait_for_flowgraph=false
 SignalSource.implementation=File_Signal_Source
 
 ;#filename: path to file with the captured GNSS signal samples to be processed
-SignalSource.filename=/media/javier/SISTEMA/signals/New York/4msps.dat
+SignalSource.filename=/home/javier/ClionProjects/gnss-sim/build/signal_out.bin
 
 ;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
-SignalSource.item_type=gr_complex
+SignalSource.item_type=byte
 
 ;#sampling_frequency: Original Signal sampling frequency in [Hz] 
 SignalSource.sampling_frequency=4000000
@@ -28,12 +28,6 @@ SignalSource.sampling_frequency=4000000
 ;#freq: RF front-end center frequency in [Hz] 
 SignalSource.freq=1575420000
 
-;#gain: Front-end Gain in [dB] 
-SignalSource.gain=60
-
-;#subdevice: UHD subdevice specification (for USRP1 use A:0 or B:0)
-SignalSource.subdevice=B:0
-
 ;#samples: Number of samples to be processed. Notice that 0 indicates the entire file.
 SignalSource.samples=0
 
@@ -58,12 +52,12 @@ SignalSource.enable_throttle_control=false
 ;#[Pass_Through] disables this block and the [DataTypeAdapter], [InputFilter] and [Resampler] blocks
 ;#[Signal_Conditioner] enables this block. Then you have to configure [DataTypeAdapter], [InputFilter] and [Resampler] blocks
 ;SignalConditioner.implementation=Signal_Conditioner
-SignalConditioner.implementation=Pass_Through
+SignalConditioner.implementation=Signal_Conditioner
 
 ;######### DATA_TYPE_ADAPTER CONFIG ############
 ;## Changes the type of input data. Please disable it in this version.
 ;#implementation: [Pass_Through] disables this block
-DataTypeAdapter.implementation=Pass_Through
+DataTypeAdapter.implementation=Ibyte_To_Complex
 
 ;######### INPUT_FILTER CONFIG ############
 ;## Filter the input data. Can be combined with frequency translation for IF signals
@@ -210,13 +204,13 @@ Acquisition_GPS.sampled_ms=1
 ;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
 Acquisition_GPS.implementation=GPS_L1_CA_PCPS_Acquisition
 ;#threshold: Acquisition threshold
-Acquisition_GPS.threshold=0.005
+Acquisition_GPS.threshold=0.06
 ;#pfa: Acquisition false alarm probability. This option overrides the threshold option. Only use with implementations: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition] 
 ;Acquisition_GPS.pfa=0.01
 ;#doppler_max: Maximum expected Doppler shift [Hz]
-Acquisition_GPS.doppler_max=10000
+Acquisition_GPS.doppler_max=6000
 ;#doppler_max: Doppler step in the grid search [Hz]
-Acquisition_GPS.doppler_step=500
+Acquisition_GPS.doppler_step=100
 
 ;######### TRACKING GLOBAL CONFIG ############
 
@@ -235,7 +229,7 @@ Tracking_GPS.dump=true
 Tracking_GPS.dump_filename=../data/epl_tracking_ch_
 
 ;#pll_bw_hz: PLL loop filter bandwidth [Hz]
-Tracking_GPS.pll_bw_hz=55.0;
+Tracking_GPS.pll_bw_hz=15.0;
 
 ;#dll_bw_hz: DLL loop filter bandwidth [Hz]
 Tracking_GPS.dll_bw_hz=1.5
diff --git a/conf/gnss-sdr_Hybrid_byte_sim.conf b/conf/gnss-sdr_Hybrid_byte_sim.conf
index 2f8cb06..5102e35 100644
--- a/conf/gnss-sdr_Hybrid_byte_sim.conf
+++ b/conf/gnss-sdr_Hybrid_byte_sim.conf
@@ -233,7 +233,7 @@ Acquisition_1B.doppler_step=125
 ;######### TRACKING GPS CONFIG ############
 
 ;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking] or [GPS_L1_CA_TCP_CONNECTOR_Tracking] or [Galileo_E1_DLL_PLL_VEML_Tracking]
-Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Artemisa_Tracking
+Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking
 ;#item_type: Type and resolution for each of the signal samples. Use only [gr_complex] in this version.
 Tracking_1C.item_type=gr_complex
 
diff --git a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
index fb38759..d4fffeb 100644
--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
@@ -195,30 +195,30 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
     d_acq_sample_stamp =  d_acquisition_gnss_synchro->Acq_samplestamp_samples;
 
     long int acq_trk_diff_samples;
-    float acq_trk_diff_seconds;
+    double acq_trk_diff_seconds;
     acq_trk_diff_samples = static_cast<long int>(d_sample_counter) - static_cast<long int>(d_acq_sample_stamp);//-d_vector_length;
     DLOG(INFO) << "Number of samples between Acquisition and Tracking =" << acq_trk_diff_samples;
-    acq_trk_diff_seconds = static_cast<float>(acq_trk_diff_samples) / static_cast<float>(d_fs_in);
+    acq_trk_diff_seconds = static_cast<double>(acq_trk_diff_samples) / static_cast<double>(d_fs_in);
     //doppler effect
     // Fd=(C/(C+Vr))*F
-    float radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ;
+    double radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ;
     // new chip and prn sequence periods based on acq Doppler
-    float T_chip_mod_seconds;
-    float T_prn_mod_seconds;
-    float T_prn_mod_samples;
+    double T_chip_mod_seconds;
+    double T_prn_mod_seconds;
+    double T_prn_mod_samples;
     d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
-    T_chip_mod_seconds = 1/d_code_freq_chips;
+    T_chip_mod_seconds = 1.0/d_code_freq_chips;
     T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
-    T_prn_mod_samples = T_prn_mod_seconds * static_cast<float>(d_fs_in);
+    T_prn_mod_samples = T_prn_mod_seconds * static_cast<double>(d_fs_in);
 
     d_current_prn_length_samples = round(T_prn_mod_samples);
 
-    float T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
-    float T_prn_true_samples = T_prn_true_seconds * static_cast<float>(d_fs_in);
-    float T_prn_diff_seconds=  T_prn_true_seconds - T_prn_mod_seconds;
-    float N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds;
-    float corrected_acq_phase_samples, delay_correction_samples;
-    corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<float>(d_fs_in)), T_prn_true_samples);
+    double T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
+    double T_prn_true_samples = T_prn_true_seconds * static_cast<double>(d_fs_in);
+    double T_prn_diff_seconds=  T_prn_true_seconds - T_prn_mod_seconds;
+    double N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds;
+    double corrected_acq_phase_samples, delay_correction_samples;
+    corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<double>(d_fs_in)), T_prn_true_samples);
     if (corrected_acq_phase_samples < 0)
         {
             corrected_acq_phase_samples = T_prn_mod_samples + corrected_acq_phase_samples;
@@ -286,10 +286,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
         gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
 {
     // process vars
-    float carr_error_hz=0.0;
-    float carr_error_filt_hz=0.0;
-    float code_error_chips=0.0;
-    float code_error_filt_chips=0.0;
+	double carr_error_hz=0.0;
+	double carr_error_filt_hz=0.0;
+	double code_error_chips=0.0;
+	double code_error_filt_chips=0.0;
 
     // Block input data and block output stream pointers
     const gr_complex* in = (gr_complex*) input_items[0];
@@ -320,20 +320,24 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
             current_synchro_data = *d_acquisition_gnss_synchro;
 
             // UPDATE NCO COMMAND
-            float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
+            double phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
 
         	//code resampler on GPU (new)
-            float code_phase_step_chips = static_cast<float>(d_code_freq_chips) / static_cast<float>(d_fs_in);
-            float rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
+            double code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in);
+            double rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
 
             memcpy(in_gpu, in, sizeof(gr_complex) * d_current_prn_length_samples);
             cudaProfilerStart();
-            multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carr_phase_rad, phase_step_rad, code_phase_step_chips, rem_code_phase_chips, d_current_prn_length_samples, 3);
+            multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda( static_cast<float>(d_rem_carr_phase_rad),
+            		static_cast<float>(phase_step_rad),
+            		static_cast<float>(code_phase_step_chips),
+            		static_cast<float>(rem_code_phase_chips),
+            		d_current_prn_length_samples, 3);
             cudaProfilerStop();
 
             // ################## PLL ##########################################################
             // PLL discriminator
-            carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / static_cast<float>(GPS_TWO_PI);
+            carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / GPS_TWO_PI;
             // Carrier discriminator filter
             carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz);
             // New carrier Doppler frequency estimation
@@ -352,7 +356,7 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
             // Code discriminator filter
             code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second]
             //Code phase accumulator
-            float code_error_filt_secs;
+            double code_error_filt_secs;
             code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds]
             d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs;
 
@@ -363,10 +367,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
             double T_prn_samples;
             double K_blk_samples;
             // Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
-            T_chip_seconds = 1 / static_cast<double>(d_code_freq_chips);
+            T_chip_seconds = 1.0 / d_code_freq_chips;
             T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
             T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
-            K_blk_samples = T_prn_samples + d_rem_code_phase_samples + static_cast<double>(code_error_filt_secs) * static_cast<double>(d_fs_in);
+            K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast<double>(d_fs_in);
             //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
 
             // ####### CN0 ESTIMATION AND LOCK DETECTORS ######
@@ -415,16 +419,16 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
             //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples)/static_cast<double>(d_fs_in);
 
             // Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!, but some glitches??)
-            current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + static_cast<double>(d_rem_code_phase_samples)) / static_cast<double>(d_fs_in);
+            current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
             //compute remnant code phase samples AFTER the Tracking timestamp
             d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
 
             //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter)/static_cast<double>(d_fs_in);
             // This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
             current_synchro_data.Code_phase_secs = 0;
-            current_synchro_data.Carrier_phase_rads = static_cast<double>(d_acc_carrier_phase_rad);
-            current_synchro_data.Carrier_Doppler_hz = static_cast<double>(d_carrier_doppler_hz);
-            current_synchro_data.CN0_dB_hz = static_cast<double>(d_CN0_SNV_dB_Hz);
+            current_synchro_data.Carrier_phase_rads = d_acc_carrier_phase_rad;
+            current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;
+            current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
             current_synchro_data.Flag_valid_pseudorange = false;
             *out[0] = current_synchro_data;
 
@@ -497,41 +501,50 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
             tmp_L = std::abs<float>(*d_Late);
             try
             {
-                    // EPR
-                    d_dump_file.write(reinterpret_cast<char*>(&tmp_E), sizeof(float));
-                    d_dump_file.write(reinterpret_cast<char*>(&tmp_P), sizeof(float));
-                    d_dump_file.write(reinterpret_cast<char*>(&tmp_L), sizeof(float));
-                    // PROMPT I and Q (to analyze navigation symbols)
-                    d_dump_file.write(reinterpret_cast<char*>(&prompt_I), sizeof(float));
-                    d_dump_file.write(reinterpret_cast<char*>(&prompt_Q), sizeof(float));
-                    // PRN start sample stamp
-                    //tmp_float=(float)d_sample_counter;
-                    d_dump_file.write(reinterpret_cast<char*>(&d_sample_counter), sizeof(unsigned long int));
-                    // accumulated carrier phase
-                    d_dump_file.write(reinterpret_cast<char*>(&d_acc_carrier_phase_rad), sizeof(float));
-
-                    // carrier and code frequency
-                    d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(float));
-                    tmp_float=d_code_freq_chips;
-                    d_dump_file.write(reinterpret_cast<char*>(&tmp_float), sizeof(float));
-
-                    //PLL commands
-                    d_dump_file.write(reinterpret_cast<char*>(&carr_error_hz), sizeof(float));
-                    d_dump_file.write(reinterpret_cast<char*>(&carr_error_filt_hz), sizeof(float));
-
-                    //DLL commands
-                    d_dump_file.write(reinterpret_cast<char*>(&code_error_chips), sizeof(float));
-                    d_dump_file.write(reinterpret_cast<char*>(&code_error_filt_chips), sizeof(float));
-
-                    // CN0 and carrier lock test
-                    d_dump_file.write(reinterpret_cast<char*>(&d_CN0_SNV_dB_Hz), sizeof(float));
-                    d_dump_file.write(reinterpret_cast<char*>(&d_carrier_lock_test), sizeof(float));
-
-                    // AUX vars (for debug purposes)
-                    tmp_float = d_rem_code_phase_samples;
-                    d_dump_file.write(reinterpret_cast<char*>(&tmp_float), sizeof(float));
-                    tmp_double = static_cast<double>(d_sample_counter + d_current_prn_length_samples);
-                    d_dump_file.write(reinterpret_cast<char*>(&tmp_double), sizeof(double));
+
+                // EPR
+                d_dump_file.write((char*)&tmp_E, sizeof(float));
+                d_dump_file.write((char*)&tmp_P, sizeof(float));
+                d_dump_file.write((char*)&tmp_L, sizeof(float));
+                // PROMPT I and Q (to analyze navigation symbols)
+                d_dump_file.write((char*)&prompt_I, sizeof(float));
+                d_dump_file.write((char*)&prompt_Q, sizeof(float));
+                // PRN start sample stamp
+                //tmp_float=(float)d_sample_counter;
+                d_dump_file.write((char*)&d_sample_counter, sizeof(unsigned long int));
+                // accumulated carrier phase
+                tmp_float = d_acc_carrier_phase_rad;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+
+                // carrier and code frequency
+                tmp_float = d_carrier_doppler_hz;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+                tmp_float = d_code_freq_chips;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+
+                //PLL commands
+                tmp_float = carr_error_hz;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+                tmp_float = carr_error_filt_hz;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+
+                //DLL commands
+                tmp_float = code_error_chips;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+                tmp_float = code_error_filt_chips;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+
+                // CN0 and carrier lock test
+                tmp_float = d_CN0_SNV_dB_Hz;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+                tmp_float = d_carrier_lock_test;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+
+                // AUX vars (for debug purposes)
+                tmp_float = d_rem_code_phase_samples;
+                d_dump_file.write((char*)&tmp_float, sizeof(float));
+                tmp_double = (double)(d_sample_counter + d_current_prn_length_samples);
+                d_dump_file.write((char*)&tmp_double, sizeof(double));
             }
             catch (std::ifstream::failure e)
             {
diff --git a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h
index e632c48..b6842f4 100644
--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h
@@ -140,22 +140,22 @@ private:
 
     // remaining code phase and carrier phase between tracking loops
     double d_rem_code_phase_samples;
-    float d_rem_carr_phase_rad;
+    double d_rem_carr_phase_rad;
 
     // PLL and DLL filter library
     Tracking_2nd_DLL_filter d_code_loop_filter;
     Tracking_2nd_PLL_filter d_carrier_loop_filter;
 
     // acquisition
-    float d_acq_code_phase_samples;
-    float d_acq_carrier_doppler_hz;
+    double d_acq_code_phase_samples;
+    double d_acq_carrier_doppler_hz;
 
     // tracking vars
     double d_code_freq_chips;
-    float d_carrier_doppler_hz;
-    float d_acc_carrier_phase_rad;
-    float d_code_phase_samples;
-    float d_acc_code_phase_secs;
+    double d_carrier_doppler_hz;
+    double d_acc_carrier_phase_rad;
+    double d_code_phase_samples;
+    double d_acc_code_phase_secs;
 
     //PRN period in samples
     int d_current_prn_length_samples;
@@ -167,9 +167,9 @@ private:
     // CN0 estimation and lock detector
     int d_cn0_estimation_counter;
     gr_complex* d_Prompt_buffer;
-    float d_carrier_lock_test;
-    float d_CN0_SNV_dB_Hz;
-    float d_carrier_lock_threshold;
+    double d_carrier_lock_test;
+    double d_CN0_SNV_dB_Hz;
+    double d_carrier_lock_threshold;
     int d_carrier_lock_fail_counter;
 
     // control vars
diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.cu b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
index 5017d14..6ebec80 100644
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
@@ -41,103 +41,6 @@
 
 #define ACCUM_N 128
 
-__global__ void scalarProdGPUCPXxN_shifts_chips(
-    GPU_Complex *d_corr_out,
-    GPU_Complex *d_sig_in,
-    GPU_Complex *d_local_code_in,
-    float *d_shifts_chips,
-    float code_length_chips,
-    float code_phase_step_chips,
-    float rem_code_phase_chips,
-    int vectorN,
-    int elementN
-)
-{
-    //Accumulators cache
-    __shared__ GPU_Complex accumResult[ACCUM_N];
-
-    ////////////////////////////////////////////////////////////////////////////
-    // Cycle through every pair of vectors,
-    // taking into account that vector counts can be different
-    // from total number of thread blocks
-    ////////////////////////////////////////////////////////////////////////////
-    for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
-    {
-        //int vectorBase = IMUL(elementN, vec);
-        //int vectorEnd  = elementN;
-
-        ////////////////////////////////////////////////////////////////////////
-        // Each accumulator cycles through vectors with
-        // stride equal to number of total number of accumulators ACCUM_N
-        // At this stage ACCUM_N is only preferred be a multiple of warp size
-        // to meet memory coalescing alignment constraints.
-        ////////////////////////////////////////////////////////////////////////
-        for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
-        {
-        	GPU_Complex sum = GPU_Complex(0,0);
-
-            for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
-            {
-            	//original sample code
-                //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
-            	//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
-            	//sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos+d_shifts_samples[vec]]);
-
-            	//custom code for multitap correlator
-            	// 1.resample local code for the current shift
-            	float local_code_chip_index= fmod(code_phase_step_chips*(float)pos + d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips);
-            	//Take into account that in multitap correlators, the shifts can be negative!
-            	if (local_code_chip_index<0.0) local_code_chip_index+=code_length_chips;
-            	//printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index);
-            	// 2.correlate
-            	sum.multiply_acc(d_sig_in[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]);
-
-            }
-            accumResult[iAccum] = sum;
-        }
-
-        ////////////////////////////////////////////////////////////////////////
-        // Perform tree-like reduction of accumulators' results.
-        // ACCUM_N has to be power of two at this stage
-        ////////////////////////////////////////////////////////////////////////
-        for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1)
-        {
-            __syncthreads();
-
-            for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x)
-            {
-                accumResult[iAccum] += accumResult[stride + iAccum];
-            }
-        }
-
-        if (threadIdx.x == 0)
-        	{
-        		d_corr_out[vec] = accumResult[0];
-        	}
-    }
-}
-
-/**
- * CUDA Kernel Device code
- *
- * Computes the carrier Doppler wipe-off by integrating the NCO in the CUDA kernel
- */
-__global__ void
-CUDA_32fc_Doppler_wipeoff(  GPU_Complex *sig_out, GPU_Complex *sig_in, float rem_carrier_phase_in_rad, float phase_step_rad, int numElements)
-{
-	// CUDA version of floating point NCO and vector dot product integrated
-    float sin;
-    float cos;
-    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
-         i < numElements;
-         i += blockDim.x * gridDim.x)
-    {
-    	__sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos);
-    	sig_out[i] =  sig_in[i] * GPU_Complex(cos,-sin);
-    }
-}
-
-
 __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
     GPU_Complex *d_corr_out,
     GPU_Complex *d_sig_in,
@@ -398,37 +301,15 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
 		int n_correlators)
 	{
 
-
 	// cudaMemCpy version
 	//size_t memSize = signal_length_samples * sizeof(std::complex<float>);
 	// input signal CPU -> GPU copy memory
     //cudaMemcpyAsync(d_sig_in, d_sig_in_cpu, memSize,
     //                               cudaMemcpyHostToDevice, stream2);
-
     //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
-    //Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
-    //cudaStreamSynchronize(stream2);
-
-    //CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream1>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
-
-    //wait for Doppler wipeoff end...
-    //cudaStreamSynchronize(stream1);
-    //cudaStreamSynchronize(stream2);
 
     //launch the multitap correlator with integrated local code resampler!
 
-//    scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock,0 ,stream1>>>(
-//			d_corr_out,
-//			d_sig_doppler_wiped,
-//			d_local_codes_in,
-//			d_shifts_chips,
-//			d_code_length_chips,
-//	        code_phase_step_chips,
-//	        rem_code_phase_chips,
-//			n_correlators,
-//			signal_length_samples
-//		);
-
     Doppler_wippe_scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock,0 ,stream1>>>(
 			d_corr_out,
 			d_sig_in,
@@ -444,25 +325,15 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
 			phase_step_rad
 			);
 
-    //debug
-//	std::complex<float>* debug_signal;
-//	debug_signal=static_cast<std::complex<float>*>(malloc(memSize));
-//    cudaMemcpyAsync(debug_signal, d_sig_doppler_wiped, memSize,
-//            cudaMemcpyDeviceToHost,stream1);
-//    cudaStreamSynchronize(stream1);
-//	std::cout<<"d_sig_doppler_wiped GPU="<<debug_signal[456]<<","<<debug_signal[1]<<","<<debug_signal[2]<<","<<debug_signal[3]<<std::endl;
-
     //cudaGetLastError();
     //wait for correlators end...
-    //cudaStreamSynchronize(stream1);
+    cudaStreamSynchronize(stream1);
+	// cudaMemCpy version
     // Copy the device result vector in device memory to the host result vector
     // in host memory.
-
     //scalar products (correlators outputs)
     //cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
     //        cudaMemcpyDeviceToHost,stream1);
-
-    cudaStreamSynchronize(stream1);
     return true;
 }
 
@@ -490,7 +361,6 @@ bool cuda_multicorrelator::free_cuda()
 	if (d_corr_out!=NULL) cudaFree(d_corr_out);
 	if (d_shifts_samples!=NULL) cudaFree(d_shifts_samples);
 	if (d_shifts_chips!=NULL) cudaFree(d_shifts_chips);
-
     // Reset the device and exit
     // cudaDeviceReset causes the driver to clean up all state. While
     // not mandatory in normal operation, it is good practice.  It is also

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git



More information about the pkg-hamradio-commits mailing list