[hamradio-commits] [gnss-sdr] 14/22: Adding CPU and GPU multicorrelators unit tests
Carles Fernandez
carles_fernandez-guest at moszumanska.debian.org
Thu Jun 30 20:11:29 UTC 2016
This is an automated email from the git hooks/post-receive script.
carles_fernandez-guest pushed a commit to branch next
in repository gnss-sdr.
commit c8d7a607f850a677df7032ed5087d191ce0003f1
Author: Javier Arribas <jarribas at cttc.es>
Date: Fri Jun 17 12:48:31 2016 +0200
Adding CPU and GPU multicorrelators unit tests
---
.../tracking/libs/cuda_multicorrelator.cu | 2 +-
src/tests/CMakeLists.txt | 10 ++
src/tests/arithmetic/cpu_multicorrelator_test.cc | 167 +++++++++++++++++++++
src/tests/arithmetic/gpu_multicorrelator_test.cc | 166 ++++++++++++++++++++
src/tests/test_main.cc | 6 +
5 files changed, 350 insertions(+), 1 deletion(-)
diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.cu b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
index 61dc305..78c3612 100644
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
@@ -198,7 +198,7 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
//********* ZERO COPY VERSION ************
// Set flag to enable zero copy access
// Optimal in shared memory devices (like Jetson K1)
- cudaSetDeviceFlags(cudaDeviceMapHost);
+ //cudaSetDeviceFlags(cudaDeviceMapHost);
//******** CudaMalloc version ***********
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index ce8c40e..08ad88e 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -33,6 +33,7 @@ if(GTEST_INCLUDE_DIRS)
set(GTEST_DIR_LOCAL true)
endif(GTEST_INCLUDE_DIRS)
+
if(NOT ${GTEST_DIR_LOCAL})
# if GTEST_DIR is not defined, we download and build it
set(gtest_RELEASE 1.7.0)
@@ -86,6 +87,11 @@ endif(NOT ${GTEST_DIR_LOCAL})
set(GNSS_SDR_TEST_OPTIONAL_LIBS "")
set(GNSS_SDR_TEST_OPTIONAL_HEADERS "")
+if(ENABLE_CUDA)
+ set(GNSS_SDR_TEST_OPTIONAL_HEADERS ${GNSS_SDR_TEST_OPTIONAL_HEADERS} ${CUDA_INCLUDE_DIRS})
+ set(GNSS_SDR_TEST_OPTIONAL_LIBS ${GNSS_SDR_TEST_OPTIONAL_LIBS} ${CUDA_LIBRARIES})
+endif(ENABLE_CUDA)
+
if(ENABLE_GPERFTOOLS)
if(GPERFTOOLS_FOUND)
set(GNSS_SDR_TEST_OPTIONAL_LIBS "${GNSS_SDR_TEST_OPTIONAL_LIBS};${GPERFTOOLS_LIBRARIES}")
@@ -152,6 +158,10 @@ if(OPENCL_FOUND)
add_definitions(-DOPENCL_BLOCKS_TEST=1)
endif(OPENCL_FOUND)
+if (ENABLE_CUDA)
+ add_definitions(-DCUDA_BLOCKS_TEST=1)
+endif(ENABLE_CUDA)
+
add_definitions(-DTEST_PATH="${CMAKE_SOURCE_DIR}/src/tests/")
diff --git a/src/tests/arithmetic/cpu_multicorrelator_test.cc b/src/tests/arithmetic/cpu_multicorrelator_test.cc
new file mode 100644
index 0000000..d84b38f
--- /dev/null
+++ b/src/tests/arithmetic/cpu_multicorrelator_test.cc
@@ -0,0 +1,167 @@
+/*!
+ * \file cpu_multicorrelator_test.cc
+ * \brief This file implements timing tests for the CPU multicorrelator.
+ * \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <ctime>
+#include <complex>
+#include <thread>
+#include <volk/volk.h>
+#include "cpu_multicorrelator.h"
+#include "gps_sdr_signal_processing.h"
+#include "GPS_L1_CA.h"
+
+
+DEFINE_int32(cpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in CPU multicorrelator test timing test");
+DEFINE_int32(cpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in CPU multicorrelator test timing test");
+
+// Thread entry point: forwards one correlation request to the given correlator.
+// d_n_correlator_taps is part of the signature but is not used by this wrapper.
+void run_correlator_cpu(cpu_multicorrelator* correlator,
+        float d_rem_carrier_phase_rad,
+        float d_carrier_phase_step_rad,
+        float d_code_phase_step_chips,
+        float d_rem_code_phase_chips,
+        int correlation_size,
+        int d_n_correlator_taps)
+{
+    (void)d_n_correlator_taps; // deliberately ignored, see above
+    correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, d_carrier_phase_step_rad,
+            d_code_phase_step_chips, d_rem_code_phase_chips, correlation_size);
+}
+
+TEST(CPU_multicorrelator_test, MeasureExecutionTime)
+{
+    // Timing test: for each correlation length, runs 1..max_threads correlators
+    // concurrently (one std::thread each) and prints the mean time per iteration.
+    struct timeval tv;
+    // A non-positive flag value gives an empty pool instead of an invalid array bound.
+    const int max_threads = (FLAGS_cpu_multicorrelator_max_threads_test > 0) ? FLAGS_cpu_multicorrelator_max_threads_test : 0;
+    std::vector<std::thread> thread_pool;
+    std::vector<cpu_multicorrelator*> correlator_pool(max_threads);
+    const unsigned int correlation_sizes[3] = { 2048, 4096, 8192 };
+    double execution_times[3];
+
+    const int d_n_correlator_taps = 3; // Early, Prompt, and Late
+    const int d_vector_length = correlation_sizes[2]; // max correlation size to allocate all the necessary memory
+
+    // allocate host memory
+    // Get space for a vector with the C/A code replica sampled 1x/chip
+    gr_complex* d_ca_code = static_cast<gr_complex*>(volk_malloc(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_get_alignment()));
+    gr_complex* in_cpu = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
+    // correlator outputs (scalar)
+    gr_complex* d_correlator_outs = static_cast<gr_complex*>(volk_malloc(d_n_correlator_taps * sizeof(gr_complex), volk_get_alignment()));
+    float* d_local_code_shift_chips = static_cast<float*>(volk_malloc(d_n_correlator_taps * sizeof(float), volk_get_alignment()));
+    // The buffers are dereferenced right below, so stop the test if any allocation failed.
+    ASSERT_TRUE(d_ca_code != NULL && in_cpu != NULL && d_correlator_outs != NULL && d_local_code_shift_chips != NULL);
+
+    for (int n = 0; n < d_n_correlator_taps; n++)
+        {
+            d_correlator_outs[n] = gr_complex(0, 0);
+        }
+    // Set TAPs delay values [chips]
+    const float d_early_late_spc_chips = 0.5f;
+    d_local_code_shift_chips[0] = -d_early_late_spc_chips;
+    d_local_code_shift_chips[1] = 0.0f;
+    d_local_code_shift_chips[2] = d_early_late_spc_chips;
+
+    //--- Perform initializations ------------------------------
+    // generate local reference (1 sample per chip)
+    gps_l1_ca_code_gen_complex(d_ca_code, 1, 0);
+    // generate input signal
+    for (int n = 0; n < 2 * d_vector_length; n++)
+        {
+            in_cpu[n] = std::complex<float>(static_cast<float>(rand()) / static_cast<float>(RAND_MAX), static_cast<float>(rand()) / static_cast<float>(RAND_MAX));
+        }
+
+    // NOTE(review): all correlators receive the same output and input buffers; if the
+    // correlator writes its results to d_correlator_outs, the concurrent runs below
+    // race on it. Tolerable for a pure timing test, but confirm before reusing the values.
+    for (int n = 0; n < max_threads; n++)
+        {
+            correlator_pool[n] = new cpu_multicorrelator();
+            correlator_pool[n]->init(d_vector_length, d_n_correlator_taps);
+            correlator_pool[n]->set_input_output_vectors(d_correlator_outs, in_cpu);
+            correlator_pool[n]->set_local_code_and_taps(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips);
+        }
+
+    const float d_rem_carrier_phase_rad = 0.0f;
+    const float d_carrier_phase_step_rad = 0.1f;
+    const float d_code_phase_step_chips = 0.3f;
+    const float d_rem_code_phase_chips = 0.4f;
+
+    EXPECT_NO_THROW(
+        for (int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++)
+            {
+                for (int current_max_threads = 1; current_max_threads < (max_threads + 1); current_max_threads++)
+                    {
+                        std::cout << "Running " << current_max_threads << " concurrent correlators" << std::endl;
+                        gettimeofday(&tv, NULL);
+                        // Widen tv_sec before scaling so the product cannot overflow a 32-bit time_t.
+                        const long long int begin = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
+                        for (int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
+                            {
+                                // create the concurrent correlator threads
+                                for (int current_thread = 0; current_thread < current_max_threads; current_thread++)
+                                    {
+                                        thread_pool.emplace_back(run_correlator_cpu,
+                                                correlator_pool[current_thread],
+                                                d_rem_carrier_phase_rad,
+                                                d_carrier_phase_step_rad,
+                                                d_code_phase_step_chips,
+                                                d_rem_code_phase_chips,
+                                                correlation_sizes[correlation_sizes_idx],
+                                                d_n_correlator_taps);
+                                    }
+                                // wait for the threads to finish their work and destroy the thread objects
+                                for (auto &t : thread_pool)
+                                    {
+                                        t.join();
+                                    }
+                                thread_pool.clear();
+                            }
+                        gettimeofday(&tv, NULL);
+                        const long long int end = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
+                        execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test));
+                        std::cout << "CPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl;
+                    }
+            }
+    );
+
+    // Release the correlators first, then the buffers they were configured with.
+    for (int n = 0; n < max_threads; n++)
+        {
+            correlator_pool[n]->free();
+            delete correlator_pool[n];
+        }
+    volk_free(d_local_code_shift_chips);
+    volk_free(d_correlator_outs);
+    volk_free(d_ca_code);
+    volk_free(in_cpu);
+}
diff --git a/src/tests/arithmetic/gpu_multicorrelator_test.cc b/src/tests/arithmetic/gpu_multicorrelator_test.cc
new file mode 100644
index 0000000..11c4fde
--- /dev/null
+++ b/src/tests/arithmetic/gpu_multicorrelator_test.cc
@@ -0,0 +1,166 @@
+/*!
+ * \file gpu_multicorrelator_test.cc
+ * \brief This file implements timing tests for the GPU (CUDA) multicorrelator.
+ * \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <ctime>
+#include <complex>
+#include <thread>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include "cuda_multicorrelator.h"
+#include "gps_sdr_signal_processing.h"
+#include "GPS_L1_CA.h"
+
+
+DEFINE_int32(gpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in GPU multicorrelator test timing test");
+DEFINE_int32(gpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in GPU multicorrelator test timing test");
+
+// Thread entry point: forwards one correlation request to the given CUDA correlator.
+void run_correlator_gpu(cuda_multicorrelator* correlator,
+        float d_rem_carrier_phase_rad,
+        float d_carrier_phase_step_rad,
+        float d_code_phase_step_chips,
+        float d_rem_code_phase_chips,
+        int correlation_size,
+        int d_n_correlator_taps)
+{
+    // Arguments are passed through unchanged.
+    correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(
+            d_rem_carrier_phase_rad, d_carrier_phase_step_rad,
+            d_code_phase_step_chips, d_rem_code_phase_chips,
+            correlation_size, d_n_correlator_taps);
+}
+
+TEST(GPU_multicorrelator_test, MeasureExecutionTime)
+{
+    // Timing test: for each correlation length, runs 1..max_threads CUDA correlators
+    // concurrently (one std::thread each) and prints the mean time per iteration.
+    struct timeval tv;
+    // A non-positive flag value gives an empty pool instead of an invalid array bound.
+    const int max_threads = (FLAGS_gpu_multicorrelator_max_threads_test > 0) ? FLAGS_gpu_multicorrelator_max_threads_test : 0;
+    std::vector<std::thread> thread_pool;
+    std::vector<cuda_multicorrelator*> correlator_pool(max_threads);
+    const unsigned int correlation_sizes[3] = { 2048, 4096, 8192 };
+    double execution_times[3];
+
+    gr_complex* d_ca_code = NULL;
+    gr_complex* in_gpu = NULL;
+    gr_complex* d_correlator_outs = NULL;
+    float* d_local_code_shift_chips = NULL;
+    const int d_n_correlator_taps = 3; // Early, Prompt, and Late
+    const int d_vector_length = correlation_sizes[2]; // max correlation size to allocate all the necessary memory
+    // Set GPU flags
+    cudaSetDeviceFlags(cudaDeviceMapHost);
+    // allocate host memory
+    // pinned memory mode - use special function to get OS-pinned memory; stop the test if any allocation fails
+    // Get space for a vector with the C/A code replica sampled 1x/chip
+    ASSERT_EQ(cudaSuccess, cudaHostAlloc((void**)&d_ca_code, (static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex)), cudaHostAllocMapped | cudaHostAllocWriteCombined));
+    // Get space for the early / prompt / late tap delays
+    ASSERT_EQ(cudaSuccess, cudaHostAlloc((void**)&d_local_code_shift_chips, d_n_correlator_taps * sizeof(float), cudaHostAllocMapped | cudaHostAllocWriteCombined));
+    ASSERT_EQ(cudaSuccess, cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped | cudaHostAllocWriteCombined));
+    // correlator outputs (scalar)
+    ASSERT_EQ(cudaSuccess, cudaHostAlloc((void**)&d_correlator_outs, sizeof(gr_complex) * d_n_correlator_taps, cudaHostAllocMapped | cudaHostAllocWriteCombined));
+
+    //--- Perform initializations ------------------------------
+    // generate local reference (1 sample per chip)
+    gps_l1_ca_code_gen_complex(d_ca_code, 1, 0);
+    // generate input signal
+    for (int n = 0; n < 2 * d_vector_length; n++)
+        {
+            in_gpu[n] = std::complex<float>(static_cast<float>(rand()) / static_cast<float>(RAND_MAX), static_cast<float>(rand()) / static_cast<float>(RAND_MAX));
+        }
+    // Set TAPs delay values [chips]
+    const float d_early_late_spc_chips = 0.5f;
+    d_local_code_shift_chips[0] = -d_early_late_spc_chips;
+    d_local_code_shift_chips[1] = 0.0f;
+    d_local_code_shift_chips[2] = d_early_late_spc_chips;
+    // NOTE(review): d_ca_code and d_local_code_shift_chips are filled in above but never
+    // handed to the correlators here (the CPU test calls set_local_code_and_taps()).
+    // Confirm whether the CUDA correlator needs the equivalent call before timing.
+    for (int n = 0; n < max_threads; n++)
+        {
+            correlator_pool[n] = new cuda_multicorrelator();
+            correlator_pool[n]->init_cuda_integrated_resampler(d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, d_n_correlator_taps);
+            correlator_pool[n]->set_input_output_vectors(d_correlator_outs, in_gpu);
+        }
+
+    const float d_rem_carrier_phase_rad = 0.0f;
+    const float d_carrier_phase_step_rad = 0.1f;
+    const float d_code_phase_step_chips = 0.3f;
+    const float d_rem_code_phase_chips = 0.4f;
+
+    EXPECT_NO_THROW(
+        for (int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++)
+            {
+                for (int current_max_threads = 1; current_max_threads < (max_threads + 1); current_max_threads++)
+                    {
+                        std::cout << "Running " << current_max_threads << " concurrent correlators" << std::endl;
+                        gettimeofday(&tv, NULL);
+                        // Widen tv_sec before scaling so the product cannot overflow a 32-bit time_t.
+                        const long long int begin = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
+                        for (int k = 0; k < FLAGS_gpu_multicorrelator_iterations_test; k++)
+                            {
+                                // create the concurrent correlator threads
+                                for (int current_thread = 0; current_thread < current_max_threads; current_thread++)
+                                    {
+                                        thread_pool.emplace_back(run_correlator_gpu,
+                                                correlator_pool[current_thread],
+                                                d_rem_carrier_phase_rad,
+                                                d_carrier_phase_step_rad,
+                                                d_code_phase_step_chips,
+                                                d_rem_code_phase_chips,
+                                                correlation_sizes[correlation_sizes_idx],
+                                                d_n_correlator_taps);
+                                    }
+                                // wait for the threads to finish their work and destroy the thread objects
+                                for (auto &t : thread_pool)
+                                    {
+                                        t.join();
+                                    }
+                                thread_pool.clear();
+                            }
+                        gettimeofday(&tv, NULL);
+                        const long long int end = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
+                        execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test));
+                        std::cout << "GPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl;
+                    }
+            }
+    );
+
+    EXPECT_EQ(cudaSuccess, cudaFreeHost(in_gpu));
+    EXPECT_EQ(cudaSuccess, cudaFreeHost(d_correlator_outs));
+    EXPECT_EQ(cudaSuccess, cudaFreeHost(d_local_code_shift_chips));
+    EXPECT_EQ(cudaSuccess, cudaFreeHost(d_ca_code));
+    for (int n = 0; n < max_threads; n++)
+        {
+            correlator_pool[n]->free_cuda();
+            delete correlator_pool[n];
+        }
+}
diff --git a/src/tests/test_main.cc b/src/tests/test_main.cc
index a3dabce..5b3228f 100644
--- a/src/tests/test_main.cc
+++ b/src/tests/test_main.cc
@@ -92,9 +92,15 @@ DECLARE_string(log_dir);
#include "gnss_block/gps_l2_m_pcps_acquisition_test.cc"
#include "gnss_block/gps_l1_ca_pcps_acquisition_gsoc2013_test.cc"
//#include "gnss_block/gps_l1_ca_pcps_multithread_acquisition_gsoc2013_test.cc"
+#include "arithmetic/cpu_multicorrelator_test.cc"
#if OPENCL_BLOCKS_TEST
#include "gnss_block/gps_l1_ca_pcps_opencl_acquisition_gsoc2013_test.cc"
#endif
+
+#if CUDA_BLOCKS_TEST
+ #include "arithmetic/gpu_multicorrelator_test.cc"
+#endif
+
#include "gnss_block/gps_l1_ca_pcps_quicksync_acquisition_gsoc2014_test.cc"
#include "gnss_block/gps_l1_ca_pcps_tong_acquisition_gsoc2013_test.cc"
#include "gnss_block/galileo_e1_pcps_ambiguous_acquisition_test.cc"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-hamradio/gnss-sdr.git
More information about the pkg-hamradio-commits
mailing list