[arrayfire] 36/79: Enable multiple CUDA computes to be detected and enabled
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Jun 15 13:38:05 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository arrayfire.
commit dbc2ae13ca94b53da708cc56673f735d3516b60a
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Tue Jun 9 14:47:03 2015 -0400
Enable multiple CUDA computes to be detected and enabled
* Now enables multiple computes
* Removes the CUDA_DETECTED_COMPUTE cmake variable; COMPUTES_DETECTED_LIST is used instead
* Detection can still be disabled using CUDA_COMPUTE_DETECT=OFF
* Setting COMPUTES_DETECTED_LIST manually (e.g. "20;30;xy") will skip compute detection
and enable the specified computes instead
* Individual computes can also be enabled manually by setting CUDA_COMPUTE_XY to ON
---
CMakeModules/CUDACheckCompute.cmake | 40 ++++++++++------------
CMakeModules/cuda_compute_capability.c | 52 ----------------------------
CMakeModules/cuda_compute_capability.cpp | 58 ++++++++++++++++++++++++++++++++
src/backend/cuda/CMakeLists.txt | 18 ++++++++--
4 files changed, 91 insertions(+), 77 deletions(-)
diff --git a/CMakeModules/CUDACheckCompute.cmake b/CMakeModules/CUDACheckCompute.cmake
index db2174a..574222d 100644
--- a/CMakeModules/CUDACheckCompute.cmake
+++ b/CMakeModules/CUDACheckCompute.cmake
@@ -5,34 +5,30 @@
# Check for GPUs present and their compute capability
# based on http://stackoverflow.com/questions/2285185/easiest-way-to-test-for-existence-of-cuda-capable-gpu-from-cmake/2297877#2297877 (Christopher Bruns)
-if(CUDA_FOUND)
- message(STATUS "${CMAKE_MODULE_PATH}/cuda_compute_capability.c")
- try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
+IF(CUDA_FOUND)
+ MESSAGE(STATUS "${CMAKE_MODULE_PATH}/cuda_compute_capability.cpp")
+ TRY_RUN(RUN_RESULT_VAR COMPILE_RESULT_VAR
${CMAKE_BINARY_DIR}
- ${CMAKE_MODULE_PATH}/cuda_compute_capability.c
+ ${CMAKE_MODULE_PATH}/cuda_compute_capability.cpp
CMAKE_FLAGS
-DINCLUDE_DIRECTORIES:STRING=${CUDA_TOOLKIT_INCLUDE}
-DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR)
- message(STATUS "Compile: ${RUN_OUTPUT_VAR}")
- if (COMPILE_RESULT_VAR)
- message(STATUS "compiled -> " ${RUN_RESULT_VAR})
- else()
- message(STATUS "didn't compile")
- endif()
+ MESSAGE(STATUS "Output: ${RUN_OUTPUT_VAR}")
+ IF(COMPILE_RESULT_VAR)
+ # Convert output into a list of computes
+ STRING(REPLACE " " ";" COMPUTES_DETECTED_LIST ${RUN_OUTPUT_VAR})
+ ELSE()
+ MESSAGE(STATUS "didn't compile")
+ ENDIF()
# COMPILE_RESULT_VAR is TRUE when compile succeeds
# RUN_RESULT_VAR is zero when a GPU is found
- if(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
- message(STATUS "worked")
- set(CUDA_HAVE_GPU TRUE CACHE BOOL "Whether CUDA-capable GPU is present")
- set(CUDA_DETECTED_COMPUTE ${RUN_OUTPUT_VAR} CACHE STRING "Compute capability of CUDA-capable GPU present")
- #set(CUDA_GENERATE_CODE "arch=compute_${CUDA_DETECTED_COMPUTE},code=sm_${CUDA_DETECTED_COMPUTE}" CACHE STRING "Which GPU architectures to generate code for (each arch/code pair will be passed as --generate-code option to nvcc, separate multiple pairs by ;)")
- #mark_as_advanced(CUDA_DETECTED_COMPUTE CUDA_GENERATE_CODE)
- #set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch compute_${CUDA_DETECTED_COMPUTE})
- mark_as_advanced(CUDA_DETECTED_COMPUTE)
- else()
- message(STATUS "didn't work")
- set(CUDA_HAVE_GPU FALSE CACHE BOOL "Whether CUDA-capable GPU is present")
- endif()
+ IF(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
+ MESSAGE(STATUS "CUDA Compute Detection Worked")
+ SET(CUDA_HAVE_GPU TRUE CACHE BOOL "Whether CUDA-capable GPU is present")
+ ELSE()
+ MESSAGE(STATUS "didn't work")
+ SET(CUDA_HAVE_GPU FALSE CACHE BOOL "Whether CUDA-capable GPU is present")
+ ENDIF()
endif()
diff --git a/CMakeModules/cuda_compute_capability.c b/CMakeModules/cuda_compute_capability.c
deleted file mode 100644
index bc17a40..0000000
--- a/CMakeModules/cuda_compute_capability.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-* Copyright (C) 2011 Florian Rathgeber, florian.rathgeber at gmail.com
-*
-* This code is licensed under the MIT License. See the FindCUDA.cmake script
-* for the text of the license.
-*
-* Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
-* http://stackoverflow.com/questions/2285185
-*/
-
-#include <stdio.h>
-#include <cuda_runtime.h>
-
-int main() {
- int deviceCount, device, major = 9999, minor = 9999;
- int gpuDeviceCount = 0;
- struct cudaDeviceProp properties;
-
- if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
- {
- printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError()));
- return 1;
- }
- /* machines with no GPUs can still report one emulation device */
- for (device = 0; device < deviceCount; ++device) {
- cudaGetDeviceProperties(&properties, device);
- if (properties.major != 9999) {/* 9999 means emulation only */
- ++gpuDeviceCount;
- /* get minimum compute capability of all devices */
- if (major > properties.major) {
- major = properties.major;
- minor = properties.minor;
- } else if (minor > properties.minor) {
- minor = properties.minor;
- }
- }
- }
-
- /* don't just return the number of gpus, because other runtime cuda
- errors can also yield non-zero return values */
- if (gpuDeviceCount > 0) {
- if ((major == 2 && minor == 1))
- {
- // There is no --arch compute_21 flag for nvcc, so force minor to 0
- minor = 0;
- }
- /* this output will be parsed by FindCUDA.cmake */
- printf("%d%d", major, minor);
- return 0; /* success */
- }
- return 1; /* failure */
-}
diff --git a/CMakeModules/cuda_compute_capability.cpp b/CMakeModules/cuda_compute_capability.cpp
new file mode 100644
index 0000000..ef589a9
--- /dev/null
+++ b/CMakeModules/cuda_compute_capability.cpp
@@ -0,0 +1,58 @@
+/*
+* Copyright (C) 2011 Florian Rathgeber, florian.rathgeber at gmail.com
+*
+* This code is licensed under the MIT License. See the FindCUDA.cmake script
+* for the text of the license.
+*
+* Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
+* http://stackoverflow.com/questions/2285185
+*/
+
+#include <stdio.h>
+#include <cuda_runtime.h>
+#include <iterator>
+#include <set>
+
+int main() {
+ int deviceCount;
+ int gpuDeviceCount = 0;
+ struct cudaDeviceProp properties;
+
+ if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
+ {
+ printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError()));
+ return 1;
+ }
+
+ std::set<int> computes;
+ typedef std::set<int>::iterator iter;
+
+ // machines with no GPUs can still report one emulation device
+ for (int device = 0; device < deviceCount; ++device) {
+ int major = 9999, minor = 9999;
+ cudaGetDeviceProperties(&properties, device);
+ if (properties.major != 9999) { // 9999 means emulation only
+ ++gpuDeviceCount;
+ major = properties.major;
+ minor = properties.minor;
+ if ((major == 2 && minor == 1)) {
+ // There is no --arch compute_21 flag for nvcc, so force minor to 0
+ minor = 0;
+ }
+ computes.insert(10 * major + minor);
+ }
+ }
+ int i = 0;
+ for(iter it = computes.begin(); it != computes.end(); it++, i++) {
+ if(i > 0) {
+ printf(" ");
+ }
+ printf("%d", *it);
+ }
+ /* don't just return the number of gpus, because other runtime cuda
+ errors can also yield non-zero return values */
+ if (gpuDeviceCount <= 0 || computes.size() <= 0) {
+ return 1; // failure
+ }
+ return 0; // success
+}
diff --git a/src/backend/cuda/CMakeLists.txt b/src/backend/cuda/CMakeLists.txt
index e3030e0..0d6dc96 100644
--- a/src/backend/cuda/CMakeLists.txt
+++ b/src/backend/cuda/CMakeLists.txt
@@ -8,11 +8,23 @@ INCLUDE("${CMAKE_MODULE_PATH}/FindNVVM.cmake")
# Disables running cuda_compute_check.c when build windows using remote
OPTION(CUDA_COMPUTE_DETECT "Run autodetection of CUDA Architecture" ON)
-IF(CUDA_COMPUTE_DETECT AND NOT DEFINED CUDA_DETECTED_COMPUTE)
+IF(CUDA_COMPUTE_DETECT AND NOT DEFINED COMPUTES_DETECTED_LIST)
INCLUDE("${CMAKE_MODULE_PATH}/CUDACheckCompute.cmake")
- SET(CUDA_COMPUTE_${CUDA_DETECTED_COMPUTE} ON CACHE BOOL "" FORCE)
ENDIF()
+LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
+IF(${COMPUTES_LEN} EQUAL 0)
+ MESSAGE(STATUS "No computes detected. Fall back to 20, 30, 50")
+ LIST(APPEND COMPUTES_DETECTED_LIST "20" "30" "50")
+ENDIF()
+LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
+MESSAGE(STATUS "Number of Computes = ${COMPUTES_LEN}")
+
+FOREACH(COMPUTE_DETECTED ${COMPUTES_DETECTED_LIST})
+ MESSAGE(STATUS "Setting Compute ${COMPUTE_DETECTED} to ON")
+ SET(CUDA_COMPUTE_${COMPUTE_DETECTED} ON CACHE BOOL "" FORCE)
+ENDFOREACH()
+
OPTION(CUDA_COMPUTE_20 "CUDA Compute Capability 2.0" OFF)
OPTION(CUDA_COMPUTE_30 "CUDA Compute Capability 3.0" OFF)
OPTION(CUDA_COMPUTE_32 "CUDA Compute Capability 3.2" OFF)
@@ -158,7 +170,7 @@ SOURCE_GROUP(api\\cpp\\Sources FILES ${cpp_sources})
LIST(LENGTH COMPUTE_VERSIONS COMPUTE_COUNT)
IF(${COMPUTE_COUNT} EQUAL 1)
- SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_GENERATE_CODE}")
+ SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_GENERATE_CODE}")
ELSE()
SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch sm_20")
ENDIF()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list