[arrayfire] 36/79: Enable multiple CUDA computes to be detected and enabled

Mon Jun 15 13:38:05 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository arrayfire.

commit dbc2ae13ca94b53da708cc56673f735d3516b60a
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Tue Jun 9 14:47:03 2015 -0400

    Enable multiple CUDA computes to be detected and enabled
    
    * Now enables multiple computes
    * Removes CUDA_DETECTED_COMPUTE cmake variable. Using COMPUTES_DETECTED_LIST
    * Detection can still be disabled using CUDA_COMPUTE_DETECT=OFF
    * Setting COMPUTES_DETECTED_LIST="20;30;xy" etc will disable computes detection
      and enable the specified computes
    * Can also manually set CUDA_COMPUTE_XY to ON
---
 CMakeModules/CUDACheckCompute.cmake      | 40 ++++++++++------------
 CMakeModules/cuda_compute_capability.c   | 52 ----------------------------
 CMakeModules/cuda_compute_capability.cpp | 58 ++++++++++++++++++++++++++++++++
 src/backend/cuda/CMakeLists.txt          | 18 ++++++++--
 4 files changed, 91 insertions(+), 77 deletions(-)

diff --git a/CMakeModules/CUDACheckCompute.cmake b/CMakeModules/CUDACheckCompute.cmake
index db2174a..574222d 100644
--- a/CMakeModules/CUDACheckCompute.cmake
+++ b/CMakeModules/CUDACheckCompute.cmake
@@ -5,34 +5,30 @@
 # Check for GPUs present and their compute capability
 # based on http://stackoverflow.com/questions/2285185/easiest-way-to-test-for-existence-of-cuda-capable-gpu-from-cmake/2297877#2297877 (Christopher Bruns)
 
-if(CUDA_FOUND)
-    message(STATUS "${CMAKE_MODULE_PATH}/cuda_compute_capability.c")
-    try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
+IF(CUDA_FOUND)
+    MESSAGE(STATUS "${CMAKE_MODULE_PATH}/cuda_compute_capability.cpp")
+    TRY_RUN(RUN_RESULT_VAR COMPILE_RESULT_VAR
         ${CMAKE_BINARY_DIR}
-        ${CMAKE_MODULE_PATH}/cuda_compute_capability.c
+        ${CMAKE_MODULE_PATH}/cuda_compute_capability.cpp
         CMAKE_FLAGS
         -DINCLUDE_DIRECTORIES:STRING=${CUDA_TOOLKIT_INCLUDE}
         -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
         COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
         RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR)
-    message(STATUS "Compile: ${RUN_OUTPUT_VAR}")
-    if (COMPILE_RESULT_VAR)
-        message(STATUS "compiled -> " ${RUN_RESULT_VAR})
-    else()
-        message(STATUS "didn't compile")
-    endif()
+    MESSAGE(STATUS "Output: ${RUN_OUTPUT_VAR}")
+    IF(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
+        # Only a successful run prints computes; on failure the output is an error message
+        STRING(REPLACE " " ";" COMPUTES_DETECTED_LIST "${RUN_OUTPUT_VAR}")
+    ELSEIF(NOT COMPILE_RESULT_VAR)
+        MESSAGE(STATUS "didn't compile")
+    ENDIF()
     # COMPILE_RESULT_VAR is TRUE when compile succeeds
     # RUN_RESULT_VAR is zero when a GPU is found
-    if(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
-        message(STATUS "worked")
-        set(CUDA_HAVE_GPU TRUE CACHE BOOL "Whether CUDA-capable GPU is present")
-        set(CUDA_DETECTED_COMPUTE ${RUN_OUTPUT_VAR} CACHE STRING "Compute capability of CUDA-capable GPU present")
-        #set(CUDA_GENERATE_CODE "arch=compute_${CUDA_DETECTED_COMPUTE},code=sm_${CUDA_DETECTED_COMPUTE}" CACHE STRING "Which GPU architectures to generate code for (each arch/code pair will be passed as --generate-code option to nvcc, separate multiple pairs by ;)")
-        #mark_as_advanced(CUDA_DETECTED_COMPUTE CUDA_GENERATE_CODE)
-        #set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch compute_${CUDA_DETECTED_COMPUTE})
-        mark_as_advanced(CUDA_DETECTED_COMPUTE)
-    else()
-        message(STATUS "didn't work")
-        set(CUDA_HAVE_GPU FALSE CACHE BOOL "Whether CUDA-capable GPU is present")
-    endif()
+    IF(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
+        MESSAGE(STATUS "CUDA Compute Detection Worked")
+        SET(CUDA_HAVE_GPU TRUE CACHE BOOL "Whether CUDA-capable GPU is present")
+    ELSE()
+        MESSAGE(STATUS "didn't work")
+        SET(CUDA_HAVE_GPU FALSE CACHE BOOL "Whether CUDA-capable GPU is present")
+    ENDIF()
 endif()
diff --git a/CMakeModules/cuda_compute_capability.c b/CMakeModules/cuda_compute_capability.c
deleted file mode 100644
index bc17a40..0000000
--- a/CMakeModules/cuda_compute_capability.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-* Copyright (C) 2011 Florian Rathgeber, florian.rathgeber at gmail.com
-*
-* This code is licensed under the MIT License.  See the FindCUDA.cmake script
-* for the text of the license.
-*
-* Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
-* http://stackoverflow.com/questions/2285185
-*/
-
-#include <stdio.h>
-#include <cuda_runtime.h>
-
-int main() {
-    int deviceCount, device, major = 9999, minor = 9999;
-    int gpuDeviceCount = 0;
-    struct cudaDeviceProp properties;
-
-    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
-    {
-        printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError()));
-        return 1;
-    }
-    /* machines with no GPUs can still report one emulation device */
-    for (device = 0; device < deviceCount; ++device) {
-        cudaGetDeviceProperties(&properties, device);
-        if (properties.major != 9999) {/* 9999 means emulation only */
-            ++gpuDeviceCount;
-            /*  get minimum compute capability of all devices */
-            if (major > properties.major) {
-                major = properties.major;
-                minor = properties.minor;
-            } else if (minor > properties.minor) {
-                minor = properties.minor;
-            }
-        }
-    }
-
-    /* don't just return the number of gpus, because other runtime cuda
-    errors can also yield non-zero return values */
-    if (gpuDeviceCount > 0) {
-        if ((major == 2 && minor == 1))
-        {
-            // There is no --arch compute_21 flag for nvcc, so force minor to 0
-            minor = 0;
-        }
-        /* this output will be parsed by FindCUDA.cmake */
-        printf("%d%d", major, minor);
-        return 0; /* success */
-    }
-    return 1; /* failure */
-}
diff --git a/CMakeModules/cuda_compute_capability.cpp b/CMakeModules/cuda_compute_capability.cpp
new file mode 100644
index 0000000..ef589a9
--- /dev/null
+++ b/CMakeModules/cuda_compute_capability.cpp
@@ -0,0 +1,58 @@
+/*
+* Copyright (C) 2011 Florian Rathgeber, florian.rathgeber at gmail.com
+*
+* This code is licensed under the MIT License.  See the FindCUDA.cmake script
+* for the text of the license.
+*
+* Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
+* http://stackoverflow.com/questions/2285185
+*/
+
+#include <stdio.h>
+#include <cuda_runtime.h>
+#include <iterator>
+#include <set>
+
+// Prints the distinct compute capabilities of all CUDA GPUs, space separated (e.g. "30 52"); returns 0 on success.
+int main() {
+    int deviceCount;
+    int gpuDeviceCount = 0;
+    struct cudaDeviceProp properties;
+
+    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) {
+        printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError()));
+        return 1;
+    }
+
+    std::set<int> computes;
+    typedef std::set<int>::iterator iter;
+
+    // machines with no GPUs can still report one emulation device
+    for (int device = 0; device < deviceCount; ++device) {
+        // skip devices that cannot be queried; properties would hold stale or garbage data
+        if (cudaGetDeviceProperties(&properties, device) != cudaSuccess) continue;
+        if (properties.major != 9999) { // 9999 means emulation only
+            ++gpuDeviceCount;
+            int major = properties.major;
+            int minor = properties.minor;
+            if (major == 2 && minor == 1) {
+                // There is no --arch compute_21 flag for nvcc, so force minor to 0
+                minor = 0;
+            }
+            computes.insert(10 * major + minor);
+        }
+    }
+    int i = 0;
+    for(iter it = computes.begin(); it != computes.end(); it++, i++) {
+        if(i > 0) {
+            printf(" ");
+        }
+        printf("%d", *it);
+    }
+    /* don't just return the number of gpus, because other runtime cuda
+    errors can also yield non-zero return values */
+    if (gpuDeviceCount <= 0 || computes.empty()) {
+        return 1; // failure
+    }
+    return 0; // success
+}
diff --git a/src/backend/cuda/CMakeLists.txt b/src/backend/cuda/CMakeLists.txt
index e3030e0..0d6dc96 100644
--- a/src/backend/cuda/CMakeLists.txt
+++ b/src/backend/cuda/CMakeLists.txt
@@ -8,11 +8,23 @@ INCLUDE("${CMAKE_MODULE_PATH}/FindNVVM.cmake")
 
 # Disables running cuda_compute_check.c when build windows using remote
 OPTION(CUDA_COMPUTE_DETECT "Run autodetection of CUDA Architecture" ON)
-IF(CUDA_COMPUTE_DETECT AND NOT DEFINED CUDA_DETECTED_COMPUTE)
+IF(CUDA_COMPUTE_DETECT AND NOT DEFINED COMPUTES_DETECTED_LIST)
     INCLUDE("${CMAKE_MODULE_PATH}/CUDACheckCompute.cmake")
-    SET(CUDA_COMPUTE_${CUDA_DETECTED_COMPUTE} ON CACHE BOOL "" FORCE)
 ENDIF()
 
+LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
+IF(COMPUTES_LEN EQUAL 0)
+    MESSAGE(STATUS "No computes detected. Fall back to 20, 30, 50")
+    SET(COMPUTES_DETECTED_LIST "20" "30" "50")
+    LIST(LENGTH COMPUTES_DETECTED_LIST COMPUTES_LEN)
+ENDIF()
+MESSAGE(STATUS "Number of Computes = ${COMPUTES_LEN}")
+
+# Force-enable the cache option matching every compute in the list
+FOREACH(COMPUTE_DETECTED ${COMPUTES_DETECTED_LIST})
+    MESSAGE(STATUS "Setting Compute ${COMPUTE_DETECTED} to ON")
+    SET(CUDA_COMPUTE_${COMPUTE_DETECTED} ON CACHE BOOL "" FORCE)
+ENDFOREACH()
+
 OPTION(CUDA_COMPUTE_20 "CUDA Compute Capability 2.0" OFF)
 OPTION(CUDA_COMPUTE_30 "CUDA Compute Capability 3.0" OFF)
 OPTION(CUDA_COMPUTE_32 "CUDA Compute Capability 3.2" OFF)
@@ -158,7 +170,7 @@ SOURCE_GROUP(api\\cpp\\Sources FILES ${cpp_sources})
 
 LIST(LENGTH COMPUTE_VERSIONS COMPUTE_COUNT)
 IF(${COMPUTE_COUNT} EQUAL 1)
-  SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_GENERATE_CODE}")
+    SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_GENERATE_CODE}")
 ELSE()
     SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch sm_20")
 ENDIF()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git