[clinfo] 97/148: Preliminary change to ease switching device info over to traits

Andreas Beckmann anbe at moszumanska.debian.org
Mon Nov 17 14:09:51 UTC 2014


This is an automated email from the git hooks/post-receive script.

anbe pushed a commit to branch clinfo
in repository clinfo.

commit 219724f677858e8f46c684b6036fd4b7e9c9fc07
Author: Giuseppe Bilotta <giuseppe.bilotta at gmail.com>
Date:   Tue Nov 4 14:28:51 2014 +0100

    Preliminary change to ease switching device info over to traits
---
 src/clinfo.c | 203 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 121 insertions(+), 82 deletions(-)

diff --git a/src/clinfo.c b/src/clinfo.c
index 1aba898..913a61c 100644
--- a/src/clinfo.c
+++ b/src/clinfo.c
@@ -287,6 +287,68 @@ getOpenCLVersion(const char *version)
 	return ret;
 }
 
+struct device_info_checks { // facts about one device, queried through the dev_* helpers
+	cl_device_type devtype; // device type flags; dev_is_gpu() tests the CL_DEVICE_TYPE_GPU bit
+	char has_half[12]; // each has_* buffer holds the extension string that was detected,
+	char has_double[24]; // and stays an empty string when no matching extension was found
+	char has_nv[29];
+	char has_amd[30];
+	char has_svm_ext[11]; // SVM offered through an extension (as opposed to core 2.0 support)
+	char has_fission[22];
+	char has_atomic_counters[26];
+	char has_image2d_buffer[27];
+	char has_intel_local_thread[30];
+	char has_altera_dev_temp[29];
+	char has_spir[12];
+	char has_qcom_ext_host_ptr[21];
+	cl_uint dev_version; // OpenCL device version, as major*10 + minor
+};
+
+#define DEFINE_EXT_CHECK(ext) int dev_has_##ext(const struct device_info_checks *chk) \
+{ \
+	return !!(chk->has_##ext[0]); /* nonzero iff the has_<ext> string is non-empty */ \
+}
+
+DEFINE_EXT_CHECK(half) // each use defines dev_has_<ext>(chk) for the matching has_<ext> field
+DEFINE_EXT_CHECK(double)
+DEFINE_EXT_CHECK(nv)
+DEFINE_EXT_CHECK(amd)
+DEFINE_EXT_CHECK(svm_ext) // extension-only check; dev_has_svm() also accepts core 2.0 devices
+DEFINE_EXT_CHECK(fission)
+DEFINE_EXT_CHECK(atomic_counters)
+DEFINE_EXT_CHECK(image2d_buffer)
+DEFINE_EXT_CHECK(intel_local_thread)
+DEFINE_EXT_CHECK(altera_dev_temp)
+DEFINE_EXT_CHECK(spir)
+DEFINE_EXT_CHECK(qcom_ext_host_ptr)
+
+// nonzero iff the device version (major*10 + minor) is at least 12, i.e. OpenCL 1.2
+int dev_is_12(const struct device_info_checks *chk)
+{
+	return !!(chk->dev_version >= 12);
+}
+
+// nonzero iff the device version (major*10 + minor) is at least 20, i.e. OpenCL 2.0
+int dev_is_20(const struct device_info_checks *chk)
+{
+	return !!(chk->dev_version >= 20);
+}
+
+int dev_is_gpu(const struct device_info_checks *chk) // nonzero iff devtype has the GPU bit set
+{
+	return !!(chk->devtype & CL_DEVICE_TYPE_GPU);
+}
+
+int dev_is_gpu_amd(const struct device_info_checks *chk) // GPU device with a detected has_amd string
+{
+	return dev_is_gpu(chk) && dev_has_amd(chk);
+}
+
+int dev_has_svm(const struct device_info_checks *chk) // SVM via core (2.0 and later) or via extension
+{
+	return dev_is_20(chk) || dev_has_svm_ext(chk);
+}
+
 void
 printDeviceInfo(cl_uint d)
 {
@@ -322,28 +384,8 @@ printDeviceInfo(cl_uint d)
 	char* extensions;
 
 	// these will hold the string from which we detected extension support
-	char has_half[12] = {0};
-	char has_double[24] = {0};
-	char has_nv[29] = {0};
-	char has_amd[30] = {0};
-	char has_svm[11] = {0};
-	char has_fission[22] = {0};
-	char has_atomic_counters[26] = {0};
-	char has_image2d_buffer[27] = {0};
-	char has_intel_local_thread[30] = {0};
-	char has_altera_dev_temp[29] = {0};
-	char has_spir[12] = {0};
-	char has_qcom_ext_host_ptr[21] = {0};
-
-	// OpenCL device version, as major*10 + minor
-	cl_uint dev_version = 10;
-
-	// device supports OpenCL 1.2
-	cl_bool is_12 = CL_FALSE;
-	// device supports OpenCL 2.0
-	cl_bool is_20 = CL_FALSE;
-	// device is a GPU
-	cl_bool is_gpu = CL_FALSE;
+	struct device_info_checks chk;
+	memset(&chk, 0, sizeof(chk));
 
 #define KB UINT64_C(1024)
 #define MB (KB*KB)
@@ -427,12 +469,10 @@ printDeviceInfo(cl_uint d)
 	HEX_PARAM(VENDOR_ID, "Device Vendor ID");
 	STR_PARAM(VERSION, "Version");
 	// skip "OpenCL "
-	dev_version = getOpenCLVersion(strbuf + 7);
-	is_12 = !!(dev_version >= 12);
-	is_20 = !!(dev_version >= 20);
+	chk.dev_version = getOpenCLVersion(strbuf + 7);
 #if 0 // debug OpenCL version detection
 	printf("==> CL%u (is_12: %s, is_20: %s)\n",
-		dev_version, bool_str[is_12], bool_str[is_20]);
+		chk.dev_version, bool_str[dev_is_12(&chk)], bool_str[dev_is_20(&chk)]); // the local dev_version is gone; the value lives in chk
 #endif
 
 	SHOW_STRING(clGetDeviceInfo, CL_DRIVER_VERSION, "Driver Version", dev);
@@ -448,8 +488,8 @@ printDeviceInfo(cl_uint d)
 #define _HAS_EXT(ext) (strstr(extensions, ext))
 #define HAS_EXT(ext) _HAS_EXT(#ext)
 #define CPY_EXT(what, ext) do { \
-	strncpy(has_##what, has, sizeof(ext)); \
-	has_##what[sizeof(ext)-1] = '\0'; \
+	strncpy(chk.has_##what, has, sizeof(ext)); \
+	chk.has_##what[sizeof(ext)-1] = '\0'; \
 } while (0)
 #define CHECK_EXT(what, ext) do { \
 	has = _HAS_EXT(#ext); \
@@ -461,16 +501,16 @@ printDeviceInfo(cl_uint d)
 		CHECK_EXT(half, cl_khr_fp16);
 		CHECK_EXT(double, cl_khr_fp64);
 		CHECK_EXT(spir, cl_khr_spir);
-		if (!*has_double)
+		if (!dev_has_double(&chk)) // fall back only while no fp64 extension has matched
 			CHECK_EXT(double, cl_amd_fp64);
-		if (!*has_double)
+		if (!dev_has_double(&chk)) // still nothing: try the APPLE variant
 			CHECK_EXT(double, cl_APPLE_fp64_basic_ops);
 		CHECK_EXT(nv, cl_nv_device_attribute_query);
 		CHECK_EXT(amd, cl_amd_device_attribute_query);
-		CHECK_EXT(svm, cl_amd_svm);
+		CHECK_EXT(svm_ext, cl_amd_svm);
 		CHECK_EXT(fission, cl_ext_device_fission);
 		CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64);
-		if (!*has_atomic_counters)
+		if (!dev_has_atomic_counters(&chk)) // 32-bit counters are the fallback for missing 64-bit ones
 			CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32);
 		CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer);
 		CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread);
@@ -503,9 +543,9 @@ printDeviceInfo(cl_uint d)
 	}
 	STR_PRINT("Device Type", strbuf);
 
-	is_gpu = !!(devtype & CL_DEVICE_TYPE_GPU);
+	chk.devtype = devtype; // dev_is_gpu() reads chk.devtype, which is otherwise left zeroed by the memset
 	STR_PARAM(PROFILE, "Profile");
-	if (*has_amd) {
+	if (dev_has_amd(&chk)) {
 		cl_device_topology_amd devtopo;
 
 		STR_PARAM(BOARD_NAME_AMD, "Board Name (AMD)");
@@ -528,7 +567,7 @@ printDeviceInfo(cl_uint d)
 		}
 		STR_PRINT("Device Topology (AMD)", strbuf);
 	}
-	if (*has_nv) {
+	if (dev_has_nv(&chk)) {
 		cl_uint bus, slot;
 		GET_PARAM(PCI_BUS_ID_NV, bus);
 		if (!had_error)
@@ -540,14 +579,14 @@ printDeviceInfo(cl_uint d)
 
 	// compute units and clock
 	INT_PARAM(MAX_COMPUTE_UNITS, "Max compute units",);
-	if (*has_amd && is_gpu) {
+	if (dev_is_gpu_amd(&chk)) {
 		// these are GPU-only
 		INT_PARAM(SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute units (AMD)",);
 		INT_PARAM(SIMD_WIDTH_AMD, "SIMD width (AMD)",);
 		INT_PARAM(SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)",);
 	}
 	INT_PARAM(MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz");
-	if (*has_nv) {
+	if (dev_has_nv(&chk)) {
 		GET_PARAM(COMPUTE_CAPABILITY_MAJOR_NV, uintval);
 		if (!had_error)
 			GET_PARAM(COMPUTE_CAPABILITY_MINOR_NV, uintval2);
@@ -557,7 +596,7 @@ printDeviceInfo(cl_uint d)
 			printf(I1_STR "%u.%u\n", "NVIDIA Compute Capability", uintval, uintval2);
 		}
 	}
-	if (*has_altera_dev_temp)
+	if (dev_has_altera_dev_temp(&chk))
 		INT_PARAM(CORE_TEMPERATURE_ALTERA, "Core temperature (Altera)", " C");
 
 	/* device fission, two different ways: core in 1.2, extension previously
@@ -565,19 +604,19 @@ printDeviceInfo(cl_uint d)
 	 * by name is not considered in OpenCL 1.2, but an option with the extension
 	 */
 	szval = 0;
-	if (is_12) {
-		strncpy(strbuf + szval, "core, ", *has_fission ? 6 : 4);
-		szval += (*has_fission ? 6 : 4);
+	if (dev_is_12(&chk)) {
+		strncpy(strbuf + szval, "core, ", chk.has_fission[0] ? 6 : 4);
+		szval += (chk.has_fission[0] ? 6 : 4);
 	}
-	if (*has_fission) {
-		strncpy(strbuf + szval, has_fission, bufsz - (szval + 1));
-		szval += strlen(has_fission);
+	if (dev_has_fission(&chk)) {
+		strncpy(strbuf + szval, chk.has_fission, bufsz - (szval + 1));
+		szval += strlen(chk.has_fission);
 	}
 	strbuf[szval] = 0;
 
 	printf(I1_STR "(%s)\n", "Device Partition",
 		szval ? strbuf : na);
-	if (is_12) {
+	if (dev_is_12(&chk)) {
 		INT_PARAM(PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices",);
 		GET_PARAM_ARRAY(PARTITION_PROPERTIES, partprop, szval);
 		numpartprop = szval/sizeof(*partprop);
@@ -625,7 +664,7 @@ printDeviceInfo(cl_uint d)
 			puts("");
 		}
 	}
-	if (*has_fission) {
+	if (dev_has_fission(&chk)) {
 		GET_PARAM_ARRAY(PARTITION_TYPES_EXT, partprop_ext, szval);
 		numpartprop_ext = szval/sizeof(*partprop_ext);
 		printf(I2_STR, "Supported partition types (ext)");
@@ -695,10 +734,10 @@ printDeviceInfo(cl_uint d)
 	else
 		printf(I1_STR "%s\n", "Preferred work group size multiple", strbuf);
 
-	if (*has_nv) {
+	if (dev_has_nv(&chk)) {
 		INT_PARAM(WARP_SIZE_NV, "Warp size (NVIDIA)",);
 	}
-	if (*has_amd && is_gpu) {
+	if (dev_is_gpu_amd(&chk)) {
 		INT_PARAM(WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)",);
 	}
 
@@ -723,9 +762,9 @@ printDeviceInfo(cl_uint d)
 	PRINT_VEC(SHORT, short);
 	PRINT_VEC(INT, int);
 	PRINT_VEC(LONG, long); // this is actually optional in EMBED profiles
-	PRINT_VEC_OPT(HALF, half, has_half);
+	PRINT_VEC_OPT(HALF, half, chk.has_half);
 	PRINT_VEC(FLOAT, float);
-	PRINT_VEC_OPT(DOUBLE, double, has_double);
+	PRINT_VEC_OPT(DOUBLE, double, chk.has_double);
 	puts("");
 
 	// FP configurations
@@ -750,14 +789,14 @@ printDeviceInfo(cl_uint d)
 #define FPSUPP_STR(str, opt) \
 	"  %-17s%-29s " opt "\n", #str "-precision", fpsupp
 	printf(FPSUPP_STR(Half, " (%s)"),
-		*has_half ? has_half : na);
-	if (*has_half)
+		chk.has_half[0] ? chk.has_half : na);
+	if (dev_has_half(&chk))
 		SHOW_FP_SUPPORT(HALF);
 	printf(FPSUPP_STR(Single, " (core)"));
 	SHOW_FP_SUPPORT(SINGLE);
 	printf(FPSUPP_STR(Double, " (%s)"),
-		*has_double ? has_double : na);
-	if (*has_double)
+		chk.has_double[0] ? chk.has_double : na);
+	if (dev_has_double(&chk))
 		SHOW_FP_SUPPORT(DOUBLE);
 
 	// arch bits and endianness
@@ -769,7 +808,7 @@ printDeviceInfo(cl_uint d)
 
 	// global
 	MEM_PARAM(GLOBAL_MEM_SIZE, "Global memory size");
-	if (*has_amd && is_gpu) {
+	if (dev_is_gpu_amd(&chk)) {
 		// FIXME seek better documentation about this. what does it mean?
 		GET_PARAM_ARRAY(GLOBAL_FREE_MEMORY_AMD, szvals, szval);
 		szels = szval/sizeof(*szvals);
@@ -786,24 +825,24 @@ printDeviceInfo(cl_uint d)
 	MEM_PARAM(MAX_MEM_ALLOC_SIZE, "Max memory allocation");
 
 	BOOL_PARAM(HOST_UNIFIED_MEMORY, "Unified memory for Host and Device");
-	if (*has_nv) {
+	if (dev_has_nv(&chk)) {
 		BOOL_PARAM(INTEGRATED_MEMORY_NV, "NVIDIA integrated memory");
 	}
 
-	// SVM TODO might also be supported by extensions on 1.2
-	if (is_20 || *has_svm) {
+	// SVM
+	if (dev_has_svm(&chk)) {
 		cl_device_svm_capabilities svm_cap;
 		GET_PARAM(SVM_CAPABILITIES, svm_cap);
 		if (!had_error) {
 			szval = 0;
 			strbuf[szval++] = '(';
-			if (is_20) {
-				strncpy(strbuf + szval, "core, ", *has_svm ? 6 : 4);
-				szval += (*has_svm ? 6 : 4);
+			if (dev_is_20(&chk)) {
+				strncpy(strbuf + szval, "core, ", chk.has_svm_ext[0] ? 6 : 4);
+				szval += (chk.has_svm_ext[0] ? 6 : 4);
 			}
-			if (*has_svm) {
-				strncpy(strbuf + szval, has_svm, bufsz - (szval + 2));
-				szval += strlen(has_svm);
+			if (dev_has_svm_ext(&chk)) {
+				strncpy(strbuf + szval, chk.has_svm_ext, bufsz - (szval + 2));
+				szval += strlen(chk.has_svm_ext);
 			}
 			strbuf[szval++] = ')';
 			strbuf[szval++] = 0;
@@ -823,7 +862,7 @@ printDeviceInfo(cl_uint d)
 		"Alignment of base address", uintval, uintval/8);
 
 	// atomics alignment
-	if (is_20) {
+	if (dev_is_20(&chk)) {
 		printf(I1_STR "\n", "Preferred alignment for atomics");
 		INT_PARAM(PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", "");
 		INT_PARAM(PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", "");
@@ -831,13 +870,13 @@ printDeviceInfo(cl_uint d)
 
 	}
 
-	if (*has_qcom_ext_host_ptr) {
+	if (dev_has_qcom_ext_host_ptr(&chk)) {
 		SZ_PARAM(PAGE_SIZE_QCOM, "Page size (QUALCOMM)", " bytes");
 		SZ_PARAM(EXT_MEM_PADDING_IN_BYTES_QCOM, "Externa memory padding (QUALCOMM)", " bytes");
 	}
 
 	// global variables
-	if (is_20) { // TODO some 1.2 devices respond to this too ...
+	if (dev_is_20(&chk)) { // TODO some 1.2 devices respond to this too ...
 		MEM_PARAM(MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable");
 		MEM_PARAM(GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars");
 	}
@@ -854,11 +893,11 @@ printDeviceInfo(cl_uint d)
 	BOOL_PARAM(IMAGE_SUPPORT, "Image support");
 	if (boolval) {
 		INT_PARAM(MAX_SAMPLERS, INDENT "Max number of samplers per kernel",);
-		if (is_12) {
+		if (dev_is_12(&chk)) {
 			SZ_PARAM(IMAGE_MAX_BUFFER_SIZE, INDENT "Max 1D image size", " pixels");
 			SZ_PARAM(IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", " images");
 		}
-		if (*has_image2d_buffer) {
+		if (dev_has_image2d_buffer(&chk)) {
 			SZ_PARAM(IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers",);
 			SZ_PARAM(IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers",);
 		}
@@ -873,13 +912,13 @@ printDeviceInfo(cl_uint d)
 			szvals[0], szvals[1], szvals[2]);
 		INT_PARAM(MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args",);
 		INT_PARAM(MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args",);
-		if (is_20) {
+		if (dev_is_20(&chk)) {
 			INT_PARAM(MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args",);
 		}
 	}
 
 	// pipes
-	if (is_20) {
+	if (dev_is_20(&chk)) {
 		INT_PARAM(MAX_PIPE_ARGS, "Max number of pipe args", "");
 		INT_PARAM(PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", "");
 		GET_PARAM(PIPE_MAX_PACKET_SIZE, uintval);
@@ -895,13 +934,13 @@ printDeviceInfo(cl_uint d)
 	STR_PRINT("Local memory type", local_mem_type_str[lmemtype]);
 	if (lmemtype != CL_NONE)
 		MEM_PARAM(LOCAL_MEM_SIZE, "Local memory size");
-	if (*has_amd && is_gpu) {
+	if (dev_is_gpu_amd(&chk)) {
 		MEM_PARAM(LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory size per CU (AMD)");
 		INT_PARAM(LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)",);
 	}
 
 	// nv: registers/CU
-	if (*has_nv) {
+	if (dev_has_nv(&chk)) {
 		INT_PARAM(REGISTERS_PER_BLOCK_NV, "NVIDIA registers per CU",);
 	}
 
@@ -911,25 +950,25 @@ printDeviceInfo(cl_uint d)
 	INT_PARAM(MAX_CONSTANT_ARGS, "Max number of constant args",);
 
 	MEM_PARAM(MAX_PARAMETER_SIZE, "Max size of kernel argument");
-	if (*has_atomic_counters)
+	if (dev_has_atomic_counters(&chk))
 		INT_PARAM(MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters",);
 
 	// queue and kernel capabilities
 
 	GET_PARAM(QUEUE_PROPERTIES, queueprop);
 	printf(I1_STR "%s\n",
-		(is_20 ? "Queue properties (on host)" : "Queue properties"),
+		(dev_is_20(&chk) ? "Queue properties (on host)" : "Queue properties"),
 		had_error ? strbuf : "");
 	if (!had_error) {
 		STR_PRINT(INDENT "Out-of-order execution", bool_str[!!(queueprop & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)]);
 		STR_PRINT(INDENT "Profiling", bool_str[!!(queueprop & CL_QUEUE_PROFILING_ENABLE)]);
 	}
-	if (*has_intel_local_thread) {
+	if (dev_has_intel_local_thread(&chk)) {
 		printf(I1_STR "%s\n", INDENT "Intel local thread execution", bool_str[1]);
 	}
 
 	// queues on device
-	if (is_20) {
+	if (dev_is_20(&chk)) {
 		GET_PARAM(QUEUE_ON_DEVICE_PROPERTIES, queueprop);
 		printf(I1_STR "%s\n", "Queue properties (on device)",
 			had_error ? strbuf : "");
@@ -955,7 +994,7 @@ printDeviceInfo(cl_uint d)
 
 
 	SZ_PARAM(PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns");
-	if (*has_amd) {
+	if (dev_has_amd(&chk)) {
 		time_t time;
 		char *nl;
 		GET_PARAM(PROFILING_TIMER_OFFSET_AMD, ulongval);
@@ -972,16 +1011,16 @@ printDeviceInfo(cl_uint d)
 	GET_PARAM(EXECUTION_CAPABILITIES, execap);
 	STR_PRINT(INDENT "Run OpenCL kernels", bool_str[!!(execap & CL_EXEC_KERNEL)]);
 	STR_PRINT(INDENT "Run native kernels", bool_str[!!(execap & CL_EXEC_NATIVE_KERNEL)]);
-	if (*has_nv) {
+	if (dev_has_nv(&chk)) {
 		BOOL_PARAM(KERNEL_EXEC_TIMEOUT_NV, INDENT "NVIDIA kernel execution timeout");
 		BOOL_PARAM(GPU_OVERLAP_NV, "NVIDIA concurrent copy and kernel execution");
 		INT_PARAM(ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT "Number of copy engines",);
 	}
-	if (*has_spir) {
+	if (dev_has_spir(&chk)) {
 		SHOW_STRING(clGetDeviceInfo, CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", dev);
 	}
 
-	if (is_12) {
+	if (dev_is_12(&chk)) {
 		BOOL_PARAM(PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interops");
 		MEM_PARAM(PRINTF_BUFFER_SIZE, "printf() buffer size");
 		STR_PARAM(BUILT_IN_KERNELS, "Built-in kernels");
@@ -990,7 +1029,7 @@ printDeviceInfo(cl_uint d)
 	// misc. availability
 	BOOL_PARAM(AVAILABLE, "Device Available");
 	BOOL_PARAM(COMPILER_AVAILABLE, "Compiler Available");
-	if (is_12)
+	if (dev_is_12(&chk))
 		BOOL_PARAM(LINKER_AVAILABLE, "Linker Available");
 
 	// and finally the extensions

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/clinfo.git



More information about the Pkg-opencl-commits mailing list