[clinfo] 13/148: AMD device attributes (plus NV warp size)
Andreas Beckmann
anbe at moszumanska.debian.org
Mon Nov 17 14:09:39 UTC 2014
This is an automated email from the git hooks/post-receive script.
anbe pushed a commit to branch clinfo
in repository clinfo.
commit 76edbd75a52ab740adad686ef4ebeb3bbde36b89
Author: Giuseppe Bilotta <giuseppe.bilotta at gmail.com>
Date: Thu Jun 6 17:20:33 2013 +0200
AMD device attributes (plus NV warp size)
---
src/clinfo.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 58 insertions(+), 5 deletions(-)
diff --git a/src/clinfo.c b/src/clinfo.c
index 058cc54..64a799c 100644
--- a/src/clinfo.c
+++ b/src/clinfo.c
@@ -91,8 +91,7 @@ printPlatformInfo(cl_uint p)
#define GET_PARAM_ARRAY(param, var, num) do { \
error = clGetDeviceInfo(dev, CL_DEVICE_##param, 0, NULL, &num); \
CHECK_ERROR("get number of " #param); \
- var = malloc(num); \
- CHECK_MEM(var, #param); \
+ REALLOC(var, num/sizeof(*var), #param); \
error = clGetDeviceInfo(dev, CL_DEVICE_##param, num, var, NULL); \
CHECK_ERROR("get " #param); \
} while (0)
@@ -137,11 +136,13 @@ printDeviceInfo(cl_uint d)
char has_half[12] = {0};
char has_double[12] = {0};
char has_nv[29] = {0};
+ char has_amd[30] = {0};
char has_fission[22] = {0};
char has_atomic_counters[26] = {0};
// device supports OpenCL 1.2
- cl_bool is_12 = 0;
+ cl_bool is_12 = CL_FALSE;
+ cl_bool is_gpu = CL_FALSE;
#define KB 1024UL
#define MB (KB*KB)
@@ -167,6 +168,10 @@ printDeviceInfo(cl_uint d)
GET_PARAM(param, uintval); \
printf(" %-46s: %u" sfx "\n", name, uintval); \
} while (0)
+#define LONG_PARAM(param, name, sfx) do { \
+ GET_PARAM(param, ulongval); \
+ printf(" %-46s: %u" sfx "\n", name, ulongval); \
+} while (0)
#define SZ_PARAM(param, name, sfx) do { \
GET_PARAM(param, szval); \
printf(" %-46s: %zu" sfx "\n", name, szval); \
@@ -220,6 +225,7 @@ printDeviceInfo(cl_uint d)
if (!*has_double)
CHECK_EXT(double, cl_amd_fp64);
CHECK_EXT(nv, cl_nv_device_attribute_query);
+ CHECK_EXT(amd, cl_amd_device_attribute_query);
CHECK_EXT(fission, cl_ext_device_fission);
CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64);
if (!*has_atomic_counters)
@@ -231,10 +237,21 @@ printDeviceInfo(cl_uint d)
GET_PARAM(TYPE, devtype);
// FIXME this can be a combination of flags
STR_PRINT("Device Type", device_type_str[ffs(devtype)]);
+ is_gpu = !!(devtype & CL_DEVICE_TYPE_GPU);
STR_PARAM(PROFILE, "Device Profile");
+ if (*has_amd) {
+ // TODO CL_DEVICE_TOPOLOGY_AMD
+ STR_PARAM(BOARD_NAME_AMD, "Board Name");
+ }
// compute units and clock
INT_PARAM(MAX_COMPUTE_UNITS, "Max compute units",);
+ if (*has_amd && is_gpu) {
+ // these are GPU-only
+ INT_PARAM(SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute units (AMD)",);
+ INT_PARAM(SIMD_WIDTH_AMD, "SIMD width (AMD)",);
+ INT_PARAM(SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)",);
+ }
INT_PARAM(MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz");
if (*has_nv) {
GET_PARAM(COMPUTE_CAPABILITY_MAJOR_NV, uintval);
@@ -366,6 +383,13 @@ printDeviceInfo(cl_uint d)
printf(" %-44s: %zu\n", buffer , szvals[cursor]);
}
SZ_PARAM(MAX_WORK_GROUP_SIZE, "Max work group size",);
+ // TODO CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE on a simple kernel
+ if (*has_nv) {
+ INT_PARAM(WARP_SIZE_NV, "Warp size (NVIDIA)",);
+ }
+ if (*has_amd && is_gpu) {
+ INT_PARAM(WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)",);
+ }
// preferred/native vector widths
printf(" %-46s:", "Preferred / native vector sizes");
@@ -424,6 +448,26 @@ printDeviceInfo(cl_uint d)
// global
MEM_PARAM(GLOBAL_MEM_SIZE, "Global memory size");
+ if (*has_amd && is_gpu) {
+ // FIXME seek better documentation about this. what does it mean?
+ GET_PARAM_ARRAY(GLOBAL_FREE_MEMORY_AMD, szvals, szval);
+ szels = szval/sizeof(*szvals);
+ for (cursor = 0; cursor < szels; ++cursor) {
+ doubleval = szvals[cursor];
+ if (szvals[cursor] > KB) {
+ snprintf(buffer, bufsz, " (%6.4lg%s)",
+ MEM_SIZE(doubleval),
+ MEM_PFX(doubleval));
+ buffer[bufsz-1] = '\0';
+ } else buffer[0] = '\0';
+ printf(" %-46s: %lu%s\n", "Free global memory (AMD)", szvals[cursor], buffer);
+ }
+
+ INT_PARAM(GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)",);
+ INT_PARAM(GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)",);
+ INT_PARAM(GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", " bytes");
+ }
+
BOOL_PARAM(ERROR_CORRECTION_SUPPORT, "Error Correction support");
MEM_PARAM(MAX_MEM_ALLOC_SIZE, "Max memory allocation");
BOOL_PARAM(HOST_UNIFIED_MEMORY, "Unified memory for Host and Device");
@@ -466,9 +510,14 @@ printDeviceInfo(cl_uint d)
// local
GET_PARAM(LOCAL_MEM_TYPE, lmemtype);
- STR_PRINT("Local Memory type", local_mem_type_str[lmemtype]);
+ STR_PRINT("Local memory type", local_mem_type_str[lmemtype]);
if (lmemtype != CL_NONE)
- MEM_PARAM(LOCAL_MEM_SIZE, "Local Memory size");
+ MEM_PARAM(LOCAL_MEM_SIZE, "Local memory size");
+ if (*has_amd && is_gpu) {
+ MEM_PARAM(LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory size per CU (AMD)");
+ INT_PARAM(LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)",);
+ }
+
// constant
MEM_PARAM(MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size");
@@ -489,6 +538,10 @@ printDeviceInfo(cl_uint d)
STR_PRINT(" Out-of-order execution", bool_str[!!(queueprop & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)]);
STR_PRINT(" Profiling", bool_str[!!(queueprop & CL_QUEUE_PROFILING_ENABLE)]);
SZ_PARAM(PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns");
+ if (*has_amd) {
+ // TODO print this in a more meaningful way
+ LONG_PARAM(PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", "ns");
+ }
printf(" %-46s:\n", "Execution capabilities");
GET_PARAM(EXECUTION_CAPABILITIES, execap);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/clinfo.git
More information about the Pkg-opencl-commits
mailing list