pax_global_header00006660000000000000000000000064132616052110014507gustar00rootroot0000000000000052 comment=59d0daf898e48d76ccbb788acbba258fa0a8ba7c clinfo-2.2.18.04.06/000077500000000000000000000000001326160521100135215ustar00rootroot00000000000000clinfo-2.2.18.04.06/.appveyor.yml000066400000000000000000000005301326160521100161650ustar00rootroot00000000000000version: 2.2.18.03.22-{build} image: Visual Studio 2015 shallow_clone: true platform: - x86 - x64 init: - cmd: C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM% install: - cmd: fetch-opencl-dev-win.cmd %PLATFORM% build_script: - cmd: make.cmd test_script: - cmd: clinfo artifacts: - path: clinfo.exe clinfo-2.2.18.04.06/.gitignore000066400000000000000000000000221326160521100155030ustar00rootroot00000000000000clinfo .*.swp *.o clinfo-2.2.18.04.06/.travis.yml000066400000000000000000000003501326160521100156300ustar00rootroot00000000000000os: - linux - osx dist: trusty addons: apt: sources: - sourceline: "deb http://archive.ubuntu.com/ubuntu trusty universe" packages: - ocl-icd-opencl-dev language: c compiler: - gcc - clang script: make && ./clinfo clinfo-2.2.18.04.06/LICENSE000066400000000000000000000004731326160521100145320ustar00rootroot00000000000000clinfo by Giuseppe Bilotta To the extent possible under law, the person who associated CC0 with clinfo has waived all copyright and related or neighboring rights to clinfo. You should have received a copy of the CC0 legalcode along with this work. 
If not, see clinfo-2.2.18.04.06/Makefile000066400000000000000000000026601326160521100151650ustar00rootroot00000000000000# An interesting trick run a shell command: # GNU Make uses $(shell cmd), whereas # BSD make use $(var:sh), where ${var} holds the command OS.exec = uname -s OS ?= $(shell $(OS.exec))$(OS.exec:sh) OS := $(OS) # Headers PROG = clinfo MAN = man1/$(PROG).1 HDR = src/error.h \ src/ext.h \ src/ctx_prop.h \ src/fmtmacros.h \ src/memory.h \ src/ms_support.h \ src/info_loc.h \ src/info_ret.h \ src/opt_out.h \ src/strbuf.h VPATH = src CFLAGS ?= -g -pedantic -Werror CFLAGS += -std=c99 -Wall -Wextra SPARSE ?= sparse SPARSEFLAGS=-Wsparse-all -Wno-decl # BSD make does not define RM RM ?= rm -f # Installation paths and modes PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin BINMODE ?= 555 MANDIR ?= $(PREFIX)/man MANMODE ?= 444 # Common library includes LDLIBS = -lOpenCL -ldl # OS-specific library includes LDLIBS_Darwin = -framework OpenCL LDLIBS_Darwin_exclude = -lOpenCL LDLIBS += $(LDLIBS_${OS}) # Remove -lOpenCL if OS is Darwin LDLIBS := $(LDLIBS:$(LDLIBS_${OS}_exclude)=) # # Standard targets # $(PROG): $(PROG).o $(PROG).o: $(PROG).c $(HDR) clean: $(RM) $(PROG).o $(PROG) $(BINDIR): install -d $@ $(MANDIR)/man1: install -d $@ $(BINDIR)/$(PROG): $(PROG) $(BINDIR) install -p -m $(BINMODE) $(PROG) $@ $(MANDIR)/$(MAN): $(MAN) $(MANDIR)/man1 install -p -m $(MANMODE) $(MAN) $@ install: $(BINDIR)/$(PROG) $(MANDIR)/$(MAN) sparse: $(PROG).c $(SPARSE) $(CPPFLAGS) $(CFLAGS) $(SPARSEFLAGS) $^ .PHONY: clean sparse install clinfo-2.2.18.04.06/Makefile.win000066400000000000000000000032401326160521100157540ustar00rootroot00000000000000# TODO FIXME find a better way to detect the directory to use # for OpenCL development files !IF "$(OPENCLDIR)" == "" OPENCLDIR = $(INTELOCLSDKROOT) !ENDIF !IF "$(OPENCLDIR)" == "" OPENCLDIR = $(AMDAPPSDKROOT) !ENDIF !IF "$(OPENCLDIR)" == "" OPENCLDIR = $(MAKEDIR) !ENDIF !IF "$(OPENCLDIR)" == "" OPENCLDIR = . 
!ENDIF !MESSAGE OpenCL dir: $(OPENCLDIR) HDR = src/error.h \ src/ext.h \ src/ctx_prop.h \ src/fmtmacros.h \ src/memory.h \ src/ms_support.h \ src/info_loc.h \ src/info_ret.h \ src/opt_out.h \ src/strbuf.h CFLAGS = /GL /Ox /W4 /Zi /I"$(OPENCLDIR)\include" /nologo LIBS = libOpenCL.a # TODO there's most likely a better way to do the multiarch # switching !IF "$(PROCESSOR_ARCHITECTURE)" == "AMD64" ARCH=64 !ELSE ARCH=32 !ENDIF # Platform=x64 in the 64-bit cross-platform build of my VS !IF "$(PLATFORM)" == "x64" || "$(PLATFORM)" == "X64" ARCH=64 !ELSE IF "$(PLATFORM)" == "x86" || "$(PLATFORM)" == "X86" ARCH=32 !ENDIF !MESSAGE Building for $(ARCH)-bit (processor architecture: $(PROCESSOR_ARCHITECTURE), platform: $(PLATFORM)) LIBPATH32 = /LIBPATH:"$(OPENCLDIR)\lib" /LIBPATH:"$(OPENCLDIR)\lib\x86" LIBPATH64 = /LIBPATH:"$(OPENCLDIR)\lib\x64" /LIBPATH:"$(OPENCLDIR)\lib\x86_64" /LIBPATH:"$(OPENCLDIR)\lib\x86_amd64" # And since we can't do $(LIBPATH$(ARCH)) with nmake ... !IF "$(ARCH)" == "64" LINKOPTS = /LTCG $(LIBPATH64) /nologo !ELSE LINKOPTS = /LTCG $(LIBPATH32) /nologo !ENDIF clinfo.exe: clinfo.obj link $(LINKOPTS) $(LIBS) clinfo.obj /out:clinfo.exe clinfo.obj: src/clinfo.c $(HDR) $(CC) $(CFLAGS) /c src/clinfo.c /Foclinfo.obj clean: del /F /Q clinfo.exe clinfo.obj .PHONY: clean clinfo-2.2.18.04.06/README.md000066400000000000000000000051261326160521100150040ustar00rootroot00000000000000# What is this? clinfo is a simple command-line application that enumerates all possible (known) properties of the OpenCL platform and devices available on the system. Inspired by AMD's program of the same name, it is coded in pure C and it tries to output all possible information, including those provided by platform-specific extensions, trying not to crash on unsupported properties (e.g. 1.2 properties on 1.1 platforms). # Usage clinfo [options...] 
Commonly used options are `-l` to show a synthetic summary of the available devices (without properties), and `-a`, to try and show properties even if `clinfo` would otherwise think they aren't supported by the platform or device. Refer to the man page for further information. ## Use cases * verify that your OpenCL environment is set up correctly; if `clinfo` cannot find any platform or devices (or fails to load the OpenCL dispatcher library), chances are high no other OpenCL application will run; * verify that your OpenCL _development_ environment is set up correctly: if `clinfo` fails to build, chances are high no other OpenCL application will build; * explore/report the actual properties of the available device(s). # Building Build status on Travis Building requires an OpenCL SDK (or at least OpenCL headers and development files), and the standard build environment for the platform. No special build system is used (autotools, CMake, meson, ninja, etc), as I feel adding more dependencies for such a simple program would be excessive. Simply running `make` at the project root should work. ## Windows support The application can usually be built in Windows too (support for which required way more time than I should have spent, really, but I digress), by running `make` in a Developer Command Prompt for Visual Studio, provided an OpenCL SDK (such as the Intel or AMD one) is installed. Precompiled Windows executables are available as artefacts of the AppVeyor CI.
Build statusWindows binaries
Build status on AppVeyor 32-bit 64-bit
clinfo-2.2.18.04.06/fetch-opencl-dev-win.cmd000066400000000000000000000010401326160521100201170ustar00rootroot00000000000000REM call as fetch-opencl-dev-win x86|x86_64|x64 git clone https://github.com/KhronosGroup/OpenCL-Headers move OpenCL-Headers/opencl22 include set sub=%1 if /i "%sub%" == "x64" set sub=x86_64 mkdir lib\%sub% curl -L -o lib/%sub%/libOpenCL.a https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/lib/%sub%/libOpenCL.a -o lib/%sub%/OpenCL.lib https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/lib/%sub%/OpenCL.lib -o OpenCL.dll https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/bin/%sub%/OpenCL.dll clinfo-2.2.18.04.06/legalcode.txt000066400000000000000000000156101326160521100162040ustar00rootroot00000000000000Creative Commons Legal Code CC0 1.0 Universal CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. Statement of Purpose The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 
Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; ii. moral rights retained by the original author(s) and/or performer(s); iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; v. rights protecting the extraction, dissemination, use and reuse of data in a Work; vi. 
database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 4. Limitations and Disclaimers. a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. c. 
Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. clinfo-2.2.18.04.06/make.cmd000066400000000000000000000000421326160521100151170ustar00rootroot00000000000000nmake /F Makefile.win /nologo %* clinfo-2.2.18.04.06/man1/000077500000000000000000000000001326160521100143555ustar00rootroot00000000000000clinfo-2.2.18.04.06/man1/clinfo.1000066400000000000000000000170101326160521100157100ustar00rootroot00000000000000.TH CLINFO 1 "2018-04-06" "clinfo 2.2.18.04.06" .SH NAME clinfo \- show OpenCL platforms and devices .SH SYNOPSIS .B clinfo .RI [ "options ..." ] .SH DESCRIPTION .B clinfo prints all available information about all OpenCL platforms available on the system and the devices they expose. .SH OPTIONS .B clinfo accepts the following options: .TP 2 .BR -a ", " --all-props try to retrieve all properties, even those not officially supported (e.g. 
because they require specific extensions), but only show them if the property could be retrieved successfully; see also the .B LIMITATIONS section below; note that even though this may reveal hidden properties, there is no guarantee that the returned values are meaningful, nor that the corresponding feature is actually available at all; .TP .BR -A ", " --always-all-props like .BR -a , but also show errors; .TP .B --human produce human-friendly output; this is the default (except as noted below); .TP .B --raw produce machine-friendly output; this is the default if .B clinfo is invoked with a name that contains the string .RI \*(lq raw \*(rq; .TP .B --offline shows also offline devices for platforms that expose this feature; .TP .BR -l ", " --list list platforms and devices by name, with no (other) properties; .TP .BR -h ", " -? show usage; .TP .BR --version ", " -v show program version. .SH CONFORMING TO OpenCL 1.1, OpenCL 1.2, OpenCL 2.0, OpenCL 2.1, OpenCL 2.2. .SH EXTENSIONS Supported OpenCL extensions: .TP 2 .BR cl_khr_fp16 ", " cl_khr_fp64 ", " cl_amd_fp64 ", " cl_APPLE_fp64_basic_ops for information about support for half-precision and double-precision floating-point data types; .TP .B cl_khr_image2d_from_buffer for information about the base address and pitch alignment requirements of buffers to be used as base for 2D images; .TP .B cl_khr_il_program for information about the supported IL (Intermediate Language) representations; .TP .B cl_khr_spir for information about the supported SPIR (Standard Portable Intermediate Representation) versions; .TP .B cl_khr_icd for the suffix of vendor extensions functions; .TP .B cl_khr_subgroup_named_barrier for the maximum number of named sub-group barriers; .TP .B cl_khr_terminate_context for the terminate capabilities for the device; .TP .B cl_ext_device_fission for device fission support in OpenCL 1.1 devices; .TP .B cl_ext_atomic_counters_32 .TQ .B cl_ext_atomic_counters_64 for the atomic counter extension; .TP .B 
cl_amd_device_attribute_query for AMD-specific device attributes; .TP .B cl_amd_object_metadata to show the maximum number of keys supported by the platform; .TP .B cl_amd_offline_devices to show offline devices exposed by the platform, if requested (see .B --offline option); .TP .B cl_amd_copy_buffer_p2p to show the number and IDs of available P2P devices; .TP .B cl_amd_svm .TQ .B cl_arm_shared_virtual_memory for Shared Virtual Memory (SVM) capabilities in OpenCL 1.2 devices; .TP .B cl_nv_device_attribute_query for NVIDIA-specific device attributes; .TP .B cl_intel_exec_by_local_thread for the Intel extension allowing CPU devices to run kernels as part of the current host thread; .TP .B cl_intel_advanced_motion_estimation for the version of the Intel Motion Estimation accelerator; .TP .B cl_intel_device_side_avc_motion_estimation for the version and supported features of Intel's device-side AVC Motion; .TP .B cl_intel_planar_yuv for the maximum dimensions of planar YUV images; .TP .B cl_intel_simultaneous_sharing for simultaneous CL/GL/DirectX context sharing (only partial support); .TP .B cl_intel_required_subgroup_size to enumerate allowed sub-group sizes; .TP .B cl_altera_device_temperature for the Altera extension to query the core temperature of the device; .TP .B cl_qcom_ext_host_ptr for the QUALCOMM extension to query page size and required padding in external memory allocation. .SH NOTES Some information is duplicated when available from multiple sources. Examples: .IP \(bu 2 supported device partition types and domains as obtained using the .B cl_ext_device_fission extension typically match the ones obtained using the core OpenCL 1.2 device partition feature; .IP \(bu the preferred work-group size multiple matches the NVIDIA warp size (on NVIDIA devices) or the AMD wavefront width (on AMD devices). .P Some floating-point configuration flags may only be meaningful for specific precisions and/or specific OpenCL versions.
For example, .B CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only relevant for single precision in OpenCL 1.2 devices. .P The implementation-defined behavior for NULL platform or context properties is tested for the following API calls: .TP 2 .B clGetPlatformInfo() by trying to show the platform name; .TP .B clGetDeviceIDs() by trying to enumerate devices; the corresponding platform (if any) is then detected by querying the device platform of the first device; .TP .B clCreateContext() by trying to create a context from a device from the previous list (if any), and a context from a device from a different platform; .TP .B clCreateContextFromType() by trying to create contexts for each device type (except DEFAULT). .SH EXPERIMENTAL FEATURES .P Support for OpenCL 2.x properties is not fully tested. .P Support for .B cl_khr_subgroup_named_barrier is experimental due to missing definitions in the official OpenCL headers. .P Raw (machine-parsable) output is considered experimental; the output format might still undergo changes. .P The properties of the ICD loader will also be queried if the .B clGetICDLoaderInfoOCLICD extension function is found. .P Support for the properties exposed by .B cl_amd_copy_buffer_p2p is experimental. .P Support for some (documented and undocumented) properties exposed by .B cl_amd_device_attribute_query is experimental (see also .BR LIMITATIONS ). .P Support for the interop lists exposed by .B cl_intel_simultaneous_sharing is experimental. .P The highest OpenCL version supported by the ICD loader is detected with some trivial heuristics (symbols found); a notice is output if this is lower than the highest platform OpenCL version, or if the detected version doesn't match the one declared by the ICD loader itself.
.SH LIMITATIONS .P OpenCL provides no explicit means to detect the supported version of any extension exposed by a device, which makes it impossible to determine a priori if it will be possible to successfully query a device about a specific property. Additionally, the actual size and meaning of some properties are not officially declared anywhere. .P Most notably, this affects extensions such as .B cl_amd_device_attribute_query and .BR cl_nv_device_attribute_query . Heuristics based on standard version support are partially used in the code to determine which version may be supported. .P Properties which are known to be affected by these limitations include: .TP 2 .B CL_DEVICE_GLOBAL_FREE_MEMORY_AMD documented in v3 of the .B cl_amd_device_attribute_query extension specification as being the global free memory in KBytes, without any explanation given on why there are two values; .TP .B CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD documented in v3 of the .B cl_amd_device_attribute_query extension specification, but not reported by drivers supporting other v3 properties. This has now been enabled for drivers .I assumed to support v4 of the same extension; .TP .B CL_DEVICE_TERMINATE_CAPABILITY_KHR exposed by the .B cl_khr_terminate_context extension, has changed value between OpenCL 1.x and 2.x, and it's .I allegedly a bitfield, whose values are however not defined anywhere. .SH BUGS .P Please report any issues on .UR http://github.com/Oblomov/clinfo the project tracker on GitHub .UE .
clinfo-2.2.18.04.06/new-version.sh000077500000000000000000000013141326160521100163330ustar00rootroot00000000000000#!/bin/sh # Change the version recorded in src/clinfo.c and man1/clinfo.1 to # the current highest OpenCL supported standard followed by current # yy.mm.dd abort() { echo "$1" >&2 exit 1 } test -n "$(git status --porcelain | grep -v '??')" && abort "Uncommited changes, aborting" DATE=$(date +%Y-%m-%d) MAJOR=$(awk '/^OpenCL/ { print $NF ; exit }' man1/clinfo.1) SUBV=$(date +%y.%m.%d) VERSION="$MAJOR$SUBV" sed -i -e "/clinfo version/ s/version \S\+\"/version $VERSION\"/" src/clinfo.c && sed -i -e "1 s/\".\+$/\"$DATE\" \"clinfo $VERSION\"/" man1/clinfo.1 && sed -i -e "1 s/\".\+$/version: $VERSION-{build}/" .appveyor.yml && git commit -m "Version $VERSION" -e -a && git tag -m "Version $VERSION" $VERSION clinfo-2.2.18.04.06/src/000077500000000000000000000000001326160521100143105ustar00rootroot00000000000000clinfo-2.2.18.04.06/src/clinfo.c000066400000000000000000003222431326160521100157340ustar00rootroot00000000000000/* Collect all available information on all available devices * on all available OpenCL platforms present in the system */ #include #include /* We will want to check for symbols in the OpenCL library. 
* On Windows, we must get the module handle for it, on Unix-like * systems we can just use RTLD_DEFAULT */ #ifdef _MSC_VER # include # define dlsym GetProcAddress # define DL_MODULE GetModuleHandle("OpenCL") #else # include # define DL_MODULE ((void*)0) /* This would be RTLD_DEFAULT */ #endif /* Load STDC format macros (PRI*), or define them * for those crappy, non-standard compilers */ #include "fmtmacros.h" // More support for the horrible MS C compiler #ifdef _MSC_VER #include "ms_support.h" #endif #include "error.h" #include "memory.h" #include "strbuf.h" #include "ext.h" #include "ctx_prop.h" #include "info_loc.h" #include "info_ret.h" #include "opt_out.h" #define ARRAY_SIZE(ar) (sizeof(ar)/sizeof(*ar)) #ifndef UNUSED #define UNUSED(x) x __attribute__((unused)) #endif struct platform_data { char *pname; /* CL_PLATFORM_NAME */ char *sname; /* CL_PLATFORM_ICD_SUFFIX_KHR or surrogate */ cl_uint ndevs; /* number of devices */ cl_bool has_amd_offline; /* has cl_amd_offline_devices extension */ }; struct platform_info_checks { cl_uint plat_version; cl_bool has_khr_icd; cl_bool has_amd_object_metadata; }; struct platform_list { /* Number of platforms in the system */ cl_uint num_platforms; /* Total number of devices across all platforms */ cl_uint ndevs_total; /* Number of devices allocated in all_devs array */ cl_uint alloc_devs; /* Highest OpenCL version supported by any platform. 
* If the OpenCL library / ICD loader only supports * a lower version, problems may arise (such as * API calls causing segfaults or any other unexpected * behavior */ cl_uint max_plat_version; /* Largest number of devices on any platform */ cl_uint max_devs; /* Length of the longest platform sname */ cl_int max_sname_len; /* Array of platform IDs */ cl_platform_id *platform; /* Array of device IDs (across all platforms) */ cl_device_id *all_devs; /* Array of offsets in all_devs where the devices * of each platform begin */ cl_uint *dev_offset; /* Array of clinfo-specific platform data */ struct platform_data *pdata; /* Arrau of clinfo-specifici platform checks */ struct platform_info_checks *platform_checks; }; void init_plist(struct platform_list *plist) { plist->num_platforms = 0; plist->ndevs_total = 0; plist->alloc_devs = 0; plist->max_plat_version = 0; plist->platform = NULL; plist->all_devs = NULL; plist->dev_offset = NULL; plist->pdata = NULL; plist->platform_checks = NULL; } void plist_devs_reserve(struct platform_list *plist, cl_uint amount) { if (amount > plist->alloc_devs) { REALLOC(plist->all_devs, amount, "all devices"); plist->alloc_devs = amount; } } void alloc_plist(struct platform_list *plist) { ALLOC(plist->platform, plist->num_platforms, "platform IDs"); ALLOC(plist->dev_offset, plist->num_platforms, "platform device list offset"); /* The actual sizing for this will change as we gather platform info, * but assume at least one device per platform */ plist_devs_reserve(plist, plist->num_platforms); ALLOC(plist->pdata, plist->num_platforms, "platform data"); ALLOC(plist->platform_checks, plist->num_platforms, "platform checks data"); } void free_plist(struct platform_list *plist) { free(plist->platform); free(plist->all_devs); free(plist->dev_offset); free(plist->pdata); free(plist->platform_checks); init_plist(plist); } const cl_device_id * get_platform_devs(const struct platform_list *plist, cl_uint p) { return plist->all_devs + 
plist->dev_offset[p]; } cl_device_id get_platform_dev(const struct platform_list *plist, cl_uint p, cl_uint d) { return get_platform_devs(plist, p)[d]; } /* Data for the OpenCL library / ICD loader */ struct icdl_data { /* auto-detected OpenCL version support for the ICD loader */ cl_uint detected_version; /* OpenCL version support declared by the ICD loader */ cl_uint reported_version; }; /* line prefix, used to identify the platform/device for each * device property in RAW output mode */ char *line_pfx; int line_pfx_len; #define CHECK_SIZE(ret, loc, val, cmd, ...) do { \ /* check if the issue is with param size */ \ if (output->check_size && ret->err == CL_INVALID_VALUE) { \ size_t _actual_sz; \ if (cmd(__VA_ARGS__, 0, NULL, &_actual_sz) == CL_SUCCESS) { \ REPORT_SIZE_MISMATCH(&(ret->err_str), loc, _actual_sz, sizeof(val)); \ } \ } \ } while (0) static const char unk[] = "Unknown"; static const char none[] = "None"; static const char none_raw[] = "CL_NONE"; static const char na[] = "n/a"; // not available static const char na_wrap[] = "(n/a)"; // not available static const char core[] = "core"; static const char bytes_str[] = " bytes"; static const char pixels_str[] = " pixels"; static const char images_str[] = " images"; static const char* bool_str[] = { "No", "Yes" }; static const char* bool_raw_str[] = { "CL_FALSE", "CL_TRUE" }; static const char* endian_str[] = { "Big-Endian", "Little-Endian" }; static const cl_device_type devtype[] = { 0, CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_CUSTOM, CL_DEVICE_TYPE_ALL }; const size_t devtype_count = ARRAY_SIZE(devtype); /* number of actual device types, without ALL */ const size_t actual_devtype_count = ARRAY_SIZE(devtype) - 1; static const char* device_type_str[] = { unk, "Default", "CPU", "GPU", "Accelerator", "Custom", "All" }; static const char* device_type_raw_str[] = { unk, "CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", 
"CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_CUSTOM", "CL_DEVICE_TYPE_ALL" }; static const char* partition_type_str[] = { none, "equally", "by counts", "by affinity domain", "by names (Intel)" }; static const char* partition_type_raw_str[] = { none_raw, "CL_DEVICE_PARTITION_EQUALLY_EXT", "CL_DEVICE_PARTITION_BY_COUNTS_EXT", "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT", "CL_DEVICE_PARTITION_BY_NAMES_INTEL_EXT" }; static const char numa[] = "NUMA"; static const char l1cache[] = "L1 cache"; static const char l2cache[] = "L2 cache"; static const char l3cache[] = "L3 cache"; static const char l4cache[] = "L4 cache"; static const char* affinity_domain_str[] = { numa, l4cache, l3cache, l2cache, l1cache, "next partitionable" }; static const char* affinity_domain_ext_str[] = { numa, l4cache, l3cache, l2cache, l1cache, "next fissionable" }; static const char* affinity_domain_raw_str[] = { "CL_DEVICE_AFFINITY_DOMAIN_NUMA", "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE" }; static const char* affinity_domain_raw_ext_str[] = { "CL_AFFINITY_DOMAIN_NUMA_EXT", "CL_AFFINITY_DOMAIN_L4_CACHE_EXT", "CL_AFFINITY_DOMAIN_L3_CACHE_EXT", "CL_AFFINITY_DOMAIN_L2_CACHE_EXT", "CL_AFFINITY_DOMAIN_L1_CACHE_EXT", "CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT" }; const size_t affinity_domain_count = ARRAY_SIZE(affinity_domain_str); static const char *terminate_capability_str[] = { "Context" }; static const char *terminate_capability_raw_str[] = { "CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR" }; const size_t terminate_capability_count = ARRAY_SIZE(terminate_capability_str); static const char* fp_conf_str[] = { "Denormals", "Infinity and NANs", "Round to nearest", "Round to zero", "Round to infinity", "IEEE754-2008 fused multiply-add", "Support is emulated in software", "Correctly-rounded divide and sqrt operations" }; static const char* fp_conf_raw_str[] 
= { "CL_FP_DENORM", "CL_FP_INF_NAN", "CL_FP_ROUND_TO_NEAREST", "CL_FP_ROUND_TO_ZERO", "CL_FP_ROUND_TO_INF", "CL_FP_FMA", "CL_FP_SOFT_FLOAT", "CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT" }; const size_t fp_conf_count = ARRAY_SIZE(fp_conf_str); static const char* svm_cap_str[] = { "Coarse-grained buffer sharing", "Fine-grained buffer sharing", "Fine-grained system sharing", "Atomics" }; static const char* svm_cap_raw_str[] = { "CL_DEVICE_SVM_COARSE_GRAIN_BUFFER", "CL_DEVICE_SVM_FINE_GRAIN_BUFFER", "CL_DEVICE_SVM_FINE_GRAIN_SYSTEM", "CL_DEVICE_SVM_ATOMICS", }; const size_t svm_cap_count = ARRAY_SIZE(svm_cap_str); /* SI suffixes for memory sizes. Note that in OpenCL most of them are * passed via a cl_ulong, which at most can mode 16 EiB, but hey, * let's be forward-thinking ;-) */ static const char* memsfx[] = { "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" }; const size_t memsfx_end = ARRAY_SIZE(memsfx) + 1; static const char* lmem_type_str[] = { none, "Local", "Global" }; static const char* lmem_type_raw_str[] = { none_raw, "CL_LOCAL", "CL_GLOBAL" }; static const char* cache_type_str[] = { none, "Read-Only", "Read/Write" }; static const char* cache_type_raw_str[] = { none_raw, "CL_READ_ONLY_CACHE", "CL_READ_WRITE_CACHE" }; static const char* queue_prop_str[] = { "Out-of-order execution", "Profiling" }; static const char* queue_prop_raw_str[] = { "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", "CL_QUEUE_PROFILING_ENABLE" }; const size_t queue_prop_count = ARRAY_SIZE(queue_prop_str); static const char* execap_str[] = { "Run OpenCL kernels", "Run native kernels" }; static const char* execap_raw_str[] = { "CL_EXEC_KERNEL", "CL_EXEC_NATIVE_KERNEL" }; const size_t execap_count = ARRAY_SIZE(execap_str); static const char* sources[] = { "#define GWO(type) global type* restrict\n", "#define GRO(type) global const type* restrict\n", "#define BODY int i = get_global_id(0); out[i] = in1[i] + in2[i]\n", "#define _KRN(T, N) void kernel sum##N(GWO(T##N) out, GRO(T##N) in1, 
GRO(T##N) in2) { BODY; }\n", "#define KRN(N) _KRN(float, N)\n", "KRN()\n/* KRN(2)\nKRN(4)\nKRN(8)\nKRN(16) */\n", }; const char *num_devs_header(const struct opt_out *output, cl_bool these_are_offline) { return output->mode == CLINFO_HUMAN ? (these_are_offline ? "Number of offine devices (AMD)" : "Number of devices") : (these_are_offline ? "#OFFDEVICES" : "#DEVICES"); } const char *not_specified(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? na_wrap : ""; } const char *no_plat(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "No platform" : "CL_INVALID_PLATFORM"; } const char *invalid_dev_type(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "Invalid device type for platform" : "CL_INVALID_DEVICE_TYPE"; } const char *invalid_dev_value(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "Invalid device type value for platform" : "CL_INVALID_VALUE"; } const char *no_dev_found(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "No devices found in platform" : "CL_DEVICE_NOT_FOUND"; } const char *no_dev_avail(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? 
"No devices available in platform" : "CL_DEVICE_NOT_AVAILABLE"; } /* OpenCL context interop names */ typedef struct cl_interop_name { cl_uint from; cl_uint to; /* 5 because that's the largest we know of, * 2 because it's HUMAN, RAW */ const char *value[5][2]; } cl_interop_name; static const cl_interop_name cl_interop_names[] = { { /* cl_khr_gl_sharing */ CL_GL_CONTEXT_KHR, CL_CGL_SHAREGROUP_KHR, { { "GL", "CL_GL_CONTEXT_KHR" }, { "EGL", "CL_EGL_DISPALY_KHR" }, { "GLX", "CL_GLX_DISPLAY_KHR" }, { "WGL", "CL_WGL_HDC_KHR" }, { "CGL", "CL_CGL_SHAREGROUP_KHR" } } }, { /* cl_khr_dx9_media_sharing */ CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_ADAPTER_DXVA_KHR, { { "D3D9 (KHR)", "CL_CONTEXT_ADAPTER_D3D9_KHR" }, { "D3D9Ex (KHR)", "CL_CONTEXT_ADAPTER_D3D9EX_KHR" }, { "DXVA (KHR)", "CL_CONTEXT_ADAPTER_DXVA_KHR" } } }, { /* cl_khr_d3d10_sharing */ CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D10_DEVICE_KHR, { { "D3D10", "CL_CONTEXT_D3D10_DEVICE_KHR" } } }, { /* cl_khr_d3d11_sharing */ CL_CONTEXT_D3D11_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR, { { "D3D11", "CL_CONTEXT_D3D11_DEVICE_KHR" } } }, { /* cl_intel_dx9_media_sharing */ CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_DXVA_DEVICE_INTEL, { { "D3D9 (INTEL)", "CL_CONTEXT_D3D9_DEVICE_INTEL" }, { "D3D9Ex (INTEL)", "CL_CONTEXT_D3D9EX_DEVICE_INTEL" }, { "DXVA (INTEL)", "CL_CONTEXT_DXVA_DEVICE_INTEL" } } }, { /* cl_intel_va_api_media_sharing */ CL_CONTEXT_VA_API_DISPLAY_INTEL, CL_CONTEXT_VA_API_DISPLAY_INTEL, { { "VA-API", "CL_CONTEXT_VA_API_DISPLAY_INTEL" } } } }; const size_t num_known_interops = ARRAY_SIZE(cl_interop_names); #define INDENT " " #define I0_STR "%-48s " #define I1_STR " %-46s " #define I2_STR " %-44s " static const char empty_str[] = ""; static const char spc_str[] = " "; static const char times_str[] = "x"; static const char comma_str[] = ", "; static const char vbar_str[] = " | "; const char *cur_sfx = empty_str; /* parse a CL_DEVICE_VERSION or CL_PLATFORM_VERSION info to determine the OpenCL version. 
* Returns an unsigned integer in the form major*10 + minor */ cl_uint getOpenCLVersion(const char *version) { cl_uint ret = 10; long parse = 0; const char *from = version; char *next = NULL; parse = strtol(from, &next, 10); if (next != from) { ret = parse*10; // skip the dot TODO should we actually check for the dot? from = ++next; parse = strtol(from, &next, 10); if (next != from) ret += parse; } return ret; } #define SPLIT_CL_VERSION(ver) ((ver)/10), ((ver)%10) /* print strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero, * affixing cur_sfx */ static inline void show_strbuf(const struct _strbuf *strbuf, const char *pname, int skip, cl_int err) { printf("%s" I1_STR "%s%s\n", line_pfx, pname, (skip ? skip_leading_ws(strbuf->buf) : strbuf->buf), err ? empty_str : cur_sfx); } void platform_info_str(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out* UNUSED(output)) { GET_STRING_LOC(ret, loc, clGetPlatformInfo, loc->plat, loc->param.plat); } void platform_info_ulong(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u64), &ret->value.u64, NULL), loc, "get %s"); CHECK_SIZE(ret, loc, ret->value.u64, clGetPlatformInfo, loc->plat, loc->param.plat); strbuf_printf(&ret->str, "%" PRIu64, ret->value.u64); } void platform_info_sz(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.s), &ret->value.s, NULL), loc, "get %s"); CHECK_SIZE(ret, loc, ret->value.s, clGetPlatformInfo, loc->plat, loc->param.plat); strbuf_printf(&ret->str, "%" PRIuS, ret->value.s); } struct platform_info_traits { cl_platform_info param; // 
CL_PLATFORM_* const char *sname; // "CL_PLATFORM_*" const char *pname; // "Platform *" const char *sfx; // suffix for the output in non-raw mode /* pointer to function that retrieves the parameter */ void (*show_func)(struct platform_info_ret *, const struct info_loc *, const struct platform_info_checks *, const struct opt_out *); /* pointer to function that checks if the parameter should be retrieved */ cl_bool (*check_func)(const struct platform_info_checks *); }; cl_bool khr_icd_p(const struct platform_info_checks *chk) { return chk->has_khr_icd; } cl_bool plat_is_20(const struct platform_info_checks *chk) { return !(chk->plat_version < 20); } cl_bool plat_is_21(const struct platform_info_checks *chk) { return !(chk->plat_version < 21); } cl_bool plat_has_amd_object_metadata(const struct platform_info_checks *chk) { return chk->has_amd_object_metadata; } #define PINFO_COND(symbol, name, sfx, typ, funcptr) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, &funcptr } #define PINFO(symbol, name, sfx, typ) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, NULL } struct platform_info_traits pinfo_traits[] = { PINFO(CL_PLATFORM_NAME, "Name", NULL, str), PINFO(CL_PLATFORM_VENDOR, "Vendor", NULL, str), PINFO(CL_PLATFORM_VERSION, "Version", NULL, str), PINFO(CL_PLATFORM_PROFILE, "Profile", NULL, str), PINFO(CL_PLATFORM_EXTENSIONS, "Extensions", NULL, str), PINFO_COND(CL_PLATFORM_MAX_KEYS_AMD, "Max metadata object keys (AMD)", NULL, sz, plat_has_amd_object_metadata), PINFO_COND(CL_PLATFORM_HOST_TIMER_RESOLUTION, "Host timer resolution", "ns", ulong, plat_is_21), PINFO_COND(CL_PLATFORM_ICD_SUFFIX_KHR, "Extensions function suffix", NULL, str, khr_icd_p) }; /* Collect (and optionally show) infomation on a specific platform, * initializing relevant arrays and optionally showing the collected * information */ void gatherPlatformInfo(struct platform_list *plist, cl_uint p, const struct opt_out *output) { cl_int len = 0; struct platform_data *pdata = 
plist->pdata + p; struct platform_info_checks *pinfo_checks = plist->platform_checks + p; struct platform_info_ret ret; struct info_loc loc; pinfo_checks->plat_version = 10; INIT_RET(ret, "platform"); reset_loc(&loc, __func__); loc.plat = plist->platform[p]; for (loc.line = 0; loc.line < ARRAY_SIZE(pinfo_traits); ++loc.line) { const struct platform_info_traits *traits = pinfo_traits + loc.line; /* checked is true if there was no condition to check for, or if the * condition was satisfied */ int checked = !(traits->check_func && !traits->check_func(pinfo_checks)); if (output->cond == COND_PROP_CHECK && !checked) continue; loc.sname = traits->sname; loc.pname = (output->mode == CLINFO_HUMAN ? traits->pname : traits->sname); loc.param.plat = traits->param; cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str; ret.str.buf[0] = '\0'; ret.err_str.buf[0] = '\0'; traits->show_func(&ret, &loc, pinfo_checks, output); /* The property is skipped if this was a conditional property, * unsatisfied, there was an error retrieving it and cond_prop_mode is not * COND_PROP_SHOW. 
*/ if (ret.err && !checked && output->cond != COND_PROP_SHOW) continue; /* when only listing, do not print anything, we're just gathering * information */ if (output->detailed) { show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err); } if (ret.err) continue; /* post-processing */ switch (traits->param) { case CL_PLATFORM_NAME: /* Store name for future reference */ len = strlen(ret.str.buf); ALLOC(pdata->pname, len+1, "platform name copy"); /* memcpy instead of strncpy since we already have the len * and memcpy is possibly more optimized */ memcpy(pdata->pname, ret.str.buf, len); pdata->pname[len] = '\0'; break; case CL_PLATFORM_VERSION: /* compute numeric value for OpenCL version */ pinfo_checks->plat_version = getOpenCLVersion(ret.str.buf + 7); break; case CL_PLATFORM_EXTENSIONS: pinfo_checks->has_khr_icd = !!strstr(ret.str.buf, "cl_khr_icd"); pinfo_checks->has_amd_object_metadata = !!strstr(ret.str.buf, "cl_amd_object_metadata"); pdata->has_amd_offline = !!strstr(ret.str.buf, "cl_amd_offline_devices"); break; case CL_PLATFORM_ICD_SUFFIX_KHR: /* Store ICD suffix for future reference */ len = strlen(ret.str.buf); ALLOC(pdata->sname, len+1, "platform ICD suffix copy"); /* memcpy instead of strncpy since we already have the len * and memcpy is possibly more optimized */ memcpy(pdata->sname, ret.str.buf, len); pdata->sname[len] = '\0'; default: /* do nothing */ break; } } if (pinfo_checks->plat_version > plist->max_plat_version) plist->max_plat_version = pinfo_checks->plat_version; /* if no CL_PLATFORM_ICD_SUFFIX_KHR, use P### as short/symbolic name */ if (!pdata->sname) { #define SNAME_MAX 32 ALLOC(pdata->sname, SNAME_MAX, "platform symbolic name"); snprintf(pdata->sname, SNAME_MAX, "P%" PRIu32 "", p); } len = (cl_int)strlen(pdata->sname); if (len > plist->max_sname_len) plist->max_sname_len = len; ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, 0, NULL, &pdata->ndevs); if (ret.err == CL_DEVICE_NOT_FOUND) pdata->ndevs = 0; else CHECK_ERROR(ret.err, "number of 
devices"); plist->ndevs_total += pdata->ndevs; plist->dev_offset[p] = p ? plist->dev_offset[p-1] + (pdata-1)->ndevs : 0; plist_devs_reserve(plist, plist->ndevs_total); if (pdata->ndevs > 0) { ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, pdata->ndevs, plist->all_devs + plist->dev_offset[p], NULL); } if (pdata->ndevs > plist->max_devs) plist->max_devs = pdata->ndevs; UNINIT_RET(ret); } /* * Device properties/extensions used in traits checks, and relevant functions */ struct device_info_checks { const struct platform_info_checks *pinfo_checks; cl_device_type devtype; cl_device_mem_cache_type cachetype; cl_device_local_mem_type lmemtype; cl_bool image_support; cl_bool compiler_available; char has_half[12]; char has_double[24]; char has_nv[29]; char has_amd[30]; char has_amd_svm[11]; char has_arm_svm[29]; char has_fission[22]; char has_atomic_counters[26]; char has_image2d_buffer[27]; char has_il_program[18]; char has_intel_local_thread[30]; char has_intel_AME[36]; char has_intel_AVC_ME[43]; char has_intel_planar_yuv[20]; char has_intel_required_subgroup_size[32]; char has_altera_dev_temp[29]; char has_p2p[23]; char has_spir[12]; char has_qcom_ext_host_ptr[21]; char has_simultaneous_sharing[30]; char has_subgroup_named_barrier[30]; char has_terminate_context[25]; cl_uint dev_version; }; #define DEFINE_EXT_CHECK(ext) cl_bool dev_has_##ext(const struct device_info_checks *chk) \ { \ return !!(chk->has_##ext[0]); \ } DEFINE_EXT_CHECK(half) DEFINE_EXT_CHECK(double) DEFINE_EXT_CHECK(nv) DEFINE_EXT_CHECK(amd) DEFINE_EXT_CHECK(amd_svm) DEFINE_EXT_CHECK(arm_svm) DEFINE_EXT_CHECK(fission) DEFINE_EXT_CHECK(atomic_counters) DEFINE_EXT_CHECK(image2d_buffer) DEFINE_EXT_CHECK(il_program) DEFINE_EXT_CHECK(intel_local_thread) DEFINE_EXT_CHECK(intel_AME) DEFINE_EXT_CHECK(intel_AVC_ME) DEFINE_EXT_CHECK(intel_planar_yuv) DEFINE_EXT_CHECK(intel_required_subgroup_size) DEFINE_EXT_CHECK(altera_dev_temp) DEFINE_EXT_CHECK(p2p) DEFINE_EXT_CHECK(spir) 
DEFINE_EXT_CHECK(qcom_ext_host_ptr) DEFINE_EXT_CHECK(simultaneous_sharing) DEFINE_EXT_CHECK(subgroup_named_barrier) DEFINE_EXT_CHECK(terminate_context) /* In the version checks we negate the opposite conditions * instead of double-negating the actual condition */ // device supports 1.2 cl_bool dev_is_12(const struct device_info_checks *chk) { return !(chk->dev_version < 12); } // device supports 2.0 cl_bool dev_is_20(const struct device_info_checks *chk) { return !(chk->dev_version < 20); } // device supports 2.1 cl_bool dev_is_21(const struct device_info_checks *chk) { return !(chk->dev_version < 21); } // device does not support 2.0 cl_bool dev_not_20(const struct device_info_checks *chk) { return !(chk->dev_version >= 20); } cl_bool dev_is_gpu(const struct device_info_checks *chk) { return !!(chk->devtype & CL_DEVICE_TYPE_GPU); } cl_bool dev_is_gpu_amd(const struct device_info_checks *chk) { return dev_is_gpu(chk) && dev_has_amd(chk); } /* Device supports cl_amd_device_attribute_query v4 */ cl_bool dev_has_amd_v4(const struct device_info_checks *chk) { /* We don't actually have a criterion to check if the device * supports a specific version of an extension, so for the time * being rely on them being GPU devices with cl_amd_device_attribute_query * and the platform supporting OpenCL 2.0 or later * TODO FIXME tune criteria */ return dev_is_gpu(chk) && dev_has_amd(chk) && plat_is_20(chk->pinfo_checks); } cl_bool dev_has_svm(const struct device_info_checks *chk) { return dev_is_20(chk) || dev_has_amd_svm(chk); } cl_bool dev_has_partition(const struct device_info_checks *chk) { return dev_is_12(chk) || dev_has_fission(chk); } cl_bool dev_has_cache(const struct device_info_checks *chk) { return chk->cachetype != CL_NONE; } cl_bool dev_has_lmem(const struct device_info_checks *chk) { return chk->lmemtype != CL_NONE; } cl_bool dev_has_il(const struct device_info_checks *chk) { return dev_is_21(chk) || dev_has_il_program(chk); } cl_bool dev_has_images(const struct 
device_info_checks *chk) { return chk->image_support; } cl_bool dev_has_images_12(const struct device_info_checks *chk) { return dev_has_images(chk) && dev_is_12(chk); } cl_bool dev_has_images_20(const struct device_info_checks *chk) { return dev_has_images(chk) && dev_is_20(chk); } cl_bool dev_has_compiler(const struct device_info_checks *chk) { return chk->compiler_available; } void identify_device_extensions(const char *extensions, struct device_info_checks *chk) { #define _HAS_EXT(ext) (strstr(extensions, ext)) #define HAS_EXT(ext) _HAS_EXT(#ext) #define CPY_EXT(what, ext) do { \ strncpy(chk->has_##what, has, sizeof(ext)); \ chk->has_##what[sizeof(ext)-1] = '\0'; \ } while (0) #define CHECK_EXT(what, ext) do { \ has = _HAS_EXT(#ext); \ if (has) CPY_EXT(what, #ext); \ } while(0) char *has; CHECK_EXT(half, cl_khr_fp16); CHECK_EXT(spir, cl_khr_spir); CHECK_EXT(double, cl_khr_fp64); if (!dev_has_double(chk)) CHECK_EXT(double, cl_amd_fp64); if (!dev_has_double(chk)) CHECK_EXT(double, cl_APPLE_fp64_basic_ops); CHECK_EXT(nv, cl_nv_device_attribute_query); CHECK_EXT(amd, cl_amd_device_attribute_query); CHECK_EXT(amd_svm, cl_amd_svm); CHECK_EXT(arm_svm, cl_arm_shared_virtual_memory); CHECK_EXT(fission, cl_ext_device_fission); CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64); if (dev_has_atomic_counters(chk)) CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32); CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer); CHECK_EXT(il_program, cl_khr_il_program); CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread); CHECK_EXT(intel_AME, cl_intel_advanced_motion_estimation); CHECK_EXT(intel_AVC_ME, cl_intel_device_side_avc_motion_estimation); CHECK_EXT(intel_planar_yuv, cl_intel_planar_yuv); CHECK_EXT(intel_required_subgroup_size, cl_intel_required_subgroup_size); CHECK_EXT(altera_dev_temp, cl_altera_device_temperature); CHECK_EXT(p2p, cl_amd_copy_buffer_p2p); CHECK_EXT(qcom_ext_host_ptr, cl_qcom_ext_host_ptr); CHECK_EXT(simultaneous_sharing, 
cl_intel_simultaneous_sharing); CHECK_EXT(subgroup_named_barrier, cl_khr_subgroup_named_barrier); CHECK_EXT(terminate_context, cl_khr_terminate_context); } /* * Device info print functions */ #define _GET_VAL(ret, loc, val) \ ret->err = REPORT_ERROR_LOC(ret, \ clGetDeviceInfo((loc)->dev, (loc)->param.dev, sizeof(val), &(val), NULL), \ loc, "get %s"); \ CHECK_SIZE(ret, loc, val, clGetDeviceInfo, (loc)->dev, (loc)->param.dev); #define _GET_VAL_ARRAY(ret, loc) \ ret->err = REPORT_ERROR_LOC(ret, \ clGetDeviceInfo(loc->dev, loc->param.dev, 0, NULL, &szval), \ loc, "get number of %s"); \ numval = szval/sizeof(*val); \ if (!ret->err) { \ REALLOC(val, numval, loc->sname); \ ret->err = REPORT_ERROR_LOC(ret, \ clGetDeviceInfo(loc->dev, loc->param.dev, szval, val, NULL), \ loc, "get %s"); \ if (ret->err) { free(val); val = NULL; } \ } #define GET_VAL(ret, loc, field) do { \ _GET_VAL(ret, (loc), ret->value.field) \ } while (0) #define GET_VAL_ARRAY(ret, loc) do { \ _GET_VAL_ARRAY(ret, (loc)) \ } while (0) #define DEFINE_DEVINFO_FETCH(type, field) \ type \ device_fetch_##type(struct device_info_ret *ret, \ const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), \ const struct opt_out *output) \ { \ GET_VAL(ret, loc, field); \ return ret->value.field; \ } DEFINE_DEVINFO_FETCH(size_t, s) DEFINE_DEVINFO_FETCH(cl_bool, b) DEFINE_DEVINFO_FETCH(cl_uint, u32) DEFINE_DEVINFO_FETCH(cl_ulong, u64) DEFINE_DEVINFO_FETCH(cl_device_type, devtype) DEFINE_DEVINFO_FETCH(cl_device_mem_cache_type, cachetype) DEFINE_DEVINFO_FETCH(cl_device_local_mem_type, lmemtype) DEFINE_DEVINFO_FETCH(cl_device_topology_amd, devtopo) DEFINE_DEVINFO_FETCH(cl_device_affinity_domain, affinity_domain) DEFINE_DEVINFO_FETCH(cl_device_fp_config, fpconfig) DEFINE_DEVINFO_FETCH(cl_command_queue_properties, qprop) DEFINE_DEVINFO_FETCH(cl_device_exec_capabilities, execap) DEFINE_DEVINFO_FETCH(cl_device_svm_capabilities, svmcap) DEFINE_DEVINFO_FETCH(cl_device_terminate_capability_khr, termcap) #define 
DEV_FETCH_LOC(type, var, loc) \ type var = device_fetch_##type(ret, loc, chk, output) #define DEV_FETCH(type, var) DEV_FETCH_LOC(type, var, loc) #define FMT_VAL(ret, fmt, val) if (!ret->err) strbuf_printf(&ret->str, fmt, val) #define DEFINE_DEVINFO_SHOW(how, type, field, fmt) \ void \ device_info_##how(struct device_info_ret *ret, \ const struct info_loc *loc, const struct device_info_checks* chk, \ const struct opt_out *output) \ { \ DEV_FETCH(type, val); \ if (!ret->err) FMT_VAL(ret, fmt, val); \ } DEFINE_DEVINFO_SHOW(int, cl_uint, u32, "%" PRIu32) DEFINE_DEVINFO_SHOW(hex, cl_uint, u32, "%#" PRIx32) DEFINE_DEVINFO_SHOW(long, cl_ulong, u64, "%" PRIu64) DEFINE_DEVINFO_SHOW(sz, size_t, s, "%" PRIuS) void device_info_str(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out* UNUSED(output)) { GET_STRING_LOC(ret, loc, clGetDeviceInfo, loc->dev, loc->param.dev); } void device_info_bool(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_bool, val); if (!ret->err) { const char * const * str = (output->mode == CLINFO_HUMAN ? 
bool_str : bool_raw_str); strbuf_printf(&ret->str, "%s", str[val]); } } void device_info_bits(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_uint, val); if (!ret->err) strbuf_printf(&ret->str, "%" PRIu32 " bits (%" PRIu32 " bytes)", val, val/8); } size_t strbuf_mem(struct _strbuf *str, cl_ulong val, size_t szval) { double dbl = (double)val; size_t sfx = 0; while (dbl > 1024 && sfx < memsfx_end) { dbl /= 1024; ++sfx; } return sprintf(str->buf + szval, " (%.4lg%s)", dbl, memsfx[sfx]); } void device_info_mem(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_ulong, val); if (!ret->err) { size_t szval = strbuf_printf(&ret->str, "%" PRIu64, val); if (output->mode == CLINFO_HUMAN && val > 1024) strbuf_mem(&ret->str, val, szval); } } void device_info_mem_int(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_uint, val); if (!ret->err) { size_t szval = strbuf_printf(&ret->str, "%" PRIu32, val); if (output->mode == CLINFO_HUMAN && val > 1024) strbuf_mem(&ret->str, val, szval); } } void device_info_mem_sz(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(size_t, val); if (!ret->err) { size_t szval = strbuf_printf(&ret->str, "%" PRIuS, val); if (output->mode == CLINFO_HUMAN && val > 1024) strbuf_mem(&ret->str, val, szval); } } void device_info_free_mem_amd(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { size_t cursor = 0; szval = 0; for (cursor = 0; cursor < numval; ++cursor) { if (szval > 0) { ret->str.buf[szval] = ' 
'; ++szval; } szval += sprintf(ret->str.buf + szval, "%" PRIuS, val[cursor]); if (output->mode == CLINFO_HUMAN) szval += strbuf_mem(&ret->str, val[cursor]*UINT64_C(1024), szval); } // TODO: ret->value.??? = val; } free(val); } void device_info_time_offset(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_ulong, val); if (!ret->err) { size_t szval = 0; time_t time = val/UINT64_C(1000000000); szval += strbuf_printf(&ret->str, "%" PRIu64 "ns (", val); szval += bufcpy(&ret->str, szval, ctime(&time)); /* overwrite ctime's newline with the closing parenthesis */ if (szval < ret->str.sz) ret->str.buf[szval - 1] = ')'; } } void device_info_szptr_sep(struct device_info_ret *ret, const char *human_sep, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { size_t counter = 0; set_separator(output->mode == CLINFO_HUMAN ? human_sep : spc_str); szval = 0; for (counter = 0; counter < numval; ++counter) { add_separator(&ret->str, &szval); szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%" PRIuS, val[counter]); if (szval >= ret->str.sz) { trunc_strbuf(&ret->str); break; } } // TODO: ret->value.??? 
= val; } free(val); } void device_info_szptr_times(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* chk, const struct opt_out *output) { device_info_szptr_sep(ret, times_str, loc, chk, output); } void device_info_szptr_comma(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* chk, const struct opt_out *output) { device_info_szptr_sep(ret, comma_str, loc, chk, output); } void getWGsizes(struct device_info_ret *ret, const struct info_loc *loc, size_t *wgm, size_t wgm_sz, const struct opt_out* UNUSED(output)) { cl_int log_err; cl_context_properties ctxpft[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)loc->plat, 0, 0 }; cl_uint cursor = 0; cl_context ctx = NULL; cl_program prg = NULL; cl_kernel krn = NULL; ret->err = CL_SUCCESS; ctx = clCreateContext(ctxpft, 1, &loc->dev, NULL, NULL, &ret->err); if (REPORT_ERROR(&ret->err_str, ret->err, "create context")) goto out; prg = clCreateProgramWithSource(ctx, ARRAY_SIZE(sources), sources, NULL, &ret->err); if (REPORT_ERROR(&ret->err_str, ret->err, "create program")) goto out; ret->err = clBuildProgram(prg, 1, &loc->dev, NULL, NULL, NULL); log_err = REPORT_ERROR(&ret->err_str, ret->err, "build program"); /* for a program build failure, dump the log to stderr before bailing */ if (log_err == CL_BUILD_PROGRAM_FAILURE) { struct _strbuf logbuf; init_strbuf(&logbuf); GET_STRING(&logbuf, ret->err, clGetProgramBuildInfo, CL_PROGRAM_BUILD_LOG, "CL_PROGRAM_BUILD_LOG", prg, loc->dev); if (ret->err == CL_SUCCESS) { fflush(stdout); fflush(stderr); fputs("=== CL_PROGRAM_BUILD_LOG ===\n", stderr); fputs(logbuf.buf, stderr); fflush(stderr); } free_strbuf(&logbuf); } if (ret->err) goto out; for (cursor = 0; cursor < wgm_sz; ++cursor) { strbuf_printf(&ret->str, "sum%u", 1<str.buf[3] = 0; // scalar kernel is called 'sum' krn = clCreateKernel(prg, ret->str.buf, &ret->err); if (REPORT_ERROR(&ret->err_str, ret->err, "create kernel")) goto out; ret->err = 
clGetKernelWorkGroupInfo(krn, loc->dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(*wgm), wgm + cursor, NULL); if (REPORT_ERROR(&ret->err_str, ret->err, "get kernel info")) goto out; clReleaseKernel(krn); krn = NULL; } out: if (krn) clReleaseKernel(krn); if (prg) clReleaseProgram(prg); if (ctx) clReleaseContext(ctx); } void device_info_wg(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { /* preferred workgroup size multiple for each kernel * have not found a platform where the WG multiple changes, * but keep this flexible (this can grow up to 5) */ #define NUM_KERNELS 1 size_t wgm[NUM_KERNELS] = {0}; getWGsizes(ret, loc, wgm, NUM_KERNELS, output); if (!ret->err) { strbuf_printf(&ret->str, "%" PRIuS, wgm[0]); } ret->value.s = wgm[0]; } void device_info_img_sz_2d(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; size_t width = 0, height = 0; _GET_VAL(ret, loc, height); /* HEIGHT */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE2D_MAX_WIDTH); _GET_VAL(ret, &loc2, width); if (!ret->err) { strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height); } } ret->value.u32v.s[0] = width; ret->value.u32v.s[1] = height; } void device_info_img_sz_intel_planar_yuv(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; size_t width = 0, height = 0; _GET_VAL(ret, loc, height); /* HEIGHT */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL); _GET_VAL(ret, &loc2, width); if (!ret->err) { strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS, width, height); } } ret->value.u32v.s[0] = width; ret->value.u32v.s[1] = height; } void device_info_img_sz_3d(struct device_info_ret *ret, const struct info_loc *loc, const struct 
device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; size_t width = 0, height = 0, depth = 0; _GET_VAL(ret, loc, height); /* HEIGHT */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_WIDTH); _GET_VAL(ret, &loc2, width); if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_DEPTH); _GET_VAL(ret, &loc2, depth); if (!ret->err) { strbuf_printf(&ret->str, "%" PRIuS "x%" PRIuS "x%" PRIuS, width, height, depth); } } } ret->value.u32v.s[0] = width; ret->value.u32v.s[1] = height; ret->value.u32v.s[2] = depth; } void device_info_devtype(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_device_type, val); if (!ret->err) { /* iterate over device type strings, appending their textual form * to ret->str */ cl_uint i = (cl_uint)actual_devtype_count; const char * const *devstr = (output->mode == CLINFO_HUMAN ? device_type_str : device_type_raw_str); size_t szval = 0; set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str); for (; i > 0; --i) { /* assemble CL_DEVICE_TYPE_* from index i */ cl_device_type cur = (cl_device_type)(1) << (i-1); if (val & cur) { /* match: add separator if not first match */ add_separator(&ret->str, &szval); szval += bufcpy(&ret->str, szval, devstr[i]); } } /* check for extra bits */ if (szval < ret->str.sz) { cl_device_type known_mask = ((cl_device_type)(1) << actual_devtype_count) - 1; cl_device_type extra = val & ~known_mask; if (extra) { add_separator(&ret->str, &szval); szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra); } } } } void device_info_cachetype(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_device_mem_cache_type, val); if (!ret->err) { const char * const *ar = (output->mode == CLINFO_HUMAN ? 
cache_type_str : cache_type_raw_str); bufcpy(&ret->str, 0, ar[val]); } } void device_info_lmemtype(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_device_local_mem_type, val); if (!ret->err) { const char * const *ar = (output->mode == CLINFO_HUMAN ? lmem_type_str : lmem_type_raw_str); bufcpy(&ret->str, 0, ar[val]); } ret->value.lmemtype = val; } /* stringify a cl_device_topology_amd */ void devtopo_str(struct device_info_ret *ret, const cl_device_topology_amd *devtopo) { switch (devtopo->raw.type) { case 0: /* leave empty */ break; case CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD: strbuf_printf(&ret->str, "PCI-E, %02x:%02x.%u", (cl_uchar)(devtopo->pcie.bus), devtopo->pcie.device, devtopo->pcie.function); break; default: strbuf_printf(&ret->str, "", devtopo->raw.type, devtopo->raw.data[0], devtopo->raw.data[1], devtopo->raw.data[2], devtopo->raw.data[3], devtopo->raw.data[4]); } } void device_info_devtopo_amd(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_device_topology_amd, val); /* TODO how to do this in CLINFO_RAW mode */ if (!ret->err) { devtopo_str(ret, &val); } } /* we assemble a cl_device_topology_amd struct from the NVIDIA info */ void device_info_devtopo_nv(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; cl_device_topology_amd devtopo; DEV_FETCH(cl_uint, val); /* CL_DEVICE_PCI_BUS_ID_NV */ if (!ret->err) { devtopo.raw.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD; devtopo.pcie.bus = val & 0xff; RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PCI_SLOT_ID_NV); _GET_VAL(ret, &loc2, val); if (!ret->err) { devtopo.pcie.device = (val >> 3) & 0xff; devtopo.pcie.function = val & 7; devtopo_str(ret, &devtopo); } ret->value.devtopo = devtopo; } } /* NVIDIA Compute Capability */ void 
device_info_cc_nv(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; cl_uint major = 0, minor = 0; _GET_VAL(ret, loc, major); /* MAJOR */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV); _GET_VAL(ret, &loc2, minor); if (!ret->err) { strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor); } } ret->value.u32v.s[0] = major; ret->value.u32v.s[1] = minor; } /* AMD GFXIP */ void device_info_gfxip_amd(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; cl_uint major = 0, minor = 0; _GET_VAL(ret, loc, major); /* MAJOR */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_GFXIP_MINOR_AMD); _GET_VAL(ret, &loc2, minor); if (!ret->err) { strbuf_printf(&ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor); } } ret->value.u32v.s[0] = major; ret->value.u32v.s[1] = minor; } /* Device Partition, CLINFO_HUMAN header */ void device_info_partition_header(struct device_info_ret *ret, const struct info_loc *UNUSED(loc), const struct device_info_checks *chk, const struct opt_out* UNUSED(output)) { cl_bool is_12 = dev_is_12(chk); cl_bool has_fission = dev_has_fission(chk); size_t szval = strbuf_printf(&ret->str, "(%s%s%s%s)", (is_12 ? core : empty_str), (is_12 && has_fission ? comma_str : empty_str), chk->has_fission, (!(is_12 || has_fission) ? na : empty_str)); ret->err = CL_SUCCESS; if (szval >= ret->str.sz) trunc_strbuf(&ret->str); } /* Device partition properties */ void device_info_partition_types(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t numval = 0, szval = 0, cursor = 0, slen = 0; cl_device_partition_property *val = NULL; const char * const *ptstr = (output->mode == CLINFO_HUMAN ? 
partition_type_str : partition_type_raw_str); set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str); GET_VAL_ARRAY(ret, loc); szval = 0; if (!ret->err) { for (cursor = 0; cursor < numval; ++cursor) { int str_idx = -1; /* add separator for values past the first */ add_separator(&ret->str, &szval); switch (val[cursor]) { case 0: str_idx = 0; break; case CL_DEVICE_PARTITION_EQUALLY: str_idx = 1; break; case CL_DEVICE_PARTITION_BY_COUNTS: str_idx = 2; break; case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: str_idx = 3; break; case CL_DEVICE_PARTITION_BY_NAMES_INTEL: str_idx = 4; break; default: szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by (%#" PRIxPTR ")", val[cursor]); break; } if (str_idx >= 0) { /* string length, minus _EXT */ slen = strlen(ptstr[str_idx]); if (output->mode == CLINFO_RAW && str_idx > 0) slen -= 4; szval += bufcpy_len(&ret->str, szval, ptstr[str_idx], slen); } if (szval >= ret->str.sz) { trunc_strbuf(&ret->str); break; } } // TODO ret->value.??? = val } free(val); } void device_info_partition_types_ext(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t numval = 0, szval = 0, cursor = 0, slen = 0; cl_device_partition_property_ext *val = NULL; const char * const *ptstr = (output->mode == CLINFO_HUMAN ? partition_type_str : partition_type_raw_str); set_separator(output->mode == CLINFO_HUMAN ? 
comma_str : vbar_str); GET_VAL_ARRAY(ret, loc); szval = 0; if (!ret->err) { for (cursor = 0; cursor < numval; ++cursor) { int str_idx = -1; /* add separator for values past the first */ add_separator(&ret->str, &szval); switch (val[cursor]) { case 0: str_idx = 0; break; case CL_DEVICE_PARTITION_EQUALLY_EXT: str_idx = 1; break; case CL_DEVICE_PARTITION_BY_COUNTS_EXT: str_idx = 2; break; case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: str_idx = 3; break; case CL_DEVICE_PARTITION_BY_NAMES_EXT: str_idx = 4; break; default: szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "by (%#" PRIx64 ")", val[cursor]); break; } if (str_idx >= 0) { /* string length */ slen = strlen(ptstr[str_idx]); strncpy(ret->str.buf + szval, ptstr[str_idx], slen); szval += slen; } if (szval >= ret->str.sz) { trunc_strbuf(&ret->str); break; } } if (szval < ret->str.sz) ret->str.buf[szval] = '\0'; // TODO ret->value.??? = val } free(val); } /* Device partition affinity domains */ void device_info_partition_affinities(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_device_affinity_domain, val); if (!ret->err && val) { /* iterate over affinity domain strings appending their textual form * to ret->str */ size_t szval = 0; cl_uint i = 0; const char * const *affstr = (output->mode == CLINFO_HUMAN ? affinity_domain_str : affinity_domain_raw_str); set_separator(output->mode == CLINFO_HUMAN ? 
comma_str : vbar_str); for (i = 0; i < affinity_domain_count; ++i) { cl_device_affinity_domain cur = (cl_device_affinity_domain)(1) << i; if (val & cur) { /* match: add separator if not first match */ add_separator(&ret->str, &szval); szval += bufcpy(&ret->str, szval, affstr[i]); } if (szval >= ret->str.sz) break; } /* check for extra bits */ if (szval < ret->str.sz) { cl_device_affinity_domain known_mask = ((cl_device_affinity_domain)(1) << affinity_domain_count) - 1; cl_device_affinity_domain extra = val & ~known_mask; if (extra) { add_separator(&ret->str, &szval); szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra); } } } } void device_info_partition_affinities_ext(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t numval = 0, szval = 0, cursor = 0, slen = 0; cl_device_partition_property_ext *val = NULL; const char * const *ptstr = (output->mode == CLINFO_HUMAN ? affinity_domain_ext_str : affinity_domain_raw_ext_str); set_separator(output->mode == CLINFO_HUMAN ? 
comma_str : vbar_str); GET_VAL_ARRAY(ret, loc); szval = 0; if (!ret->err) { for (cursor = 0; cursor < numval; ++cursor) { int str_idx = -1; /* add separator for values past the first */ add_separator(&ret->str, &szval); switch (val[cursor]) { case CL_AFFINITY_DOMAIN_NUMA_EXT: str_idx = 0; break; case CL_AFFINITY_DOMAIN_L4_CACHE_EXT: str_idx = 1; break; case CL_AFFINITY_DOMAIN_L3_CACHE_EXT: str_idx = 2; break; case CL_AFFINITY_DOMAIN_L2_CACHE_EXT: str_idx = 3; break; case CL_AFFINITY_DOMAIN_L1_CACHE_EXT: str_idx = 4; break; case CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT: str_idx = 5; break; default: szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, " (%#" PRIx64 ")", val[cursor]); break; } if (str_idx >= 0) { /* string length */ const char *str = ptstr[str_idx]; slen = strlen(str); strncpy(ret->str.buf + szval, str, slen); szval += slen; } if (szval >= ret->str.sz) { trunc_strbuf(&ret->str); break; } } ret->str.buf[szval] = '\0'; // TODO: ret->value.??? = val } free(val); } /* Preferred / native vector widths */ void device_info_vecwidth(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { struct info_loc loc2 = *loc; cl_uint preferred = 0, native = 0; _GET_VAL(ret, loc, preferred); if (!ret->err) { /* we get called with PREFERRED, NATIVE is at +0x30 offset, except for HALF, * which is at +0x08 */ loc2.param.dev += (loc2.param.dev == CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF ? 0x08 : 0x30); /* TODO update loc2.sname */ _GET_VAL(ret, &loc2, native); if (!ret->err) { size_t szval = 0; const char *ext = (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF ? chk->has_half : (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE ? chk->has_double : NULL)); szval = strbuf_printf(&ret->str, "%8u / %-8u", preferred, native); if (ext) sprintf(ret->str.buf + szval, " (%s)", *ext ? 
ext : na); } } ret->value.u32v.s[0] = preferred; ret->value.u32v.s[1] = native; } /* Floating-point configurations */ void device_info_fpconf(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { /* When in HUMAN output, we are called unconditionally, * so we have to do some manual checks ourselves */ const cl_bool get_it = (output->mode != CLINFO_HUMAN) || (loc->param.dev == CL_DEVICE_SINGLE_FP_CONFIG) || (loc->param.dev == CL_DEVICE_HALF_FP_CONFIG && dev_has_half(chk)) || (loc->param.dev == CL_DEVICE_DOUBLE_FP_CONFIG && dev_has_double(chk)); DEV_FETCH(cl_device_fp_config, val); /* Sanitize! */ if (ret->err && !get_it) { ret->err = CL_SUCCESS; val = 0; } if (!ret->err) { size_t szval = 0; cl_uint i = 0; const char * const *fpstr = (output->mode == CLINFO_HUMAN ? fp_conf_str : fp_conf_raw_str); set_separator(vbar_str); if (output->mode == CLINFO_HUMAN) { const char *why = na; switch (loc->param.dev) { case CL_DEVICE_HALF_FP_CONFIG: if (get_it) why = chk->has_half; break; case CL_DEVICE_SINGLE_FP_CONFIG: why = core; break; case CL_DEVICE_DOUBLE_FP_CONFIG: if (get_it) why = chk->has_double; break; default: /* "this can't happen" (unless OpenCL starts supporting _other_ floating-point formats, maybe) */ fprintf(stderr, "unsupported floating-point configuration parameter %s\n", loc->pname); } /* show 'why' it's being shown */ szval += strbuf_printf(&ret->str, "(%s)", why); } if (get_it) { size_t num_flags = fp_conf_count; /* The last flag, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only considered * in the single-precision case. 
half and double don't consider it, * so we skip it altogether */ if (loc->param.dev != CL_DEVICE_SINGLE_FP_CONFIG) num_flags -= 1; for (i = 0; i < num_flags; ++i) { cl_device_fp_config cur = (cl_device_fp_config)(1) << i; if (output->mode == CLINFO_HUMAN) { szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s", line_pfx, fpstr[i], bool_str[!!(val & cur)]); } else if (val & cur) { add_separator(&ret->str, &szval); szval += bufcpy(&ret->str, szval, fpstr[i]); } } } } } /* Queue properties */ void device_info_qprop(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { DEV_FETCH(cl_command_queue_properties, val); if (!ret->err) { size_t szval = 0; cl_uint i = 0; const char * const *qpstr = (output->mode == CLINFO_HUMAN ? queue_prop_str : queue_prop_raw_str); set_separator(vbar_str); for (i = 0; i < queue_prop_count; ++i) { cl_command_queue_properties cur = (cl_command_queue_properties)(1) << i; if (output->mode == CLINFO_HUMAN) { szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s", line_pfx, qpstr[i], bool_str[!!(val & cur)]); } else if (val & cur) { add_separator(&ret->str, &szval); szval += bufcpy(&ret->str, szval, qpstr[i]); } } if (output->mode == CLINFO_HUMAN && loc->param.dev == CL_DEVICE_QUEUE_PROPERTIES && dev_has_intel_local_thread(chk)) sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s", line_pfx, "Local thread execution (Intel)", bool_str[CL_TRUE]); } } /* Execution capbilities */ void device_info_execap(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_device_exec_capabilities, val); if (!ret->err) { size_t szval = 0; cl_uint i = 0; const char * const *qpstr = (output->mode == CLINFO_HUMAN ? 
execap_str : execap_raw_str); set_separator(vbar_str); for (i = 0; i < execap_count; ++i) { cl_device_exec_capabilities cur = (cl_device_exec_capabilities)(1) << i; if (output->mode == CLINFO_HUMAN) { szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s", line_pfx, qpstr[i], bool_str[!!(val & cur)]); } else if (val & cur) { add_separator(&ret->str, &szval); szval += bufcpy(&ret->str, szval, qpstr[i]); } } } } /* Arch bits and endianness (HUMAN) */ void device_info_arch(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_uint, bits); struct info_loc loc2 = *loc; RESET_LOC_PARAM(loc2, dev, CL_DEVICE_ENDIAN_LITTLE); if (!ret->err) { DEV_FETCH_LOC(cl_bool, val, &loc2); if (!ret->err) { strbuf_printf(&ret->str, "%" PRIu32 ", %s", bits, endian_str[val]); } } } /* SVM capabilities */ void device_info_svm_cap(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { const cl_bool is_20 = dev_is_20(chk); const cl_bool checking_core = (loc->param.dev == CL_DEVICE_SVM_CAPABILITIES); const cl_bool has_amd_svm = (checking_core && dev_has_amd_svm(chk)); DEV_FETCH(cl_device_svm_capabilities, val); if (!ret->err) { size_t szval = 0; cl_uint i = 0; const char * const *scstr = (output->mode == CLINFO_HUMAN ? svm_cap_str : svm_cap_raw_str); set_separator(vbar_str); if (output->mode == CLINFO_HUMAN && checking_core) { /* show 'why' it's being shown */ szval += strbuf_printf(&ret->str, "(%s%s%s)", (is_20 ? core : empty_str), (is_20 && has_amd_svm ? 
comma_str : empty_str), chk->has_amd_svm); } for (i = 0; i < svm_cap_count; ++i) { cl_device_svm_capabilities cur = (cl_device_svm_capabilities)(1) << i; if (output->mode == CLINFO_HUMAN) { szval += sprintf(ret->str.buf + szval, "\n%s" I2_STR "%s", line_pfx, scstr[i], bool_str[!!(val & cur)]); } else if (val & cur) { add_separator(&ret->str, &szval); szval += bufcpy(&ret->str, szval, scstr[i]); } } } } /* Device terminate capability */ void device_info_terminate_capability(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_device_terminate_capability_khr, val); if (!ret->err && val) { /* iterate over terminate capability strings appending their textual form * to ret->str */ size_t szval = 0; cl_uint i = 0; const char * const *capstr = (output->mode == CLINFO_HUMAN ? terminate_capability_str : terminate_capability_raw_str); set_separator(output->mode == CLINFO_HUMAN ? comma_str : vbar_str); for (i = 0; i < terminate_capability_count; ++i) { cl_device_terminate_capability_khr cur = (cl_device_terminate_capability_khr)(1) << i; if (val & cur) { /* match: add separator if not first match */ add_separator(&ret->str, &szval); szval += bufcpy(&ret->str, szval, capstr[i]); } if (szval >= ret->str.sz) break; } /* check for extra bits */ if (szval < ret->str.sz) { cl_device_terminate_capability_khr known_mask = ((cl_device_terminate_capability_khr)(1) << terminate_capability_count) - 1; cl_device_terminate_capability_khr extra = val & ~known_mask; if (extra) { add_separator(&ret->str, &szval); szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx64, extra); } } } } void device_info_p2p_dev_list(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out* UNUSED(output)) { cl_device_id *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { size_t cursor = 0; szval = 
0; for (cursor= 0; cursor < numval; ++cursor) { if (szval > 0) { ret->str.buf[szval] = ' '; ++szval; } szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%p", (void*)val[cursor]); } // TODO: ret->value.??? = val; } free(val); } void device_info_interop_list(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_uint *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { size_t cursor = 0; const cl_interop_name *interop_name_end = cl_interop_names + num_known_interops; cl_uint human_raw = output->mode - CLINFO_HUMAN; const char *groupsep = (output->mode == CLINFO_HUMAN ? comma_str : vbar_str); cl_bool first = CL_TRUE; szval = 0; for (cursor = 0; cursor < numval; ++cursor) { cl_uint current = val[cursor]; if (!current && cursor < numval - 1) { /* A null value is used as group terminator, but we only print it * if it's not the final one */ szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", groupsep); first = CL_TRUE; } if (current) { cl_bool found = CL_FALSE; const cl_interop_name *n = cl_interop_names; if (!first) { ret->str.buf[szval] = ' '; ++szval; } while (n < interop_name_end) { if (current >= n->from && current <= n->to) { found = CL_TRUE; break; } ++n; } if (found) { cl_uint i = current - n->from; szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%s", n->value[i][human_raw]); } else { szval += snprintf(ret->str.buf + szval, ret->str.sz - szval - 1, "%#" PRIx32, val[cursor]); } first = CL_FALSE; } if (szval >= ret->str.sz) { trunc_strbuf(&ret->str); break; } } // TODO: ret->value.??? 
= val; } free(val); } /* * Device info traits */ /* A CL_FALSE param means "just print pname" */ struct device_info_traits { enum output_modes output_mode; cl_device_info param; // CL_DEVICE_* const char *sname; // "CL_DEVICE_*" const char *pname; // "Device *" const char *sfx; // suffix for the output in non-raw mode /* pointer to function that retrieves the parameter */ void (*show_func)(struct device_info_ret *, const struct info_loc *, const struct device_info_checks *, const struct opt_out *); /* pointer to function that checks if the parameter should be retrieved */ cl_bool (*check_func)(const struct device_info_checks *); }; #define DINFO_SFX(symbol, name, sfx, typ) symbol, #symbol, name, sfx, device_info_##typ #define DINFO(symbol, name, typ) symbol, #symbol, name, NULL, device_info_##typ struct device_info_traits dinfo_traits[] = { { CLINFO_BOTH, DINFO(CL_DEVICE_NAME, "Device Name", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR, "Device Vendor", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR_ID, "Device Vendor ID", hex), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_VERSION, "Device Version", str), NULL }, { CLINFO_BOTH, DINFO(CL_DRIVER_VERSION, "Driver Version", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_VERSION, "Device OpenCL C Version", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS, "Device Extensions", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_TYPE, "Device Type", devtype), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_BOARD_NAME_AMD, "Device Board Name (AMD)", str), dev_has_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_TOPOLOGY_AMD, "Device Topology (AMD)", devtopo_amd), dev_has_amd }, /* Device Topology (NV) is multipart, so different for HUMAN and RAW */ { CLINFO_HUMAN, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device Topology (NV)", devtopo_nv), dev_has_nv }, { CLINFO_RAW, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device PCI bus (NV)", int), dev_has_nv }, { CLINFO_RAW, DINFO(CL_DEVICE_PCI_SLOT_ID_NV, "Device PCI slot (NV)", int), dev_has_nv }, { 
CLINFO_BOTH, DINFO(CL_DEVICE_PROFILE, "Device Profile", str), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE, "Device Available", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_COMPILER_AVAILABLE, "Compiler Available", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LINKER_AVAILABLE, "Linker Available", bool), dev_is_12 },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_COMPUTE_UNITS, "Max compute units", int), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute unit (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_WIDTH_AMD, "SIMD width (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz", int), NULL },

	/* Device Compute Capability (NV) is multipart, so different for HUMAN and RAW */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, "Compute Capability (NV)", cc_nv), dev_has_nv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, INDENT "Compute Capability Major (NV)", int), dev_has_nv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, INDENT "Compute Capability Minor (NV)", int), dev_has_nv },

	/* GFXIP (AMD) is multipart, so different for HUMAN and RAW */
	/* TODO: find a better human-friendly name than GFXIP; v3 of the cl_amd_device_attribute_query
	 * extension specification calls it “core engine GFXIP”, which honestly is not better than
	 * our name choice. */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, "Graphics IP (AMD)", gfxip_amd), dev_is_gpu_amd },
	{ CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, INDENT "Graphics IP MAJOR (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MINOR_AMD, INDENT "Graphics IP MINOR (AMD)", int), dev_is_gpu_amd },

	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_CORE_TEMPERATURE_ALTERA, "Core Temperature (Altera)", " C", int), dev_has_altera_dev_temp },

	/* Device partition support: summary is only presented in HUMAN case */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, "Device Partition", partition_header), dev_has_partition },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices", int), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_PROPERTIES, INDENT "Supported partition types", partition_types), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, INDENT "Supported affinity domains", partition_affinities), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_TYPES_EXT, INDENT "Supported partition types (ext)", partition_types_ext), dev_has_fission },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AFFINITY_DOMAINS_EXT, INDENT "Supported affinity domains (ext)", partition_affinities_ext), dev_has_fission },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "Max work item dimensions", int), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_SIZES, "Max work item sizes", szptr_times), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE, "Max work group size", sz), NULL },

	/* cl_amd_device_attribute_query v4 */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD, "Preferred work group size (AMD)", sz), dev_has_amd_v4 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, "Max work group size (AMD)", sz), dev_has_amd_v4 },

	{ CLINFO_BOTH, DINFO(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple", wg), dev_has_compiler },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_WARP_SIZE_NV, "Warp size (NV)", int), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NUM_SUB_GROUPS, "Max sub-groups per work group", int), dev_is_21 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, "Max named sub-group barriers", int), dev_has_subgroup_named_barrier },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_SIZES_INTEL, "Sub-group sizes (Intel)", szptr_comma), dev_has_intel_required_subgroup_size },

	/* Preferred/native vector widths: header is only presented in HUMAN case, that also pairs
	 * PREFERRED and NATIVE in a single line */
#define DINFO_VECWIDTH(Type, type) \
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, vecwidth), NULL }, \
	{ CLINFO_RAW, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }, \
	{ CLINFO_RAW, DINFO(CL_DEVICE_NATIVE_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }

	{ CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred / native vector sizes", str), NULL },
	DINFO_VECWIDTH(CHAR, char),
	DINFO_VECWIDTH(SHORT, short),
	DINFO_VECWIDTH(INT, int),
	DINFO_VECWIDTH(LONG, long),
	DINFO_VECWIDTH(HALF, half),
	DINFO_VECWIDTH(FLOAT, float),
	DINFO_VECWIDTH(DOUBLE, double),

	/* Floating point configurations */
#define DINFO_FPCONF(Type, type, cond) \
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), NULL }, \
	{ CLINFO_RAW, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), cond }

	DINFO_FPCONF(HALF, Half, dev_has_half),
	DINFO_FPCONF(SINGLE, Single, NULL),
	DINFO_FPCONF(DOUBLE, Double, dev_has_double),

	/* Address bits and endianness are written together for HUMAN, separate for RAW */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", arch), NULL },
	{ CLINFO_RAW, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", int), NULL },
	{ CLINFO_RAW, DINFO(CL_DEVICE_ENDIAN_LITTLE, "Little Endian", bool), NULL },

	/* Global memory */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, "Global free memory (AMD)", free_mem_amd), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", bytes_str, int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_ERROR_CORRECTION_SUPPORT, "Error Correction support", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "Max memory allocation", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_HOST_UNIFIED_MEMORY, "Unified memory for Host and Device", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_INTEGRATED_MEMORY_NV, "Integrated memory (NV)", bool), dev_has_nv },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES, "Shared Virtual Memory (SVM) capabilities", svm_cap), dev_has_svm },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES_ARM, "Shared Virtual Memory (SVM) capabilities (ARM)", svm_cap), dev_has_arm_svm },

	/* Alignment */
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "Minimum alignment for any data type", bytes_str, int), NULL },
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", bits), NULL },
	{ CLINFO_RAW, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", int), NULL },

	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PAGE_SIZE_QCOM, "Page size (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, "External memory padding (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },

	/* Atomics alignment, with HUMAN-only header */
	{ CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred alignment for atomics", str), dev_is_20 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", bytes_str, int), dev_is_20 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", bytes_str, int), dev_is_20 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, INDENT "Local", bytes_str, int), dev_is_20 },

	/* Global variables. TODO some 1.2 devices respond to this too */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable", mem), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars", mem), dev_is_20 },

	/* Global memory cache */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "Global Memory cache type", cachetype), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "Global Memory cache size", mem), dev_has_cache },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "Global Memory cache line size", " bytes", int), dev_has_cache },

	/* Image support */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_IMAGE_SUPPORT, "Image support", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_SAMPLERS, INDENT "Max number of samplers per kernel", int), dev_has_images },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, INDENT "Max size for 1D images from buffer", pixels_str, sz), dev_has_images_12 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", images_str, sz), dev_has_images_12 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers", bytes_str, sz), dev_has_image2d_buffer },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers", pixels_str, sz), dev_has_image2d_buffer },

	/* Image dimensions are split for RAW, combined for HUMAN */
	{ CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image size", pixels_str, img_sz_2d), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image height", sz), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_WIDTH, INDENT "Max 2D image width", sz), dev_has_images },
	{ CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image size", pixels_str, img_sz_2d), dev_has_intel_planar_yuv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image height", sz), dev_has_intel_planar_yuv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, INDENT "Max planar YUV image width", sz), dev_has_intel_planar_yuv },
	{ CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image size", pixels_str, img_sz_3d), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image height", sz), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_WIDTH, INDENT "Max 3D image width", sz), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_DEPTH, INDENT "Max 3D image depth", sz), dev_has_images },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args", int), dev_has_images },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args", int), dev_has_images },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args", int), dev_has_images_20 },

	/* Pipes */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PIPE_ARGS, "Max number of pipe args", int), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", int), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_PACKET_SIZE, "Max pipe packet size", mem_int), dev_is_20 },

	/* Local memory */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_TYPE, "Local memory type", lmemtype), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE, "Local memory size", mem), dev_has_lmem },
	/* label spelling corrected: it used to read "syze" */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory size per CU (AMD)", mem), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_REGISTERS_PER_BLOCK_NV, "Registers per block (NV)", int), dev_has_nv },

	/* Constant memory */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_ARGS, "Max number of constant args", int), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD, "Preferred constant buffer size (AMD)", mem_sz), dev_has_amd_v4 },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PARAMETER_SIZE, "Max size of kernel argument", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters", sz), dev_has_atomic_counters },

	/* Queue properties */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_PROPERTIES, "Queue properties", qprop), dev_not_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, "Queue properties (on host)", qprop), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, "Queue properties (on device)", qprop), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, INDENT "Preferred size", mem), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, INDENT "Max size", mem), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_QUEUES, "Max queues on device", int), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_EVENTS, "Max events on device", int), dev_is_20 },

	/* Terminate context */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x, "Terminate capability (1.2 define)", terminate_capability), dev_has_terminate_context },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x, "Terminate capability (2.x define)", terminate_capability), dev_has_terminate_context },

	/* Interop */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interop", bool), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, "Number of simultaneous interops (Intel)", int), dev_has_simultaneous_sharing },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, "Simultaneous interops", interop_list), dev_has_simultaneous_sharing },

	/* P2P buffer copy */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_NUM_P2P_DEVICES_AMD, "Number of P2P devices (AMD)", int), dev_has_p2p },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_P2P_DEVICES_AMD, "P2P devices (AMD)", p2p_dev_list), dev_has_p2p },

	/* Profiling resolution */
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns", sz), NULL },
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", time_offset), dev_has_amd },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", long), dev_has_amd },

	/* Kernel execution capabilities */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_EXECUTION_CAPABILITIES, "Execution capabilities", execap), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, INDENT "Sub-group independent forward progress", bool), dev_is_21 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, INDENT "Thread trace supported (AMD)", bool), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, INDENT "Kernel execution timeout (NV)", bool), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GPU_OVERLAP_NV, "Concurrent copy and kernel execution (NV)", bool), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT "Number of async copy engines", int), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, INDENT "Number of async queues (AMD)", int), dev_has_amd_v4 },
	/* TODO FIXME undocumented, experimental */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, INDENT "Max real-time compute queues (AMD)", int), dev_has_amd_v4 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, INDENT "Max real-time compute units (AMD)", int), dev_has_amd_v4 },

	/* TODO: this should tell if it's being done due to the device being 2.1 or due to it having the extension */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_IL_VERSION, INDENT "IL version", str), dev_has_il },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", str), dev_has_spir },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PRINTF_BUFFER_SIZE, "printf() buffer size", mem_sz), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS, "Built-in kernels", str), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_ME_VERSION_INTEL, "Motion Estimation accelerator version (Intel)", int), dev_has_intel_AME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_VERSION_INTEL, INDENT "Device-side AVC Motion Estimation version", int), dev_has_intel_AVC_ME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, INDENT INDENT "Supports texture sampler use", bool), dev_has_intel_AVC_ME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, INDENT INDENT "Supports preemption", bool), dev_has_intel_AVC_ME },
};

/* Process all the device info in the traits, except if param_whitelist is not NULL,
 * in which case only those in the whitelist will be processed.
* If present, the whitelist should be sorted in the order of appearance of the parameters * in the traits table, and terminated by the value CL_FALSE */ void printDeviceInfo(cl_device_id dev, const struct platform_list *plist, cl_uint p, const cl_device_info *param_whitelist, /* list of device info to process, or NULL */ const struct opt_out *output) { char *extensions = NULL; /* pointer to the traits for CL_DEVICE_EXTENSIONS */ const struct device_info_traits *extensions_traits = NULL; struct device_info_checks chk; struct device_info_ret ret; struct info_loc loc; memset(&chk, 0, sizeof(chk)); chk.pinfo_checks = plist->platform_checks + p; chk.dev_version = 10; INIT_RET(ret, "device"); reset_loc(&loc, __func__); loc.plat = plist->platform[p]; loc.dev = dev; for (loc.line = 0; loc.line < ARRAY_SIZE(dinfo_traits); ++loc.line) { const struct device_info_traits *traits = dinfo_traits + loc.line; /* checked is true if there was no condition to check for, or if the * condition was satisfied */ int checked = !(traits->check_func && !traits->check_func(&chk)); loc.sname = traits->sname; loc.pname = (output->mode == CLINFO_HUMAN ? traits->pname : traits->sname); loc.param.dev = traits->param; /* Whitelist check: finish if done traversing the list, * skip current param if it's not the right one */ if ((output->cond == COND_PROP_CHECK || output->brief) && param_whitelist) { if (*param_whitelist == CL_FALSE) break; if (traits->param != *param_whitelist) continue; ++param_whitelist; } /* skip if it's not for this output mode */ if (!(output->mode & traits->output_mode)) continue; if (output->cond == COND_PROP_CHECK && !checked) continue; cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? 
traits->sfx : empty_str; ret.str.buf[0] = '\0'; ret.err_str.buf[0] = '\0'; /* Handle headers */ if (traits->param == CL_FALSE) { ret.err = CL_SUCCESS; show_strbuf(&ret.str, loc.pname, 0, ret.err); continue; } traits->show_func(&ret, &loc, &chk, output); if (traits->param == CL_DEVICE_EXTENSIONS) { /* make a backup of the extensions string, regardless of * errors */ const char *msg = RET_BUF(ret)->buf; size_t len = strlen(msg); extensions_traits = traits; ALLOC(extensions, len+1, "extensions"); memcpy(extensions, msg, len); extensions[len] = '\0'; } else { if (ret.err) { /* if there was an error retrieving the property, * skip if it wasn't expected to work and we * weren't asked to show everything regardless of * error */ if (!checked && output->cond != COND_PROP_SHOW) continue; } else { /* on success, but empty result, show (n/a) */ if (ret.str.buf[0] == '\0') bufcpy(&ret.str, 0, not_specified(output)); } if (output->brief) printf("%s%s\n", line_pfx, RET_BUF(ret)->buf); else show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err); } if (ret.err) continue; switch (traits->param) { case CL_DEVICE_VERSION: /* compute numeric value for OpenCL version */ chk.dev_version = getOpenCLVersion(ret.str.buf + 7); break; case CL_DEVICE_EXTENSIONS: identify_device_extensions(extensions, &chk); break; case CL_DEVICE_TYPE: chk.devtype = ret.value.devtype; break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: chk.cachetype = ret.value.cachetype; break; case CL_DEVICE_LOCAL_MEM_TYPE: chk.lmemtype = ret.value.lmemtype; break; case CL_DEVICE_IMAGE_SUPPORT: chk.image_support = ret.value.b; break; case CL_DEVICE_COMPILER_AVAILABLE: chk.compiler_available = ret.value.b; break; default: /* do nothing */ break; } } // and finally the extensions, if we retrieved them if (extensions) printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ? 
extensions_traits->pname : extensions_traits->sname), extensions); free(extensions); extensions = NULL; UNINIT_RET(ret); } /* list of allowed properties for AMD offline devices */ /* everything else seems to be set to 0, and all the other string properties * actually segfault the driver */ static const cl_device_info amd_offline_info_whitelist[] = { CL_DEVICE_NAME, /* These are present, but all the same, so just skip them: CL_DEVICE_VENDOR, CL_DEVICE_VENDOR_ID, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_OPENCL_C_VERSION, */ CL_DEVICE_EXTENSIONS, CL_DEVICE_TYPE, CL_DEVICE_GFXIP_MAJOR_AMD, CL_DEVICE_GFXIP_MINOR_AMD, CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_FALSE }; static const cl_device_info list_info_whitelist[] = { CL_DEVICE_NAME, CL_FALSE }; /* return a list of offline devices from the AMD extension */ cl_device_id * fetchOfflineDevicesAMD(const struct platform_list *plist, cl_uint p, /* the number of devices will be returned in ret->value.u32, * the associated context in ret->base.ctx; */ struct device_info_ret *ret) { cl_platform_id pid = plist->platform[p]; cl_device_id *device = NULL; cl_uint num_devs = 0; cl_context ctx = NULL; cl_context_properties ctxpft[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)pid, CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)CL_TRUE, 0 }; ctx = clCreateContextFromType(ctxpft, CL_DEVICE_TYPE_ALL, NULL, NULL, &ret->err); REPORT_ERROR(&ret->err_str, ret->err, "create context"); if (!ret->err) { ret->err = REPORT_ERROR(&ret->err_str, clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES, sizeof(num_devs), &num_devs, NULL), "get num devs"); } if (!ret->err) { ALLOC(device, num_devs, "offline devices"); ret->err = REPORT_ERROR(&ret->err_str, clGetContextInfo(ctx, CL_CONTEXT_DEVICES, num_devs*sizeof(*device), device, NULL), "get devs"); } if (ret->err) { if (ctx) clReleaseContext(ctx); free(device); device = NULL; } else { ret->value.u32 = num_devs; ret->base.ctx = ctx; } return device; } void printPlatformName(const struct 
platform_list *plist, cl_uint p, struct _strbuf *str, const struct opt_out *output) { const struct platform_data *pdata = plist->pdata + p; const char *brief_prefix = (output->mode == CLINFO_HUMAN ? "Platform #" : ""); const char *title = (output->mode == CLINFO_HUMAN ? pinfo_traits[0].pname : pinfo_traits[0].sname); const int prefix_width = -line_pfx_len*(!output->brief); if (output->brief) { strbuf_printf(str, "%s%" PRIu32 ": ", brief_prefix, p); } else if (output->mode == CLINFO_RAW) { strbuf_printf(str, "[%s/*]", pdata->sname); } sprintf(line_pfx, "%*s", prefix_width, str->buf); if (output->brief) printf("%s%s\n", line_pfx, pdata->pname); else printf("%s" I1_STR "%s\n", line_pfx, title, pdata->pname); } void printPlatformDevices(const struct platform_list *plist, cl_uint p, const cl_device_id *device, cl_uint ndevs, struct _strbuf *str, const struct opt_out *output, cl_bool these_are_offline) { const struct platform_data *pdata = plist->pdata + p; const cl_device_info *param_whitelist = output->brief ? list_info_whitelist : these_are_offline ? amd_offline_info_whitelist : NULL; cl_uint d; if (output->detailed) printf("%s" I0_STR "%" PRIu32 "\n", line_pfx, num_devs_header(output, these_are_offline), ndevs); for (d = 0; d < ndevs; ++d) { const cl_device_id dev = device[d]; if (output->brief) { const cl_bool last_device = (d == ndevs - 1 && output->mode != CLINFO_RAW && (!output->offline || !pdata->has_amd_offline || these_are_offline)); if (output->mode == CLINFO_RAW) sprintf(line_pfx, "%" PRIu32 "%c%" PRIu32 ": ", p, these_are_offline ? '*' : '.', d); else sprintf(line_pfx, " +-- %sDevice #%" PRIu32 ": ", these_are_offline ? "Offline " : "", d); if (last_device) line_pfx[1] = '`'; } else if (line_pfx_len > 0) { cl_int sd = (these_are_offline ? 
-1 : 1)*(cl_int)d; strbuf_printf(str, "[%s/%" PRId32 "]", pdata->sname, sd); sprintf(line_pfx, "%*s", -line_pfx_len, str->buf); } printDeviceInfo(dev, plist, p, param_whitelist, output); if (output->detailed && d < pdata[p].ndevs - 1) puts(""); fflush(stdout); fflush(stderr); } } void showDevices(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms; const cl_uint maxdevs = plist->max_devs; const struct platform_data *pdata = plist->pdata; cl_uint p; struct _strbuf str; init_strbuf(&str); realloc_strbuf(&str, 1024, "show devices"); if (output->mode == CLINFO_RAW) { if (output->brief) strbuf_printf(&str, "%" PRIu32 ".%" PRIu32 ": ", num_platforms, maxdevs); else strbuf_printf(&str, "[%*s/%" PRIu32 "] ", plist->max_sname_len, "", maxdevs); } else { if (output->brief) strbuf_printf(&str, " +-- %sDevice #%" PRIu32 ": ", (output->offline ? "Offline " : ""), maxdevs); else str.buf[0] = '\0'; /* reset */ /* TODO we have no prefix in HUMAN detailed output mode, * consider adding one */ } if (str.buf[0]) { line_pfx_len = (int)(strlen(str.buf) + 1); REALLOC(line_pfx, line_pfx_len, "line prefix"); str.buf[0] = '\0'; /* reset */ } for (p = 0; p < num_platforms; ++p) { printPlatformName(plist, p, &str, output); printPlatformDevices(plist, p, get_platform_devs(plist, p), pdata[p].ndevs, &str, output, CL_FALSE); if (output->offline && pdata[p].has_amd_offline) { struct device_info_ret ret; cl_device_id *devs = NULL; INIT_RET(ret, "offline device"); if (output->detailed) puts(""); devs = fetchOfflineDevicesAMD(plist, p, &ret); if (ret.err) { puts(ret.err_str.buf); } else { printPlatformDevices(plist, p, devs, ret.value.u32, &str, output, CL_TRUE); clReleaseContext(ret.base.ctx); free(devs); } UNINIT_RET(ret); } if (output->detailed) puts(""); } free_strbuf(&str); } /* check the behavior of clGetPlatformInfo() when given a NULL platform ID */ void checkNullGetPlatformName(const struct opt_out *output) { struct 
device_info_ret ret; struct info_loc loc; INIT_RET(ret, "null ctx"); reset_loc(&loc, __func__); RESET_LOC_PARAM(loc, plat, CL_PLATFORM_NAME); ret.err = clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ret.str.sz, ret.str.buf, NULL); if (ret.err == CL_INVALID_PLATFORM) { bufcpy(&ret.err_str, 0, no_plat(output)); } else { loc.line = __LINE__ + 1; REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s"); } printf(I1_STR "%s\n", "clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)", RET_BUF(ret)->buf); UNINIT_RET(ret); } /* check the behavior of clGetDeviceIDs() when given a NULL platform ID; * return the index of the default platform in our array of platform IDs, * or num_platforms (which is an invalid platform index) in case of errors * or no platform or device found. */ cl_uint checkNullGetDevices(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms; const struct platform_data *pdata = plist->pdata; const cl_platform_id *platform = plist->platform; struct device_info_ret ret; struct info_loc loc; cl_uint i = 0; /* generic iterator */ cl_device_id dev = NULL; /* sample device */ cl_platform_id plat = NULL; /* detected platform */ cl_uint found = 0; /* number of platforms found */ cl_uint pidx = num_platforms; /* index of the platform found */ cl_uint numdevs = 0; INIT_RET(ret, "null get devices"); reset_loc(&loc, __func__); loc.sname = "device IDs"; ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 0, NULL, &numdevs); /* TODO we should check other CL_DEVICE_TYPE_* combinations, since a smart * implementation might give you a different default platform for GPUs * and for CPUs. * Of course the “no devices” case would then need to be handled differently. * The logic might be maintained similarly, provided we also gather * the number of devices of each type for each platform, although it's * obviously more likely to have multiple platforms with no devices * of a given type. 
*/ switch (ret.err) { case CL_INVALID_PLATFORM: bufcpy(&ret.err_str, 0, no_plat(output)); break; case CL_DEVICE_NOT_FOUND: /* No devices were found, see if there are platforms with * no devices, and if there's only one, assume this is the * one being used as default by the ICD loader */ for (i = 0; i < num_platforms; ++i) { if (pdata[i].ndevs == 0) { ++found; if (found > 1) break; else { plat = platform[i]; pidx = i; } } } switch (found) { case 0: bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ? "" : "CL_DEVICE_NOT_FOUND | CL_INVALID_PLATFORM")); break; case 1: bufcpy(&ret.str, 0, (output->mode == CLINFO_HUMAN ? pdata[pidx].pname : pdata[pidx].sname)); break; default: /* found > 1 */ bufcpy(&ret.err_str, 0, (output->mode == CLINFO_HUMAN ? "" : "CL_DEVICE_NOT_FOUND | ????")); break; } break; default: loc.line = __LINE__+1; if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get number of %s")) break; /* Determine platform by looking at the CL_DEVICE_PLATFORM of * one of the devices */ ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 1, &dev, NULL); loc.line = __LINE__+1; if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM); ret.err = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL); loc.line = __LINE__+1; if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; for (i = 0; i < num_platforms; ++i) { if (platform[i] == plat) { pidx = i; strbuf_printf(&ret.str, "%s [%s]", (output->mode == CLINFO_HUMAN ? 
"Success" : "CL_SUCCESS"), pdata[i].sname); break; } } if (i == num_platforms) { ret.err = CL_INVALID_PLATFORM; strbuf_printf(&ret.err_str, "", (void*)plat); } } printf(I1_STR "%s\n", "clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)", RET_BUF(ret)->buf); UNINIT_RET(ret); return pidx; } void checkNullCtx(struct device_info_ret *ret, const struct platform_list *plist, cl_uint pidx, const char *which, const struct opt_out *output) { const cl_device_id *dev = plist->all_devs + plist->dev_offset[pidx]; struct info_loc loc; cl_context ctx = clCreateContext(NULL, 1, dev, NULL, NULL, &ret->err); reset_loc(&loc, __func__); loc.sname = which; loc.line = __LINE__+2; if (!REPORT_ERROR_LOC(ret, ret->err, &loc, "create context with device from %s platform")) strbuf_printf(&ret->str, "%s [%s]", (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"), plist->pdata[pidx].sname); if (ctx) { clReleaseContext(ctx); ctx = NULL; } } /* check behavior of clCreateContextFromType() with NULL cl_context_properties */ void checkNullCtxFromType(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms; const struct platform_data *pdata = plist->pdata; const cl_platform_id *platform = plist->platform; size_t t; /* type iterator */ size_t i; /* generic iterator */ char def[1024]; cl_context ctx = NULL; size_t ndevs = 8; size_t szval = 0; size_t cursz = ndevs*sizeof(cl_device_id); cl_platform_id plat = NULL; cl_device_id *devs = NULL; struct device_info_ret ret; struct info_loc loc; const char *platname_prop = (output->mode == CLINFO_HUMAN ? pinfo_traits[0].pname : pinfo_traits[0].sname); const char *devname_prop = (output->mode == CLINFO_HUMAN ? 
dinfo_traits[0].pname : dinfo_traits[0].sname); reset_loc(&loc, __func__); INIT_RET(ret, "null ctx from type"); ALLOC(devs, ndevs, "context devices"); for (t = 1; t < devtype_count; ++t) { /* we skip 0 */ loc.sname = device_type_raw_str[t]; strbuf_printf(&ret.str, "clCreateContextFromType(NULL, %s)", loc.sname); sprintf(def, I1_STR, ret.str.buf); loc.line = __LINE__+1; ctx = clCreateContextFromType(NULL, devtype[t], NULL, NULL, &ret.err); switch (ret.err) { case CL_INVALID_PLATFORM: bufcpy(&ret.err_str, 0, no_plat(output)); break; case CL_DEVICE_NOT_FOUND: bufcpy(&ret.err_str, 0, no_dev_found(output)); break; case CL_INVALID_DEVICE_TYPE: /* e.g. _CUSTOM device on 1.1 platform */ bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break; case CL_INVALID_VALUE: /* This is what apple returns for the case above */ bufcpy(&ret.err_str, 0, invalid_dev_type(output)); break; case CL_DEVICE_NOT_AVAILABLE: bufcpy(&ret.err_str, 0, no_dev_avail(output)); break; default: if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "create context from type %s")) break; /* get the devices */ loc.sname = "CL_CONTEXT_DEVICES"; loc.line = __LINE__+2; ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &szval); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s size")) break; if (szval > cursz) { REALLOC(devs, szval, "context devices"); cursz = szval; } loc.line = __LINE__+1; ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, cursz, devs, NULL); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; ndevs = szval/sizeof(cl_device_id); if (ndevs < 1) { ret.err = CL_DEVICE_NOT_FOUND; bufcpy(&ret.err_str, 0, ""); } /* get the platform from the first device */ RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM); loc.line = __LINE__+1; ret.err = clGetDeviceInfo(*devs, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; loc.plat = plat; szval = 0; for (i = 0; i < num_platforms; ++i) { if (platform[i] == plat) break; } if (i == num_platforms) { 
ret.err = CL_INVALID_PLATFORM; strbuf_printf(&ret.err_str, "", (void*)plat); break; } else { szval += strbuf_printf(&ret.str, "%s (%" PRIuS ")", (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"), ndevs); szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR "%s", platname_prop, pdata[i].pname); } for (i = 0; i < ndevs; ++i) { size_t szname = 0; /* for each device, show the device name */ /* TODO some other unique ID too, e.g. PCI address, if available? */ szval += snprintf(ret.str.buf + szval, ret.str.sz - szval, "\n" I2_STR, devname_prop); if (szval >= ret.str.sz) { trunc_strbuf(&ret.str); break; } RESET_LOC_PARAM(loc, dev, CL_DEVICE_NAME); loc.dev = devs[i]; loc.line = __LINE__+1; ret.err = clGetDeviceInfo(devs[i], CL_DEVICE_NAME, ret.str.sz - szval, ret.str.buf + szval, &szname); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; szval += szname - 1; } if (i != ndevs) break; /* had an error earlier, bail */ } if (ctx) { clReleaseContext(ctx); ctx = NULL; } printf("%s%s\n", def, RET_BUF(ret)->buf); } free(devs); UNINIT_RET(ret); } /* check the behavior of NULL platform in clGetDeviceIDs (see checkNullGetDevices) * and in clCreateContext() */ void checkNullBehavior(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms; const struct platform_data *pdata = plist->pdata; cl_uint p = 0; struct device_info_ret ret; INIT_RET(ret, "null behavior"); printf("NULL platform behavior\n"); checkNullGetPlatformName(output); p = checkNullGetDevices(plist, output); /* If there's a default platform, and it has devices, try * creating a context with its first device and see if it works */ if (p == num_platforms) { ret.err = CL_INVALID_PLATFORM; bufcpy(&ret.err_str, 0, no_plat(output)); } else if (pdata[p].ndevs == 0) { ret.err = CL_DEVICE_NOT_FOUND; bufcpy(&ret.err_str, 0, no_dev_found(output)); } else { if (p < num_platforms) { checkNullCtx(&ret, plist, p, "default", output); } else { 
/* this shouldn't happen, but still ... */ ret.err = CL_OUT_OF_HOST_MEMORY; bufcpy(&ret.err_str, 0, ""); } } printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [default]", RET_BUF(ret)->buf); /* Look for a device from a non-default platform, if there are any */ if (p == num_platforms || num_platforms > 1) { cl_uint p2 = 0; while (p2 < num_platforms && (p2 == p || pdata[p2].ndevs == 0)) { p2++; } if (p2 < num_platforms) { checkNullCtx(&ret, plist, p2, "non-default", output); } else { ret.err = CL_DEVICE_NOT_FOUND; bufcpy(&ret.str, 0, ""); } printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [other]", RET_BUF(ret)->buf); } checkNullCtxFromType(plist, output); UNINIT_RET(ret); } /* Get properties of the ocl-icd loader, if available */ /* All properties are currently char[] */ /* Function pointer to the ICD loader info function */ typedef cl_int (*icdl_info_fn_ptr)(cl_icdl_info, size_t, void*, size_t*); icdl_info_fn_ptr clGetICDLoaderInfoOCLICD; /* We want to auto-detect the OpenCL version supported by the ICD loader. * To do this, we will progressively find symbols introduced in new APIs, * until a NULL symbol is found. 
*/ struct icd_loader_test { cl_uint version; const char *symbol; } icd_loader_tests[] = { { 11, "clCreateSubBuffer" }, { 12, "clCreateImage" }, { 20, "clSVMAlloc" }, { 21, "clGetHostTimer" }, { 22, "clSetProgramSpecializationConstant" }, { 0, NULL } }; void icdl_info_str(struct icdl_info_ret *ret, const struct info_loc *loc) { GET_STRING_LOC(ret, loc, clGetICDLoaderInfoOCLICD, loc->param.icdl); return; } struct icdl_info_traits { cl_icdl_info param; // CL_ICDL_* const char *sname; // "CL_ICDL_*" const char *pname; // "ICD loader *" }; static const char * const oclicdl_pfx = "OCLICD"; #define LINFO(symbol, name) { symbol, #symbol, "ICD loader " name } struct icdl_info_traits linfo_traits[] = { LINFO(CL_ICDL_NAME, "Name"), LINFO(CL_ICDL_VENDOR, "Vendor"), LINFO(CL_ICDL_VERSION, "Version"), LINFO(CL_ICDL_OCL_VERSION, "Profile") }; /* The ICD loader info function must be retrieved via clGetExtensionFunctionAddress, * which returns a void pointer. * ISO C forbids assignments between function pointers and void pointers, * but POSIX allows it. To compile without warnings even in -pedantic mode, * we take advantage of the fact that we _can_ do the conversion via * pointers-to-pointers. This is supported on most compilers, except * for some rather old GCC versions whose strict aliasing rules are * too strict. Disable strict aliasing warnings for these compilers. */ #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46) #pragma GCC diagnostic ignored "-Wstrict-aliasing" #endif struct icdl_data oclIcdProps(const struct platform_list *plist, const struct opt_out *output) { const cl_uint max_plat_version = plist->max_plat_version; struct icdl_data icdl; /* Counter that'll be used to walk the icd_loader_tests */ int i = 0; /* We find the clGetICDLoaderInfoOCLICD extension address, which will be used * to query the ICD loader properties. 
* It should be noted that in this specific case we cannot replace the * call to clGetExtensionFunctionAddress with a call to the superseding function * clGetExtensionFunctionAddressForPlatform because the extension is in the * loader itself, not in a specific platform. */ void *ptrHack = clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD"); clGetICDLoaderInfoOCLICD = *(icdl_info_fn_ptr*)(&ptrHack); /* Initialize icdl_data ret versions */ icdl.detected_version = 10; icdl.reported_version = 0; /* Step #1: try to auto-detect the supported ICD loader version */ do { struct icd_loader_test check = icd_loader_tests[i]; if (check.symbol == NULL) break; if (dlsym(DL_MODULE, check.symbol) == NULL) break; icdl.detected_version = check.version; ++i; } while (1); /* Step #2: query properties from extension, if available */ if (clGetICDLoaderInfoOCLICD != NULL) { struct info_loc loc; struct icdl_info_ret ret; reset_loc(&loc, __func__); INIT_RET(ret, "ICD loader"); /* TODO think of a sensible header in CLINFO_RAW */ if (output->mode != CLINFO_RAW) puts("\nICD loader properties"); if (output->mode == CLINFO_RAW) { line_pfx_len = (int)(strlen(oclicdl_pfx) + 5); REALLOC(line_pfx, line_pfx_len, "line prefix OCL ICD"); strbuf_printf(&ret.str, "[%s/*]", oclicdl_pfx); sprintf(line_pfx, "%*s", -line_pfx_len, ret.str.buf); } for (loc.line = 0; loc.line < ARRAY_SIZE(linfo_traits); ++loc.line) { const struct icdl_info_traits *traits = linfo_traits + loc.line; loc.sname = traits->sname; loc.pname = (output->mode == CLINFO_HUMAN ? 
traits->pname : traits->sname); loc.param.icdl = traits->param; ret.str.buf[0] = '\0'; ret.err_str.buf[0] = '\0'; icdl_info_str(&ret, &loc); show_strbuf(RET_BUF(ret), loc.pname, 1, ret.err); if (!ret.err && traits->param == CL_ICDL_OCL_VERSION) { icdl.reported_version = getOpenCLVersion(ret.str.buf + 7); } } UNINIT_RET(ret); } /* Step #3: show it */ if (output->mode == CLINFO_HUMAN) { if (icdl.reported_version && icdl.reported_version != icdl.detected_version) { printf( "\tNOTE:\tyour OpenCL library declares to support OpenCL %" PRIu32 ".%" PRIu32 ",\n" "\t\tbut it seems to support up to OpenCL %" PRIu32 ".%" PRIu32 " %s.\n", SPLIT_CL_VERSION(icdl.reported_version), SPLIT_CL_VERSION(icdl.detected_version), icdl.detected_version < icdl.reported_version ? "only" : "too"); } if (icdl.detected_version < max_plat_version) { printf( "\tNOTE:\tyour OpenCL library only supports OpenCL %" PRIu32 ".%" PRIu32 ",\n" "\t\tbut some installed platforms support OpenCL %" PRIu32 ".%" PRIu32 ".\n" "\t\tPrograms using %" PRIu32 ".%" PRIu32 " features may crash\n" "\t\tor behave unexpectedly\n", SPLIT_CL_VERSION(icdl.detected_version), SPLIT_CL_VERSION(max_plat_version), SPLIT_CL_VERSION(max_plat_version)); } } return icdl; } #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46) #pragma GCC diagnostic warning "-Wstrict-aliasing" #endif void version(void) { puts("clinfo version 2.2.18.04.06"); } void usage(void) { version(); puts("Display properties of all available OpenCL platforms and devices"); puts("Usage: clinfo [options ...]\n"); puts("Options:"); puts("\t--all-props, -a\t\ttry all properties, only show valid ones"); puts("\t--always-all-props, -A\t\tshow all properties, even if invalid"); puts("\t--human\t\thuman-friendly output (default)"); puts("\t--raw\t\traw output"); puts("\t--offline\talso show offline devices"); puts("\t--list, -l\tonly list the platforms and devices by name"); puts("\t-h, -?\t\tshow usage"); puts("\t--version, -v\tshow version\n"); puts("Defaults 
to raw mode if invoked with"); puts("a name that contains the string \"raw\""); } int main(int argc, char *argv[]) { cl_uint p; cl_int err; int a = 0; struct opt_out output; struct platform_list plist; init_plist(&plist); output.mode = CLINFO_HUMAN; output.cond = COND_PROP_CHECK; output.brief = CL_FALSE; output.offline = CL_FALSE; output.check_size = CL_FALSE; /* if there's a 'raw' in the program name, switch to raw output mode */ if (strstr(argv[0], "raw")) output.mode = CLINFO_RAW; /* process command-line arguments */ while (++a < argc) { if (!strcmp(argv[a], "-a") || !strcmp(argv[a], "--all-props")) output.cond = COND_PROP_TRY; else if (!strcmp(argv[a], "-A") || !strcmp(argv[a], "--always-all-props")) output.cond = COND_PROP_SHOW; else if (!strcmp(argv[a], "--raw")) output.mode = CLINFO_RAW; else if (!strcmp(argv[a], "--human")) output.mode = CLINFO_HUMAN; else if (!strcmp(argv[a], "--offline")) output.offline = CL_TRUE; else if (!strcmp(argv[a], "-l") || !strcmp(argv[a], "--list")) output.brief = CL_TRUE; else if (!strcmp(argv[a], "-?") || !strcmp(argv[a], "-h")) { usage(); return 0; } else if (!strcmp(argv[a], "--version") || !strcmp(argv[a], "-v")) { version(); return 0; } else { fprintf(stderr, "ignoring unknown command-line parameter %s\n", argv[a]); } } output.detailed = !output.brief; err = clGetPlatformIDs(0, NULL, &plist.num_platforms); if (err != CL_PLATFORM_NOT_FOUND_KHR) CHECK_ERROR(err, "number of platforms"); if (!output.brief) printf(I0_STR "%" PRIu32 "\n", (output.mode == CLINFO_HUMAN ? 
"Number of platforms" : "#PLATFORMS"), plist.num_platforms); if (!plist.num_platforms) return 0; alloc_plist(&plist); err = clGetPlatformIDs(plist.num_platforms, plist.platform, NULL); CHECK_ERROR(err, "platform IDs"); ALLOC(line_pfx, 1, "line prefix"); for (p = 0; p < plist.num_platforms; ++p) { gatherPlatformInfo(&plist, p, &output); if (output.detailed) puts(""); } showDevices(&plist, &output); if (output.detailed) { if (output.mode != CLINFO_RAW) checkNullBehavior(&plist, &output); oclIcdProps(&plist, &output); } free_plist(&plist); return 0; } clinfo-2.2.18.04.06/src/ctx_prop.h000066400000000000000000000015771326160521100163310ustar00rootroot00000000000000/* List of OpenCL context properties used to interoperate with a different API */ #ifndef CTX_PROP #define CTX_PROP /* cl_khr_gl_sharing */ #define CL_GL_CONTEXT_KHR 0x2008 #define CL_EGL_DISPLAY_KHR 0x2009 #define CL_GLX_DISPLAY_KHR 0x200A #define CL_WGL_HDC_KHR 0x200B #define CL_CGL_SHAREGROUP_KHR 0x200C /* cl_khr_dx9_media_sharing */ #define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 #define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 #define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 /* cl_khr_d3d10_sharing */ #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 /* cl_khr_d3d11_sharing */ #define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D /* cl_intel_dx9_media_sharing */ #define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 #define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 #define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 /* cl_intel_va_api_media_sharing */ #define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097 #endif clinfo-2.2.18.04.06/src/error.h000066400000000000000000000032201326160521100156070ustar00rootroot00000000000000/* OpenCL error handling */ #ifndef ERROR_H #define ERROR_H #include #include "ext.h" #include "info_loc.h" #include "fmtmacros.h" #include "strbuf.h" cl_int check_ocl_error(cl_int err, const char *what, const char *func, int line) { if (err != CL_SUCCESS) { fflush(stdout); fflush(stderr); fprintf(stderr, "%s:%u: %s : error %d\n", func, line, what, 
err); fflush(stderr); } return err; } cl_int report_ocl_error_basic(struct _strbuf *str, cl_int err, const char *what, const char *func, int line) { if (err != CL_SUCCESS) { snprintf(str->buf, str->sz, "<%s:%d: %s : error %d>", func, line, what, err); } return err; } cl_int report_ocl_error_loc(struct _strbuf *str, cl_int err, const char *fmt, const struct info_loc *loc) { static char full_fmt[1024]; if (err != CL_SUCCESS) { snprintf(full_fmt, 1024, "<%s:%" PRIuS ": %s : error %d>", loc->function, loc->line, fmt, err); snprintf(str->buf, str->sz, full_fmt, loc->sname); } return err != CL_SUCCESS; } void report_size_mismatch(struct _strbuf *str, size_t req, size_t ours, const struct info_loc *loc) { snprintf(str->buf, str->sz, "<%s:%" PRIuS ": %s : size mismatch " "(requested %" PRIuS ", we offer %" PRIuS ")>", loc->function, loc->line, loc->sname, req, ours); } #define CHECK_ERROR(error, what) if (check_ocl_error(error, what, __func__, __LINE__)) exit(1) #define REPORT_ERROR(str, err, what) report_ocl_error_basic(str, err, what, __func__, __LINE__) #define REPORT_ERROR_LOC(ret, err, loc, what) report_ocl_error_loc(&((ret)->err_str), err, what, loc) #define REPORT_SIZE_MISMATCH(str, loc, req, ours) report_size_mismatch(str, req, ours, loc) #endif clinfo-2.2.18.04.06/src/ext.h000066400000000000000000000204751326160521100152710ustar00rootroot00000000000000/* Include OpenCL header, and define OpenCL extensions, since what is and is not * available in the official headers is very system-dependent */ #ifndef EXT_H #define EXT_H /* We will use the deprecated clGetExtensionFunctionAddress, * so let the headers know that we don't care about it being deprecated. * The standard CL_USE_DEPRECATED_OPENCL_1_1_APIS define apparently * doesn't work for macOS, so we'll just tell the compiler to not * warn about deprecated functions. 
* A more correct solution would be to suppress the warning only around the * clGetExtensionFunctionAddress call, but honestly I just cleaned up that * piece of code. And I'm actually wondering if it even makes sense to * build that part of the code on macOS: does anybody actually use * ocl-icd as OpenCL dispatcher on macOS? */ #ifdef __APPLE__ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include #else #define CL_USE_DEPRECATED_OPENCL_1_1_APIS #include #endif /* These two defines were introduced in the 1.2 headers * on 2012-11-30, so earlier versions don't have them * (e.g. Debian wheezy) */ #ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B #endif /* 2.0 headers are not very common for the time being, so * let's copy the defines for the new CL_DEVICE_* properties * here. */ #ifndef CL_VERSION_2_0 #define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C #define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D #define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A #define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E #define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F #define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 #define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 #define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 #define CL_DEVICE_SVM_CAPABILITIES 0x1053 #define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 #define CL_DEVICE_MAX_PIPE_ARGS 0x1055 #define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 #define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 #define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 #define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 #define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A #define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) #define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) #define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) #define CL_DEVICE_SVM_ATOMICS (1 << 3) typedef cl_bitfield cl_device_svm_capabilities; #endif #ifndef 
CL_VERSION_2_1 #define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 #define CL_DEVICE_IL_VERSION 0x105B #define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C #define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D #endif /* * Extensions */ /* cl_khr_icd */ #define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 #define CL_PLATFORM_NOT_FOUND_KHR -1001 /* cl_amd_object_metadata */ #define CL_PLATFORM_MAX_KEYS_AMD 0x403C /* cl_khr_fp64 */ #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 /* cl_khr_fp16 */ #define CL_DEVICE_HALF_FP_CONFIG 0x1033 /* cl_khr_il_program */ #define CL_DEVICE_IL_VERSION_KHR 0x105B /* cl_khr_terminate_context */ #define CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x 0x200F #define CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x 0x2031 /* TODO: I cannot find official definitions for these, * so I'm currently extrapolating them from the specification */ typedef cl_bitfield cl_device_terminate_capability_khr; #define CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR (1<<0) /* cl_khr_subgroup_named_barrier */ #define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035 /* cl_nv_device_attribute_query */ #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 #define CL_DEVICE_WARP_SIZE_NV 0x4003 #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 #define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 #define CL_DEVICE_PCI_BUS_ID_NV 0x4008 #define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 /* cl_ext_atomic_counters_{32,64} */ #define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 /* cl_amd_device_attribute_query */ #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 #define CL_DEVICE_TOPOLOGY_AMD 0x4037 #define CL_DEVICE_BOARD_NAME_AMD 0x4038 #define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 #define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 #define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 #define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 #define 
CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 #define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 #define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 #define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 #define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 #define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 #define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A #define CL_DEVICE_GFXIP_MINOR_AMD 0x404B #define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C /* These two are undocumented */ #define CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD 0x404D #define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD 0x404E /* These were added in v4 of the extension, but have values lower * than the older ones, and spanning around the cl_ext_atomic_counters_* * define */ #define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 #define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 #define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 #define CL_DEVICE_PCIE_ID_AMD 0x4034 #ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD #define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 /* Two overlaid views of the same storage: 'raw' is a type tag followed by * five 32-bit words; 'pcie' is the same type tag followed by 17 unused bytes * and then the bus, device and function bytes. Only defined here when the * system headers did not already provide CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD */ typedef union { struct { cl_uint type; cl_uint data[5]; } raw; struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; } cl_device_topology_amd; #endif /* cl_amd_offline_devices */ #define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F /* cl_amd_copy_buffer_p2p */ #define CL_DEVICE_NUM_P2P_DEVICES_AMD 0x4088 #define CL_DEVICE_P2P_DEVICES_AMD 0x4089 /* cl_ext_device_fission */ #define cl_ext_device_fission 1 typedef cl_ulong cl_device_partition_property_ext; #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 #define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 #define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 #define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 /* cl_intel_device_partition_by_names */ #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 #define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 #define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 #define CL_DEVICE_AFFINITY_DOMAINS_EXT 
0x4056 #define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 #define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 #define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 /* cl_intel_advanced_motion_estimation */ #define CL_DEVICE_ME_VERSION_INTEL 0x407E /* cl_intel_device_side_avc_motion_estimation */ #define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B #define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C #define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D /* cl_intel_planar_yuv */ #define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E #define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F /* cl_qcom_ext_host_ptr */ #define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 #define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 /* cl_arm_shared_virtual_memory */ #define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6 /* cl_khr_spir */ #define CL_DEVICE_SPIR_VERSIONS 0x40E0 /* cl_altera_device_temperature */ #define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3 /* cl_intel_simultaneous_sharing */ #define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 #define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 /* cl_intel_required_subgroup_size */ #define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 /* clGetICDLoaderInfoOCLICD: selectors for the ICD loader properties */ typedef enum { CL_ICDL_OCL_VERSION=1, CL_ICDL_VERSION=2, CL_ICDL_NAME=3, CL_ICDL_VENDOR=4, } cl_icdl_info; #endif clinfo-2.2.18.04.06/src/fmtmacros.h000066400000000000000000000015111326160521100164520ustar00rootroot00000000000000/* cl_ulong is always a 64bit integer, so in a few places we want to use its shadow type uint64_t, and print the values using PRIu64. We'll similarly define one for size_t, to make support for non-standard/older compiler easier. 
*/ #ifndef FMT_MACROS_H #define FMT_MACROS_H #ifdef _WIN32 /* TODO FIXME WIN64 support */ # include # include // size_t # define PRIu32 "I32u" # define PRId32 "I32d" # define PRIx32 "I32x" # define PRIX32 "I32X" # define PRIu64 "I64u" # define PRIx64 "I64x" # define PRIX64 "I64X" # define PRIuS "Iu" #if INTPTR_MAX <= INT32_MAX # define PRIXPTR PRIX32 # define PRIxPTR PRIx32 #else # define PRIXPTR PRIX64 # define PRIxPTR PRIx64 #endif #else # define __STDC_FORMAT_MACROS # include #endif // size_t print spec #ifndef PRIuS # define PRIuS "zu" #endif #endif clinfo-2.2.18.04.06/src/info_loc.h000066400000000000000000000012351326160521100162520ustar00rootroot00000000000000#ifndef INFO_LOC_H #define INFO_LOC_H #include "ext.h" struct info_loc { const char *function; const char *sname; // parameter symbolic name const char *pname; // parameter printable name size_t line; cl_platform_id plat; cl_device_id dev; union { cl_platform_info plat; cl_device_info dev; cl_icdl_info icdl; } param; }; static inline void reset_loc(struct info_loc *loc, const char *func) { loc->function = func; loc->sname = loc->pname = NULL; loc->line = 0; loc->plat = NULL; loc->dev = NULL; loc->param.plat = 0; } #define RESET_LOC_PARAM(_loc, _dev, _param) do { \ _loc.param._dev = _param; \ _loc.sname = #_param; \ } while (0) #endif clinfo-2.2.18.04.06/src/info_ret.h000066400000000000000000000037771326160521100163040ustar00rootroot00000000000000#ifndef INFO_RET_H #define INFO_RET_H #include "ext.h" #include "strbuf.h" /* Return type of the functions that gather platform info */ struct platform_info_ret { cl_int err; /* string representation of the value (if any) */ struct _strbuf str; /* error representation of the value (if any) */ struct _strbuf err_str; /* actual value, when not a string */ union { size_t s; cl_ulong u64; } value; }; /* Return type of the functions that print device info */ struct device_info_ret { cl_int err; /* string representation of the value (if any) */ struct _strbuf str; /* 
error representation of the value (if any) */ struct _strbuf err_str; /* actual value, when not a string */ union { size_t s; cl_long i64; cl_ulong u64; cl_int i32; cl_uint u32; cl_uint4 u32v; cl_bool b; cl_device_type devtype; cl_device_mem_cache_type cachetype; cl_device_local_mem_type lmemtype; cl_device_topology_amd devtopo; cl_device_affinity_domain affinity_domain; cl_device_fp_config fpconfig; cl_command_queue_properties qprop; cl_device_exec_capabilities execap; cl_device_svm_capabilities svmcap; cl_device_terminate_capability_khr termcap; } value; /* pointer base for array data or other auxiliary information */ union { void *ptr; // TODO cl_context ctx; // associated context } base; }; /* Return type of the functions that gather ICD loader info */ struct icdl_info_ret { cl_int err; /* string representation of the value (if any) */ struct _strbuf str; /* error representation of the value (if any) */ struct _strbuf err_str; }; #define RET_BUF(ret) (ret.err ? &ret.err_str : &ret.str) #define RET_BUF_PTR(ret) (ret->err ? 
&ret->err_str : &ret->str) #define INIT_RET(ret, msg) do { \ init_strbuf(&ret.str); \ init_strbuf(&ret.err_str); \ realloc_strbuf(&ret.str, 1024, msg " info string values"); \ realloc_strbuf(&ret.err_str, 1024, msg " info error values"); \ } while (0) #define UNINIT_RET(ret) do { \ free_strbuf(&ret.str); \ free_strbuf(&ret.err_str); \ } while (0) #endif clinfo-2.2.18.04.06/src/memory.h000066400000000000000000000007501326160521100157730ustar00rootroot00000000000000/* Memory handling */ #ifndef MEMORY_H #define MEMORY_H #include #define CHECK_MEM(var, what) do { \ if (!(var)) { \ fprintf(stderr, "%s:%d: %s : Out of memory\n", \ __func__, __LINE__, what); \ exit(1); \ } \ } while (0) #define ALLOC(var, num, what) do { \ var = calloc(num, sizeof(*(var))); \ CHECK_MEM(var, what); \ } while (0) #define REALLOC(var, num, what) do { \ var = realloc(var, (num)*sizeof(*(var))); \ CHECK_MEM(var, what); \ } while (0) #endif clinfo-2.2.18.04.06/src/ms_support.h000066400000000000000000000031751326160521100167020ustar00rootroot00000000000000/* Missing functions and other misc stuff to support * the horrible MS C compiler * * TODO could be improved by version-checking for C99 support */ #ifndef MS_SUPPORT #define MS_SUPPORT // disable warning about unsafe strncpy vs strncpy_s usage #pragma warning(disable : 4996) // disable warning about constant conditional expressions #pragma warning(disable : 4127) // disable warning about non-constant aggregate initializer #pragma warning(disable : 4204) // disable warning about global shadowing #pragma warning(disable : 4459) // disable warning about parameter shadowing #pragma warning(disable : 4457) // Suppress warning about unused parameters. 
The macro definition // _should_ work, but it doesn't on VS2012 (cl 17), may be a version thing #define UNUSED(x) x __pragma(warning(suppress: 4100)) // TODO FIXME remove full-blown warning removal where not needed #pragma warning(disable: 4100) // No inline in MS C #define inline __inline // No snprintf in MS C, copy over implementation taken from // stackoverflow #include #include inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap) { int count = -1; if (size != 0) count = _vsnprintf_s(str, size, _TRUNCATE, format, ap); if (count == -1) count = _vscprintf(format, ap); return count; } inline int c99_snprintf(char* str, size_t size, const char* format, ...) { int count; va_list ap; va_start(ap, format); count = c99_vsnprintf(str, size, format, ap); va_end(ap); return count; } #define snprintf c99_snprintf // And no __func__ either #define __func__ __FUNCTION__ #endif clinfo-2.2.18.04.06/src/opt_out.h000066400000000000000000000022101326160521100161450ustar00rootroot00000000000000/* clinfo output options */ #ifndef OPT_OUT_H #define OPT_OUT_H #include "ext.h" enum output_modes { CLINFO_HUMAN = 1, /* more human readable */ CLINFO_RAW = 2, /* property-by-property */ CLINFO_BOTH = CLINFO_HUMAN | CLINFO_RAW }; /* Specify how we should handle conditional properties. 
*/ enum cond_prop_modes { COND_PROP_CHECK = 0, /* default: check, skip if invalid */ COND_PROP_TRY = 1, /* try, don't print an error if invalid */ COND_PROP_SHOW = 2 /* try, print an error if invalid */ }; /* Output options */ struct opt_out { enum output_modes mode; enum cond_prop_modes cond; /* Specify if we should only be listing the platform and devices; * can be done in both human and raw mode, and only the platform * and device names (and number) will be shown * TODO check if terminal supports UTF-8 and use Unicode line-drawing * for the tree in list mode */ cl_bool brief; cl_bool detailed; // !brief cl_bool offline; /* clGetDeviceInfo returns CL_INVALID_VALUE both for unknown properties * and when the destination variable is too small. Set the following to CL_TRUE * to check which one is the case */ cl_bool check_size; }; #endif clinfo-2.2.18.04.06/src/strbuf.h000066400000000000000000000064321326160521100157730ustar00rootroot00000000000000/* multi-purpose string _strbuf, will be initialized to be * at least 1024 bytes long. */ #ifndef STRBUF_H #define STRBUF_H #include #include #include #include #include "memory.h" #include "fmtmacros.h" struct _strbuf { char *buf; size_t sz; }; static inline void init_strbuf(struct _strbuf *str) { str->buf = NULL; str->sz = 0; } static inline void free_strbuf(struct _strbuf *str) { free(str->buf); init_strbuf(str); } #define strbuf_printf(str, ...) snprintf((str)->buf, (str)->sz, __VA_ARGS__) static inline void realloc_strbuf(struct _strbuf *str, size_t nusz, const char* what) { if (nusz > str->sz) { REALLOC(str->buf, nusz, what); str->sz = nusz; } } #define GET_STRING(str, err, cmd, param, param_str, ...) 
do { \ size_t nusz; \ err = cmd(__VA_ARGS__, param, 0, NULL, &nusz); \ if (REPORT_ERROR(str, err, "get " param_str " size")) break; \ realloc_strbuf(str, nusz, #param); \ err = cmd(__VA_ARGS__, param, (str)->sz, (str)->buf, NULL); \ REPORT_ERROR(str, err, "get " param_str); \ } while (0) #define GET_STRING_LOC(ret, loc, cmd, ...) do { \ size_t nusz; \ ret->err = REPORT_ERROR_LOC(ret, \ cmd(__VA_ARGS__, 0, NULL, &nusz), \ loc, "get %s size"); \ if (!ret->err) { \ realloc_strbuf(&ret->str, nusz, loc->sname); \ ret->err = REPORT_ERROR_LOC(ret, \ cmd(__VA_ARGS__, ret->str.sz, ret->str.buf, NULL), \ loc, "get %s"); \ } \ } while (0) /* Skip leading whitespace in a string */ static inline const char* skip_leading_ws(const char *str) { const char *ret = str; while (isspace(*ret)) ++ret; return ret; } /* replace last 3 chars in _strbuf with ... */ static const char ellip[] = "..."; static inline void trunc_strbuf(struct _strbuf *str) { memcpy(str->buf + str->sz - 4, ellip, 4); } /* copy a string to _strbuf, at the given offset, * returning the amount of bytes written (excluding the * closing NULL byte) */ static inline size_t bufcpy_len(struct _strbuf *str, size_t offset, const char *src, size_t len) { size_t maxlen = str->sz - offset - 1; char *dst = str->buf + offset; int trunc = 0; if (str->sz < offset) { fprintf(stderr, "bufcpy overflow copying %s at offset %" PRIuS "/%" PRIuS " (%s)\n", src, offset, str->sz, str->buf); maxlen = 0; trunc = 1; } if (len > maxlen) { len = maxlen; trunc = 1; /* TODO enlarge str->buf instead, if maxlen > 0 */ } memcpy(dst, src, len); offset += len; if (trunc) trunc_strbuf(str); else str->buf[offset] = '\0'; return len; } /* As above, auto-compute string length */ static inline size_t bufcpy(struct _strbuf *str, size_t offset, const char *src) { return bufcpy_len(str, offset, src, strlen(src)); } /* Separators: we want to be able to prepend separators as needed to _strbuf, * which we do only if halfway through the buffer. 
The callers should first * call a 'set_separator' and then use add_separator(&offset) to add it, where szval * is an offset inside the buffer, which will be incremented as needed */ const char *sep; size_t sepsz; void set_separator(const char* _sep) { sep = _sep; sepsz = strlen(sep); } /* Note that no overflow check is done: it is assumed that _strbuf will have enough room */ void add_separator(struct _strbuf *str, size_t *offset) { if (*offset) *offset += bufcpy_len(str, *offset, sep, sepsz); } #endif