pax_global_header00006660000000000000000000000064147536706560014535gustar00rootroot0000000000000052 comment=c57b7e73e910ca1b558415472c8192eeb75711b1 clinfo-3.0.25.02.14/000077500000000000000000000000001475367065600135415ustar00rootroot00000000000000clinfo-3.0.25.02.14/.appveyor.yml000066400000000000000000000005301475367065600162050ustar00rootroot00000000000000version: 2.2.18.03.22-{build} image: Visual Studio 2015 shallow_clone: true platform: - x86 - x64 init: - cmd: C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM% install: - cmd: fetch-opencl-dev-win.cmd %PLATFORM% build_script: - cmd: make.cmd test_script: - cmd: clinfo artifacts: - path: clinfo.exe clinfo-3.0.25.02.14/.gitignore000066400000000000000000000000221475367065600155230ustar00rootroot00000000000000clinfo .*.swp *.o clinfo-3.0.25.02.14/.travis.yml000066400000000000000000000004021475367065600156460ustar00rootroot00000000000000os: - linux - osx arch: - amd64 - ppc64le dist: trusty addons: apt: sources: - sourceline: "deb http://archive.ubuntu.com/ubuntu trusty universe" packages: - ocl-icd-opencl-dev language: c compiler: - gcc - clang script: make && ./clinfo clinfo-3.0.25.02.14/LICENSE000066400000000000000000000004731475367065600145520ustar00rootroot00000000000000clinfo by Giuseppe Bilotta To the extent possible under law, the person who associated CC0 with clinfo has waived all copyright and related or neighboring rights to clinfo. You should have received a copy of the CC0 legalcode along with this work. If not, see clinfo-3.0.25.02.14/Makefile000066400000000000000000000073471475367065600152140ustar00rootroot00000000000000# An interesting trick to run a shell command: # GNU Make uses $(shell cmd), whereas # BSD make use $(var:sh), where ${var} holds the command # We can run a shell command on both by storing the value of the command # in a variable var and then using $(shell $(var))$(var:sh). 
# To detect the operating system it's generally sufficient to run `uname -s`,
# but this way Android is detected as Linux. Android can be detected by `uname -o`,
# but not all `uname` implementations even have the `-o` flag.
# So we first detect the kernel, and then if it's Linux we use the -o detection
# to find if this is Android, otherwise falling back to whatever the kernel was.
# ($$ is make's escape for a literal $ handed to the shell.)
OS.exec = t="$$(uname -s)" ; [ Linux = "$$t" ] && uname -o || printf "%s\n" "$$t"
# Run the detection command only if OS was not already provided by the user
# (e.g. `make OS=Android`). The value is the concatenation of the
# $(shell ...) form and the $(var:sh) form of the same command, so that the
# result does not depend on which of the two forms the running make supports.
OS ?= $(shell $(OS.exec))$(OS.exec:sh)
# Force expansion: evaluate the detection once, here, instead of on every
# later reference to $(OS)
OS := $(OS)

# Program name, manual page and the headers the object file depends on

PROG = clinfo
MAN = man1/$(PROG).1

HDR =	src/error.h \
	src/ext.h \
	src/ctx_prop.h \
	src/fmtmacros.h \
	src/memory.h \
	src/ms_support.h \
	src/info_loc.h \
	src/info_ret.h \
	src/opt_out.h \
	src/strbuf.h

# Look for sources in src/ as well as in the current directory
VPATH = src

# Make it easier to find the OpenCL headers on systems
# that don't ship them by default; the user can just clone
# them on a parallel directory from the official repository
CPPFLAGS += -I../OpenCL-Headers

# Default diagnostic flags, used only when the user did not set CFLAGS;
# the language standard and the basic warnings are always appended
CFLAGS ?= -g -pedantic -Werror
CFLAGS += -std=c99 -Wall -Wextra

# Static analysis tool and its flags
SPARSE ?= sparse
SPARSEFLAGS=-Wsparse-all -Wno-decl

# BSD make does not define RM
RM ?= rm -f

# Installation paths and modes
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
BINMODE ?= 555
MANDIR ?= $(PREFIX)/man
MAN1DIR ?= $(MANDIR)/man1
MANMODE ?= 444

# Directory searched for the vendor OpenCL library on Android
ANDROID_VENDOR_PATH ?= ${ANDROID_ROOT}/vendor/lib64

# Per-OS linker flags are kept in variables named LDFLAGS_<OS>; the computed
# reference $(LDFLAGS_$(OS)) picks the one for the detected OS and is empty
# for any OS that does not define one
LDFLAGS_Android += -Wl,-rpath-link=${ANDROID_VENDOR_PATH} -L${ANDROID_VENDOR_PATH}

LDFLAGS += $(LDFLAGS_$(OS))

# Common library includes
LDLIBS__common = -lOpenCL -ldl

# OS-specific library includes
# LDLIBS_<OS> lists the libraries to add for that OS, and LDLIBS_<OS>_exclude
# names the entry of the common list that must be dropped in exchange
LDLIBS_Darwin = -framework OpenCL
LDLIBS_Darwin_exclude = -lOpenCL

# The substitution reference rewrites the excluded word of the common list
# to the empty string, so with the Darwin definitions above the result is
# `-framework OpenCL -ldl`.
# NOTE(review): for an OS without an _exclude variable the pattern is empty
# and the common list is presumably left unchanged -- confirm on BSD make.
LDLIBS += $(LDLIBS_${OS}) $(LDLIBS__common:$(LDLIBS_${OS}_exclude)=)

# The main target is the executable, which is normally called clinfo.
# However, on Android, due to the lack of support for RPATH, clinfo
# needs an appropriate LD_LIBRARY_PATH, so we map `clinfo` to a shell script
# that sets LD_LIBRARY_PATH and invokes the real program, which is now called
# clinfo.real.
# # Of course on Android we need to buid both, but not on other OSes EXT.Android = .real EXENAME = $(PROG)$(EXT.${OS}) TARGETS.Android = $(PROG) TARGETS = $(EXENAME) $(TARGETS.${OS}) all: $(TARGETS) # # Targets to actually build the stuff # # Many versions of make define a LINK.c as a synthetic rule to link # C object files. In case it's not defined already, propose our own: LINK.c ?= $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) # Recipe for the actual executable, either clinfo (non-Android) # or clinfo.real (on Androd) $(EXENAME): $(PROG).o $(LINK.c) -o $@ $< $(LDLIBS) $(PROG).o: $(PROG).c $(HDR) # For Android: create a wrapping shell script to run # clinfo with the appropriate LD_LIBRARY_PATH. $(OS:Android=)$(PROG): @echo '#!/bin/sh' > $@ @echo 'wrapperdir="$$(dirname "$$(readlink -n -f "$$(command -v "$$0")")")"' >> $@ @echo 'LD_LIBRARY_PATH="${ANDROID_VENDOR_PATH}:${ANDROID_VENDOR_PATH}/egl:$$LD_LIBRARY_PATH" $${wrapperdir}/$(EXENAME) "$$@"' >> $@ chmod +x $@ clean: $(RM) $(PROG).o $(TARGETS) install: all install -d $(DESTDIR)$(BINDIR) install -d $(DESTDIR)$(MAN1DIR) install -p -m $(BINMODE) $(PROG) $(DESTDIR)$(BINDIR)/$(PROG) install -p -m $(MANMODE) $(MAN) $(DESTDIR)$(MAN1DIR) sparse: $(PROG).c $(SPARSE) $(CPPFLAGS) $(CFLAGS) $(SPARSEFLAGS) $^ show: @printf 'OS=%s\n' "${OS}" @printf 'CFLAGS=%s\n' "${CFLAGS}" @printf 'CPPFLAGS=%s\n' "${CPPFLAGS}" @printf 'LDFLAGS=%s\n' "${LDFLAGS}" @printf 'LDLIBS=%s\n' "${LDLIBS}" @printf 'TARGETS=%s\n' "${TARGETS}" .PHONY: clean sparse install show clinfo-3.0.25.02.14/Makefile.win000066400000000000000000000032401475367065600157740ustar00rootroot00000000000000# TODO FIXME find a better way to detect the directory to use # for OpenCL development files !IF "$(OPENCLDIR)" == "" OPENCLDIR = $(INTELOCLSDKROOT) !ENDIF !IF "$(OPENCLDIR)" == "" OPENCLDIR = $(AMDAPPSDKROOT) !ENDIF !IF "$(OPENCLDIR)" == "" OPENCLDIR = $(MAKEDIR) !ENDIF !IF "$(OPENCLDIR)" == "" OPENCLDIR = . 
!ENDIF !MESSAGE OpenCL dir: $(OPENCLDIR) HDR = src/error.h \ src/ext.h \ src/ctx_prop.h \ src/fmtmacros.h \ src/memory.h \ src/ms_support.h \ src/info_loc.h \ src/info_ret.h \ src/opt_out.h \ src/strbuf.h CFLAGS = /GL /Ox /W4 /Zi /I"$(OPENCLDIR)\include" /nologo LIBS = libOpenCL.a # TODO there's most likely a better way to do the multiarch # switching !IF "$(PROCESSOR_ARCHITECTURE)" == "AMD64" ARCH=64 !ELSE ARCH=32 !ENDIF # Platform=x64 in the 64-bit cross-platform build of my VS !IF "$(PLATFORM)" == "x64" || "$(PLATFORM)" == "X64" ARCH=64 !ELSE IF "$(PLATFORM)" == "x86" || "$(PLATFORM)" == "X86" ARCH=32 !ENDIF !MESSAGE Building for $(ARCH)-bit (processor architecture: $(PROCESSOR_ARCHITECTURE), platform: $(PLATFORM)) LIBPATH32 = /LIBPATH:"$(OPENCLDIR)\lib" /LIBPATH:"$(OPENCLDIR)\lib\x86" LIBPATH64 = /LIBPATH:"$(OPENCLDIR)\lib\x64" /LIBPATH:"$(OPENCLDIR)\lib\x86_64" /LIBPATH:"$(OPENCLDIR)\lib\x86_amd64" # And since we can't do $(LIBPATH$(ARCH)) with nmake ... !IF "$(ARCH)" == "64" LINKOPTS = /LTCG $(LIBPATH64) /nologo !ELSE LINKOPTS = /LTCG $(LIBPATH32) /nologo !ENDIF clinfo.exe: clinfo.obj link $(LINKOPTS) $(LIBS) clinfo.obj /out:clinfo.exe clinfo.obj: src/clinfo.c $(HDR) $(CC) $(CFLAGS) /c src/clinfo.c /Foclinfo.obj clean: del /F /Q clinfo.exe clinfo.obj .PHONY: clean clinfo-3.0.25.02.14/README.md000066400000000000000000000141641475367065600150260ustar00rootroot00000000000000# What is this? clinfo is a simple command-line application that enumerates all possible (known) properties of the OpenCL platform and devices available on the system. Inspired by AMD's program of the same name, it is coded in pure C and it tries to output all possible information, including those provided by platform-specific extensions, trying not to crash on unsupported properties (e.g. 1.2 properties on 1.1 platforms). # Usage clinfo [options...] 
Commonly used options are `-l` to show a synthetic summary of the available devices (without properties), and `-a`, to try and show properties even if `clinfo` would otherwise think they aren't supported by the platform or device. Refer to the man page for further information. ## Use cases * verify that your OpenCL environment is set up correctly; if `clinfo` cannot find any platform or devices (or fails to load the OpenCL dispatcher library), chances are high no other OpenCL application will run; * verify that your OpenCL _development_ environment is set up correctly: if `clinfo` fails to build, chances are high no other OpenCL application will build; * explore/report the actual properties of the available device(s). ## Segmentation faults Some faulty OpenCL platforms may cause `clinfo` to crash. There isn't much `clinfo` itself can do about it, but you can try and isolate the platform responsible for this. On POSIX systems, you can generally find the platform responsible for the fault with the following one-liner: find /etc/OpenCL/vendors/ -name '*.icd' | while read OPENCL_VENDOR_PATH ; do clinfo -l > /dev/null ; echo "$? ${OPENCL_VENDOR_PATH}" ; done ## Missing information If you know of device properties that are exposed in OpenCL (either as core properties or as extensions), but are not shown by `clinfo`, please [open an issue](https://github.com/Oblomov/clinfo/issues) providing as much information as you can. Patches and pull requests accepted too. # Building Build status on Travis Building requires an OpenCL SDK (or at least OpenCL headers and development files), and the standard build environment for the platform. No special build system is used (autotools, CMake, meson, ninja, etc), as I feel adding more dependencies for such a simple program would be excessive. Simply running `make` at the project root should work.
## Android support ### Local build via Termux One way to build the application on Android, pioneered by [truboxl][truboxl] and described [here][issue46], requires the installation of [Termux][termux], which can be installed via Google Play as well as via F-Droid. [truboxl]: https://github.com/truboxl [issue46]: https://github.com/Oblomov/clinfo/issues/46 [termux]: https://termux.com/ Inside Termux, you will first need to install some common tools: pkg install git make clang -y You will also need to clone the `clinfo` repository, and fetch the OpenCL headers (we'll use the official `KhronosGroup/OpenCL-Headers` repository for that): git clone https://github.com/Oblomov/clinfo git clone https://github.com/KhronosGroup/OpenCL-Headers (I prefer doing this from a `src` directory I have created for development, but as long as `clinfo` and `OpenCL-Headers` are sibling directories, the headers will be found. If not, you will have to override `CPPFLAGS` with e.g. `export CPPFLAGS=-I/path/to/where/headers/are` before running `make`. Of course `/path/to/where/headers/are` should be replaced with the actual path to which the `OpenCL-Headers` repository was cloned.) You can then `cd clinfo` and build the application. You can try simply running `make` since Android should be autodetected now, but if this fails you can also force the detection with make OS=Android If linking fails due to a missing `libOpenCL.so`, then your Android machine probably doesn't support OpenCL. Otherwise, you should have a working `clinfo` you can run. You will most probably need to set `LD_LIBRARY_PATH` to let the program know where the OpenCL library is at runtime: you will need at least `${ANDROID_ROOT}/vendor/lib64`, but on some machines the OpenCL library actually maps to a different library (e.g., on one of my systems, it maps to the GLES library, which is in a different subdirectory).
Due to this requirement, on Android the actual binary is now called `clinfo.real`, and the produced `clinfo` is just a shell script that will run the actual binary after setting `LD_LIBRARY_PATH`. If this is not sufficient on your installation, please open an issue and we'll try to improve the shell script to cover your use case as well. ## MacOS support clinfo should build without issues out of the box on most macOS installations (starting from OS X v10.6). In contrast to most other operating systems, the macOS system OpenCL library only supports Apple's own OpenCL platform. To use other platforms such as [PoCL](https://portablecl.org), it is necessary to install an alternative OpenCL library that works as an ICD loader, such as [Homebrew](https://brew.sh)'s [ocl-icd](https://formulae.brew.sh/formula/ocl-icd). To build `clinfo` using the Homebrew OpenCL library instead of the macOS system library, you can use make OS=Homebrew ## Windows support The application can usually be built in Windows too (support for which required way more time than I should have spent, really, but I digress), by running `make` in a Developer Command Prompt for Visual Studio, provided an OpenCL SDK (such as the Intel or AMD one) is installed. Precompiled Windows executables are available as artefacts of the AppVeyor CI.
Build statusWindows binaries
Build status on AppVeyor 32-bit 64-bit
clinfo-3.0.25.02.14/TODO000066400000000000000000000010131475367065600142240ustar00rootroot00000000000000cl_khr_integer_dot_product https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_integer_dot_product cl_khr_kernel_clock https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#cl_khr_kernel_clock (extension is provisional; defines have already been imported in ext.h) cl_khr_command_buffer_multi_device https://registry.khronos.org/OpenCL/sdk/3.0/docs/man/html/cl_khr_command_buffer_multi_device.html (extension is provisional; defines have already been imported in ext.h) clinfo-3.0.25.02.14/fetch-opencl-dev-win.cmd000066400000000000000000000010031475367065600201360ustar00rootroot00000000000000REM call as fetch-opencl-dev-win x86|x86_64|x64 git clone https://github.com/KhronosGroup/OpenCL-Headers include set sub=%1 if /i "%sub%" == "x64" set sub=x86_64 mkdir lib\%sub% curl -L -o lib/%sub%/libOpenCL.a https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/lib/%sub%/libOpenCL.a -o lib/%sub%/OpenCL.lib https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/lib/%sub%/OpenCL.lib -o OpenCL.dll https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/bin/%sub%/OpenCL.dll clinfo-3.0.25.02.14/legalcode.txt000066400000000000000000000156101475367065600162240ustar00rootroot00000000000000Creative Commons Legal Code CC0 1.0 Universal CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. 
Statement of Purpose The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; ii. moral rights retained by the original author(s) and/or performer(s); iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; iv. 
rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; v. rights protecting the extraction, dissemination, use and reuse of data in a Work; vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 3. Public License Fallback. 
Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 4. Limitations and Disclaimers. a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. b. 
Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. clinfo-3.0.25.02.14/make.cmd000066400000000000000000000000421475367065600151370ustar00rootroot00000000000000nmake /F Makefile.win /nologo %* clinfo-3.0.25.02.14/man1/000077500000000000000000000000001475367065600143755ustar00rootroot00000000000000clinfo-3.0.25.02.14/man1/clinfo.1000066400000000000000000000301761475367065600157400ustar00rootroot00000000000000.TH CLINFO 1 "2025-02-14" "clinfo 3.0.25.02.14" .SH NAME clinfo \- show OpenCL platforms and devices .SH SYNOPSIS .B clinfo .RI [ "options ..." ] .SH DESCRIPTION .B clinfo prints all available information about all OpenCL platforms available on the system and the devices they expose. .SH OPTIONS .B clinfo accepts the following options: .TP 2 .BR -a ", " --all-props try to retrieve all properties, even those not officially supported (e.g. 
because they require specific extensions), but only show them if the property could be retrieved successfully; see also the .B LIMITATIONS section below; note that even though this may reveal hidden properties, there is no guarantee that the returned values are meaningful, nor that the corresponding feature is actually available at all; .TP .BR -A ", " --always-all-props like .BR -a , but also show errors; .TP .B --human produce human-friendly output; this is the default (except as noted below); .TP .B --raw produce machine-friendly output; this is the default if .B clinfo is invoked with a name that contains the string .RI \*(lq raw \*(rq; .TP .B --json outputs the raw data (cf. the .B --raw option) in JSON format; support for this option is experimental, as the representation of some of the values is not finalized; .TP .B --offline shows also offline devices for platforms that expose this feature; .TP .B --null-platform tries to handle the NULL platform as a normal platform, retrieving and showing its properties and devices; this is in addition to the NULL platform behavior tests done at the end, and can be useful on systems where there are no ICD platforms, but there is a platform hard-coded in the OpenCL library itself; .TP .BR -l ", " --list list platforms and devices by name, with no (other) properties; .TP .BI -d " platform_index" : device_index .TQ .BI --device " platform_index" : device_index only show properties for the specified device in the specified platform; multiple device specifications may be given on the command-line; .TP .BI --prop " property-name" only show properties whose symbolic name matches (contains as a substring) the given .IR property-name ; the name is normalized as upper-case and with minus sign (-) replaced by underscore signs (_); multiple property specifications may be given on the command-line; when this flag is specified, raw mode is forced; .TP .BR --help ", " -? 
", " -h show usage; .TP .BR --version ", " -v show program version. .SH CONFORMING TO OpenCL 1.1, OpenCL 1.2, OpenCL 2.0, OpenCL 2.1, OpenCL 2.2, OpenCL 3.0. .SH EXTENSIONS Supported OpenCL extensions: .TP 2 .B cl_khr_device_uuid for the UUID, LUID and node mask of the device; .TP .B cl_khr_extended_versioning for the extended platform, device, extension and IL versioned properties backported from OpenCL 3.0 to previous OpenCL versions; .TP .BR cl_khr_fp16 ", " cl_khr_fp64 ", " cl_amd_fp64 ", " cl_APPLE_fp64_basic_ops for information about support for half-precision and double-precision floating-point data types; .TP .B cl_khr_image2d_from_buffer for information about the base address and pitch alignment requirements of buffers to be used as base for 2D images; .TP .B cl_khr_il_program for information about the supported IL (Intermediate Language) representations; .TP .B cl_khr_spir for information about the supported SPIR (Standard Portable Intermediate Representation) versions; .TP .B cl_khr_icd for the suffix of vendor extensions functions; .TP .B cl_khr_subgroup_named_barrier for the maximum number of named sub-group barriers; .TP .BI cl_khr_terminate_context ", " cl_arm_controlled_kernel_termination for the terminate capabilities for the device; .TP .B cl_ext_device_fission for device fission support in OpenCL 1.1 devices; .TP .B cl_khr_pci_bus_info for the PCI bus information (see also .BR cl_nv_device_attribute_query " and" .BR cl_amd_device_attribute_query ) .TP .B cl_ext_atomic_counters_32 .TQ .B cl_ext_atomic_counters_64 for the atomic counter extension; .TP .B cl_ext_float_atomics for the floating-point atomic capabilities for half, single and double precision (depending on hardware floating-point size support); .TP .B cl_ext_cxx_for_opencl for the version of the C++ for OpenCL language supported by the device compiler; .TP .B cl_amd_device_attribute_query for AMD-specific device attributes; .TP .B cl_amd_object_metadata to show the maximum number of 
keys supported by the platform; .TP .B cl_amd_offline_devices to show offline devices exposed by the platform, if requested (see .B --offline option); .TP .B cl_amd_copy_buffer_p2p to show the number and IDs of available P2P devices; .TP .B cl_amd_svm .TQ .B cl_arm_shared_virtual_memory for Shared Virtual Memory (SVM) capabilities in OpenCL 1.2 devices; .TP .B cl_arm_core_id to show the (potentially sparse) list of the core IDs that the device may return; .TP .B cl_arm_job_slot_selection to show the (potentially sparse) list of available job slots for command submission; .TP .B cl_arm_scheduling_controls to show the supported work scheduling controls and the available sets of register allocations; .TP .B cl_nv_device_attribute_query for NVIDIA-specific device attributes; .TP .B cl_intel_device_attribute_query for Intel-specific device attributes; .TP .B cl_intel_exec_by_local_thread for the Intel extension allowing CPU devices to run kernels as part of the current host thread; .TP .B cl_intel_advanced_motion_estimation for the version of the Intel Motion Estimation accelerator version; .TP .B cl_intel_device_side_avc_motion_estimation for the version and supported features of Intel's device-side AVC Motion; .TP .B cl_intel_planar_yuv for the maximum dimensions of planar YUV images; .TP .B cl_intel_simultaneous_sharing for simultaneous CL/GL/DirectX context sharing (only partial support); .TP .B cl_intel_required_subgroup_size to enumerate allowed sub-group sizes; .TP .B cl_intel_command_queue_families to enumerate the available command queues and their properties and capabilities; .TP .B cl_altera_device_temperature for the Altera extension to query the core temperature of the device; .TP .B cl_qcom_ext_host_ptr for the QUALCOMM extension to query page size and required padding in external memory allocation. .SH NOTES Some information is duplicated when available from multiple sources. 
Examples: .IP \(bu 2 supported device partition types and domains as obtained using the .B cl_ext_device_fission extension typically match the ones obtained using the core OpenCL 1.2 device partition feature; .IP \(bu the preferred work-group size multiple matches the NVIDIA warp size (on NVIDIA devices) or the AMD wavefront width (on AMD devices). .P Some floating-point configuration flags may only be meaningful for specific precisions and/or specific OpenCL versions. For example, .B CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only relevant for single precision in OpenCL 1.2 devices. .P The implementation-defined behavior for NULL platform or context properties is tested for the following API calls: .TP 2 .B clGetPlatformInfo() by trying to show the platform name; .TP .B clGetDeviceIDs() by trying to enumerate devices; the corresponding platform (if any) is then detected by querying the device platform of the first device; .TP .B clCreateContext() by trying to create a context from a device from the previous list (if any), and a context from a device from a different platform; .TP .B clCreateContextFromType() by trying to create contexts for each device type (except DEFAULT). .SH EXPERIMENTAL FEATURES .P Support for OpenCL 2.x properties is not fully tested. .P Support for .B cl_khr_subgroup_named_barrier is experimental due to missing definitions in the official OpenCL headers. .P Raw (machine-parsable) output is considered experimental, the output format might still undergo changes. .P The properties of the ICD loader will also be queried if the .B clGetICDLoaderInfoOCLICD extension function is found. .P Support for the properties exposed by .B cl_amd_copy_buffer_p2p is experimental. .P Support for some (documented and undocumented) properties exposed by .B cl_amd_device_attribute_query is experimental (see also .BR LIMITATIONS ). .P Support for the interop lists exposed by .B cl_intel_simultaneous_sharing is experimental.
.P The highest OpenCL version supported by the ICD loader is detected with some trivial heuristics (symbols found); a notice is output if this is lower than the highest platform OpenCL version, or if the detected version doesn't match the one declared by the ICD loader itself. .SH LIMITATIONS .P OpenCL did not provide an explicit means to detect the supported version of any extension exposed by a device until version 3.0. This makes it impossible in many circumstances to determine a priori if it will be possible to successfully query a device about a specific property even if it declares support for a given extension. Additionally, the actual size and meaning of some properties are not officially declared anywhere. .P Most notably, this affects extensions such as .BR cl_amd_device_attribute_query , .B cl_nv_device_attribute_query and .BR cl_arm_core_id . Heuristics based on standard version support are partially used in the code to determine which version may be supported. .P Properties which are known to be affected by these limitations include: .TP 2 .B CL_DEVICE_GLOBAL_FREE_MEMORY_AMD documented in v3 of the .B cl_amd_device_attribute_query extension specification as being the global free memory in KBytes, without any explanation given on why there are two values, although in the source code of the .B ROCm stack the second value is documented as being the largest free block; .TP .B CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD documented in v3 of the .B cl_amd_device_attribute_query extension specification, but not reported by drivers supporting other v3 properties. This has now been enabled for drivers .I assumed to support v4 of the same extension; .TP .B CL_DEVICE_TERMINATE_CAPABILITY_KHR exposed by the .B cl_khr_terminate_context has changed value between OpenCL 1.x and 2.x, and it's .I allegedly a bitfield, whose values are however not defined anywhere.
.SH BUGS .SS General .P Please report any issues on .UR http://github.com/Oblomov/clinfo the project tracker on GitHub .UE . .SS LLVM CommandLine errors .P If multiple OpenCL platforms using shared .B LLVM libraries are present in the system, .B clinfo (and other OpenCL application) may crash with errors to the tune of .PP .nf .RS .B : CommandLine Error: Option '(some option name)' registered more than once! .B LLVM ERROR: inconsistency in registered CommandLine options .RE .fi .PP or similar. This is not an issue in .BR clinfo "," or in any OpenCL platform or application, but it is due to the way .B LLVM handles its own command-line options parsing. The issue has been reported upstream .UR https://bugs.llvm.org/show_bug.cgi?id=30587 as issue #30587 .UE . See the next point for possible workarounds and assistance in identifying the conflicting platforms. .SS Segmentation faults .P Faulty OpenCL platforms may cause segmentation faults in .B clinfo during the information gathering phase, sometimes even before any output is shown. There is very little .B clinfo can do to avoid this. If you see this happening, try disabling all platforms and then re-enabling them one by one until you experience the crash again. Chances are the last platform you enabled is defective in some way (either by being incompatible with other platforms or by missing necessary components and not handling their absence gracefully). .P To selectively enable/disable platforms, one way is to move or rename the .I *.icd files present in .I /etc/OpenCL/vendors/ and then restoring them one by one. When using the free-software .B ocl-icd OpenCL library, a similar effect can be achieved by setting the .B OPENCL_VENDOR_PATH or .B OCL_ICD_VENDORS environment variables, as documented in .BR libOpenCL (7). Other implementations of .B libOpenCL are known to support .B OPENCL_VENDOR_PATH too. 
.TP 2 .B Example find /etc/OpenCL/vendors/ -name '*.icd' | while read OPENCL_VENDOR_PATH ; do clinfo -l > /dev/null ; echo "$? ${OPENCL_VENDOR_PATH}" ; done .P This one liner will run .B clinfo -l for each platform individually (hiding the normal output), and report the .I .icd path prefixed by .B 0 for successful runs, and a non-zero value for faulty platforms. clinfo-3.0.25.02.14/new-version.sh000077500000000000000000000013141475367065600163530ustar00rootroot00000000000000#!/bin/sh # Change the version recorded in src/clinfo.c and man1/clinfo.1 to # the current highest OpenCL supported standard followed by current # yy.mm.dd abort() { echo "$1" >&2 exit 1 } test -n "$(git status --porcelain | grep -v '??')" && abort "Uncommited changes, aborting" DATE=$(date +%Y-%m-%d) MAJOR=$(awk '/^OpenCL/ { print $NF ; exit }' man1/clinfo.1) SUBV=$(date +%y.%m.%d) VERSION="$MAJOR$SUBV" sed -i -e "/clinfo version/ s/version \S\+\"/version $VERSION\"/" src/clinfo.c && sed -i -e "1 s/\".\+$/\"$DATE\" \"clinfo $VERSION\"/" man1/clinfo.1 && sed -i -e "1 s/\".\+$/version: $VERSION-{build}/" .appveyor.yml && git commit -m "Version $VERSION" -e -a && git tag -m "Version $VERSION" $VERSION clinfo-3.0.25.02.14/src/000077500000000000000000000000001475367065600143305ustar00rootroot00000000000000clinfo-3.0.25.02.14/src/clinfo.c000066400000000000000000005061171475367065600157600ustar00rootroot00000000000000/* Collect all available information on all available devices * on all available OpenCL platforms present in the system */ #include #include /* We will want to check for symbols in the OpenCL library. 
* On Windows, we must get the module handle for it, on Unix-like * systems we can just use RTLD_DEFAULT */ #ifdef _MSC_VER # include # define dlsym GetProcAddress # define DL_MODULE GetModuleHandle("OpenCL") #else # include #ifdef RTLD_DEFAULT # define DL_MODULE RTLD_DEFAULT #else # define DL_MODULE ((void*)0) /* This would be RTLD_DEFAULT */ #endif #endif /* Load STDC format macros (PRI*), or define them * for those crappy, non-standard compilers */ #include "fmtmacros.h" // More support for the horrible MS C compiler #ifdef _MSC_VER #include "ms_support.h" #endif #include "error.h" #include "memory.h" #include "strbuf.h" #include "ext.h" #include "ctx_prop.h" #include "info_loc.h" #include "info_ret.h" #include "opt_out.h" #define ARRAY_SIZE(ar) (sizeof(ar)/sizeof(*ar)) #ifndef UNUSED #define UNUSED(x) x __attribute__((unused)) #endif struct platform_data { char *pname; /* CL_PLATFORM_NAME */ char *sname; /* CL_PLATFORM_ICD_SUFFIX_KHR or surrogate */ cl_uint ndevs; /* number of devices */ cl_bool has_amd_offline; /* has cl_amd_offline_devices extension */ }; struct platform_info_checks { cl_uint plat_version; cl_bool has_khr_icd; cl_bool has_amd_object_metadata; cl_bool has_extended_versioning; cl_bool has_external_memory; cl_bool has_semaphore; cl_bool has_external_semaphore; }; struct platform_list { /* Number of platforms in the system */ cl_uint num_platforms; /* Total number of devices across all platforms */ cl_uint ndevs_total; /* Number of devices allocated in all_devs array */ cl_uint alloc_devs; /* Highest OpenCL version supported by any platform. 
* If the OpenCL library / ICD loader only supports * a lower version, problems may arise (such as * API calls causing segfaults or any other unexpected * behavior */ cl_uint max_plat_version; /* Largest number of devices on any platform */ cl_uint max_devs; /* Length of the longest platform sname */ size_t max_sname_len; /* Array of platform IDs */ cl_platform_id *platform; /* Array of device IDs (across all platforms) */ cl_device_id *all_devs; /* Array of offsets in all_devs where the devices * of each platform begin */ cl_uint *dev_offset; /* Array of clinfo-specific platform data */ struct platform_data *pdata; /* Array of clinfo-specific platform checks */ struct platform_info_checks *platform_checks; }; void init_plist(struct platform_list *plist) { plist->num_platforms = 0; plist->ndevs_total = 0; plist->alloc_devs = 0; plist->max_plat_version = 0; plist->max_devs = 0; plist->max_sname_len = 0; plist->platform = NULL; plist->all_devs = NULL; plist->dev_offset = NULL; plist->pdata = NULL; plist->platform_checks = NULL; } void plist_devs_reserve(struct platform_list *plist, cl_uint amount) { if (amount > plist->alloc_devs) { REALLOC(plist->all_devs, amount, "all devices"); plist->alloc_devs = amount; } } cl_uint alloc_plist(struct platform_list *plist, const struct opt_out *output) { cl_uint num_platforms = plist->num_platforms; if (output->null_platform) num_platforms += 1; ALLOC(plist->platform, num_platforms, "platform IDs"); ALLOC(plist->dev_offset, num_platforms, "platform device list offset"); /* The actual sizing for this will change as we gather platform info, * but assume at least one device per platform */ plist_devs_reserve(plist, num_platforms); ALLOC(plist->pdata, num_platforms, "platform data"); ALLOC(plist->platform_checks, num_platforms, "platform checks data"); return num_platforms; } void free_plist(struct platform_list *plist) { free(plist->platform); free(plist->all_devs); free(plist->dev_offset); for (cl_uint p = 0 ; p < 
plist->num_platforms; ++p) { free(plist->pdata[p].sname); free(plist->pdata[p].pname); } free(plist->pdata); free(plist->platform_checks); init_plist(plist); } const cl_device_id * get_platform_devs(const struct platform_list *plist, cl_uint p) { return plist->all_devs + plist->dev_offset[p]; } cl_device_id get_platform_dev(const struct platform_list *plist, cl_uint p, cl_uint d) { return get_platform_devs(plist, p)[d]; } /* Data for the OpenCL library / ICD loader */ struct icdl_data { /* auto-detected OpenCL version support for the ICD loader */ cl_uint detected_version; /* OpenCL version support declared by the ICD loader */ cl_uint reported_version; }; /* line prefix, used to identify the platform/device for each * device property in RAW output mode */ char *line_pfx; int line_pfx_len; #define CHECK_SIZE(ret, loc, val, cmd, ...) do { \ /* check if the issue is with param size */ \ if (output->check_size && ret->err == CL_INVALID_VALUE) { \ size_t _actual_sz; \ if (cmd(__VA_ARGS__, 0, NULL, &_actual_sz) == CL_SUCCESS) { \ REPORT_SIZE_MISMATCH(&(ret->err_str), loc, _actual_sz, sizeof(val)); \ } \ } \ } while (0) static const char unk[] = "Unknown"; static const char none[] = "None"; static const char none_raw[] = "CL_NONE"; static const char na[] = "n/a"; // not available static const char na_wrap[] = "(n/a)"; // not available static const char core[] = "core"; static const char bytes_str[] = " bytes"; static const char pixels_str[] = " pixels"; static const char images_str[] = " images"; static const char* bool_str[] = { "No", "Yes" }; static const char* bool_raw_str[] = { "CL_FALSE", "CL_TRUE" }; static const char* bool_json_str[] = { "false", "true" }; static const char* endian_str[] = { "Big-Endian", "Little-Endian" }; static const cl_device_type devtype[] = { 0, CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_CUSTOM, CL_DEVICE_TYPE_ALL }; const size_t devtype_count = ARRAY_SIZE(devtype); /* number of 
actual device types, without ALL */ const size_t actual_devtype_count = ARRAY_SIZE(devtype) - 1; static const char* device_type_str[] = { unk, "Default", "CPU", "GPU", "Accelerator", "Custom", "All" }; static const char* device_type_raw_str[] = { unk, "CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_CUSTOM", "CL_DEVICE_TYPE_ALL" }; static const char* partition_type_str[] = { none, "equally", "by counts", "by affinity domain", "by names (Intel)" }; static const char* partition_type_raw_str[] = { none_raw, "CL_DEVICE_PARTITION_EQUALLY_EXT", "CL_DEVICE_PARTITION_BY_COUNTS_EXT", "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT", "CL_DEVICE_PARTITION_BY_NAMES_INTEL_EXT" }; static const char* atomic_cap_str[] = { "relaxed", "acquire/release", "sequentially-consistent", "work-item scope", "work-group scope", "device scope", "all-devices scope" }; static const char* atomic_cap_raw_str[] = { "CL_DEVICE_ATOMIC_ORDER_RELAXED", "CL_DEVICE_ATOMIC_ORDER_ACQ_REL", "CL_DEVICE_ATOMIC_ORDER_SEQ_CST", "CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM", "CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP", "CL_DEVICE_ATOMIC_SCOPE_DEVICE", "CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES" }; const size_t atomic_cap_count = ARRAY_SIZE(atomic_cap_str); static const char *device_enqueue_cap_str[] = { "supported", "replaceable default queue" }; static const char *device_enqueue_cap_raw_str[] = { "CL_DEVICE_QUEUE_SUPPORTED", "CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT" }; const size_t device_enqueue_cap_count = ARRAY_SIZE(atomic_cap_str); static const char *command_buffer_str[] = { "kernel printf", "device side enqueue", "simultaneous use", "out of order", }; static const char *command_buffer_raw_str[] = { "CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR", "CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR", "CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR", "CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR", }; const size_t command_buffer_count = ARRAY_SIZE(command_buffer_str); 
static const char *mutable_dispatch_str[] = { "Global Offset", "Local Offset", "Local Size", "Arguments", "Exec Info", }; static const char *mutable_dispatch_raw_str[] = { "CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR", "CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR", "CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR", "CL_MUTABLE_DISPATCH_ARGUMENTS_KHR", "CL_MUTABLE_DISPATCH_EXEC_INFO_KHR", }; const size_t mutable_dispatch_count = ARRAY_SIZE(mutable_dispatch_str); static const char numa[] = "NUMA"; static const char l1cache[] = "L1 cache"; static const char l2cache[] = "L2 cache"; static const char l3cache[] = "L3 cache"; static const char l4cache[] = "L4 cache"; static const char* affinity_domain_str[] = { numa, l4cache, l3cache, l2cache, l1cache, "next partitionable" }; static const char* affinity_domain_ext_str[] = { numa, l4cache, l3cache, l2cache, l1cache, "next fissionable" }; static const char* affinity_domain_raw_str[] = { "CL_DEVICE_AFFINITY_DOMAIN_NUMA", "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE", "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE" }; static const char* affinity_domain_raw_ext_str[] = { "CL_AFFINITY_DOMAIN_NUMA_EXT", "CL_AFFINITY_DOMAIN_L4_CACHE_EXT", "CL_AFFINITY_DOMAIN_L3_CACHE_EXT", "CL_AFFINITY_DOMAIN_L2_CACHE_EXT", "CL_AFFINITY_DOMAIN_L1_CACHE_EXT", "CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT" }; const size_t affinity_domain_count = ARRAY_SIZE(affinity_domain_str); static const char *terminate_capability_str[] = { "Context" }; static const char *terminate_capability_raw_str[] = { "CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR" }; const size_t terminate_capability_count = ARRAY_SIZE(terminate_capability_str); static const char *terminate_capability_arm_str[] = { "Controlled Success", "Controlled Failurure", "Query" }; static const char * terminate_capability_arm_raw_str[] = { "CL_DEVICE_CONTROLLED_TERMINATION_SUCCESS_ARM", 
"CL_DEVICE_CONTROLLED_TERMINATION_FAILURE_ARM", "CL_DEVICE_CONTROLLED_TERMINATION_QUERY_ARM" }; const size_t terminate_capability_arm_count = ARRAY_SIZE(terminate_capability_arm_str); static const char* fp_conf_str[] = { "Denormals", "Infinity and NANs", "Round to nearest", "Round to zero", "Round to infinity", "IEEE754-2008 fused multiply-add", "Support is emulated in software", "Correctly-rounded divide and sqrt operations" }; static const char* fp_conf_raw_str[] = { "CL_FP_DENORM", "CL_FP_INF_NAN", "CL_FP_ROUND_TO_NEAREST", "CL_FP_ROUND_TO_ZERO", "CL_FP_ROUND_TO_INF", "CL_FP_FMA", "CL_FP_SOFT_FLOAT", "CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT" }; const size_t fp_conf_count = ARRAY_SIZE(fp_conf_str); static const char* fp_atomic_caps_str[] = { "Global Load/Store", "Global Add", "Global Min/Max", "Unknown bit 3" "Unknown bit 4" "Unknown bit 5" "Unknown bit 6" "Unknown bit 7" "Unknown bit 8" "Unknown bit 9" "Unknown bit 10" "Unknown bit 11" "Unknown bit 12" "Unknown bit 13" "Unknown bit 14" "Unknown bit 15" "Local Load/Store", "Local Add", "Local Min/Max" }; static const char* fp_atomic_caps_raw_str[] = { "CL_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT", "CL_GLOBAL_FP_ATOMIC_ADD_EXT", "CL_GLOBAL_FP_ATOMIC_MIN_MAX_EXT" "CL_UNKNOWN_FP_ATOMIC_BIT_3" "CL_UNKNOWN_FP_ATOMIC_BIT_4" "CL_UNKNOWN_FP_ATOMIC_BIT_5" "CL_UNKNOWN_FP_ATOMIC_BIT_6" "CL_UNKNOWN_FP_ATOMIC_BIT_7" "CL_UNKNOWN_FP_ATOMIC_BIT_8" "CL_UNKNOWN_FP_ATOMIC_BIT_9" "CL_UNKNOWN_FP_ATOMIC_BIT_10" "CL_UNKNOWN_FP_ATOMIC_BIT_11" "CL_UNKNOWN_FP_ATOMIC_BIT_12" "CL_UNKNOWN_FP_ATOMIC_BIT_13" "CL_UNKNOWN_FP_ATOMIC_BIT_14" "CL_UNKNOWN_FP_ATOMIC_BIT_15" "CL_LOCAL_FP_ATOMIC_LOAD_STORE_EXT", "CL_LOCAL_FP_ATOMIC_ADD_EXT", "CL_LOCAL_FP_ATOMIC_MIN_MAX_EXT", }; /* There are three global and three local flags. 
This will be handled in device_info_fp_atomic_caps */ const size_t fp_atomic_caps_count = 3; static const char* svm_cap_str[] = { "Coarse-grained buffer sharing", "Fine-grained buffer sharing", "Fine-grained system sharing", "Atomics" }; static const char* svm_cap_raw_str[] = { "CL_DEVICE_SVM_COARSE_GRAIN_BUFFER", "CL_DEVICE_SVM_FINE_GRAIN_BUFFER", "CL_DEVICE_SVM_FINE_GRAIN_SYSTEM", "CL_DEVICE_SVM_ATOMICS", }; const size_t svm_cap_count = ARRAY_SIZE(svm_cap_str); static const char * intel_usm_cap_str[] = { "USM access", "USM atomic access", "USM concurrent access", "USM concurrent atomic access" }; static const char * intel_usm_cap_raw_str[] = { "CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL", "CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL", "CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL", "CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL", }; const size_t intel_usm_cap_count = ARRAY_SIZE(intel_usm_cap_str); static const char* arm_scheduling_controls_str[] = { "Kernel batching", "Work-group batch size", "Work-group batch size modifier", "Deferred flush", "Register allocation", "Warp throttling", "Compute unit batch queue size", "Compute unit limit", }; static const char* arm_scheduling_controls_raw_str[] = { "CL_DEVICE_SCHEDULING_KERNEL_BATCHING_ARM", "CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM", "CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM", "CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM", "CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM", "CL_DEVICE_SCHEDULING_WARP_THROTTLING_ARM", "CL_DEVICE_SCHEDULING_COMPUTE_UNIT_BATCH_QUEUE_SIZE_ARM", "CL_DEVICE_SCHEDULING_COMPUTE_UNIT_LIMIT_ARM", }; const size_t arm_scheduling_controls_count = ARRAY_SIZE(arm_scheduling_controls_str); static const char* ext_mem_handle_str[] = { "Opaque FD", "Opaqe Win32", "Opaque Win32 KMT", "D3D11 Texture", "D3D11 Texture KMT", "D3D12 Heap", "D3D12 Resource", "DMA buffer" }; static const char* ext_mem_handle_raw_str[] = { "CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR", 
"CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR", "CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR", "CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR", "CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR", "CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR", "CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR", "CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR", }; const size_t ext_mem_handle_count = ARRAY_SIZE(ext_mem_handle_str); const size_t ext_mem_handle_offset = 0x2060; static const char* semaphore_type_str[] = { "Binary" }; static const char* semaphore_type_raw_str[] = { "CL_SEMAPHORE_TYPE_BINARY_KHR" }; const size_t semaphore_type_count = ARRAY_SIZE(semaphore_type_str); const size_t semaphore_type_offset = 1; static const char* semaphore_handle_str[] = { "Opaque FD", "Opaque Win32", "Opaque Win32 KMT", "Sync FD", "D3D12 Fence" }; static const char* semaphore_handle_raw_str[] = { "CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR", "CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR", "CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR", "CL_SEMAPHORE_HANDLE_SYNC_FD_KHR", "CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR", }; const size_t semaphore_handle_count = ARRAY_SIZE(semaphore_handle_str); const size_t semaphore_handle_offset = 0x2055; /* SI suffixes for memory sizes. 
Note that in OpenCL most of them are * passed via a cl_ulong, which at most can mode 16 EiB, but hey, * let's be forward-thinking ;-) */ static const char* memsfx[] = { "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" }; const size_t memsfx_end = ARRAY_SIZE(memsfx) + 1; static const char* lmem_type_str[] = { none, "Local", "Global" }; static const char* lmem_type_raw_str[] = { none_raw, "CL_LOCAL", "CL_GLOBAL" }; static const char* cache_type_str[] = { none, "Read-Only", "Read/Write" }; static const char* cache_type_raw_str[] = { none_raw, "CL_READ_ONLY_CACHE", "CL_READ_WRITE_CACHE" }; static const char* queue_prop_str[] = { "Out-of-order execution", "Profiling" }; static const char* queue_prop_raw_str[] = { "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", "CL_QUEUE_PROFILING_ENABLE" }; const size_t queue_prop_count = ARRAY_SIZE(queue_prop_str); static const char* intel_queue_cap_str[] = { "create single-queue events", "create cross-queue events", "single-queue wait list", "cross-queue wait list", "unknown (bit 4)", "unknown (bit 5)", "unknown (bit 6)", "unknown (bit 7)", "transfer buffer", "transfer buffer rect", "map buffer", "fill buffer", "transfer image", "map image", "fill image", "transfer buffer to image", "transfer image to buffer", "unknown (bit 17)", "unknown (bit 18)", "unknown (bit 19)", "unknown (bit 20)", "unknown (bit 21)", "unknown (bit 22)", "unknown (bit 23)", "marker enqueue", "barrier enqueue", "kernel enqueue", "unknown (bit 27)", "unknown (bit 28)", "no sync operations", }; static const char* intel_queue_cap_raw_str[] = { "CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL", "CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL", "CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL", "CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL", "CL_QUEUE_CAPABILITY_UNKNOWN_4", "CL_QUEUE_CAPABILITY_UNKNOWN_5", "CL_QUEUE_CAPABILITY_UNKNOWN_6", "CL_QUEUE_CAPABILITY_UNKNOWN_7", "CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL", 
"CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL", "CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL", "CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL", "CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL", "CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL", "CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL", "CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL", "CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL", "CL_QUEUE_CAPABILITY_UNKNOWN_17", "CL_QUEUE_CAPABILITY_UNKNOWN_18", "CL_QUEUE_CAPABILITY_UNKNOWN_19", "CL_QUEUE_CAPABILITY_UNKNOWN_20", "CL_QUEUE_CAPABILITY_UNKNOWN_21", "CL_QUEUE_CAPABILITY_UNKNOWN_22", "CL_QUEUE_CAPABILITY_UNKNOWN_23", "CL_QUEUE_CAPABILITY_MARKER_INTEL", "CL_QUEUE_CAPABILITY_BARRIER_INTEL", "CL_QUEUE_CAPABILITY_KERNEL_INTEL", "CL_QUEUE_CAPABILITY_UNKNOWN_27", "CL_QUEUE_CAPABILITY_UNKNOWN_28", "CL_QUEUE_NO_SYNC_OPERATIONS_INTEL", }; const size_t intel_queue_cap_count = ARRAY_SIZE(intel_queue_cap_str); static const char* execap_str[] = { "Run OpenCL kernels", "Run native kernels" }; static const char* execap_raw_str[] = { "CL_EXEC_KERNEL", "CL_EXEC_NATIVE_KERNEL" }; const size_t execap_count = ARRAY_SIZE(execap_str); static const char* intel_features_str[] = { "DP4A", "DPAS" }; static const char* intel_features_raw_str[] = { "CL_DEVICE_FEATURE_FLAG_DP4A_INTEL", "CL_DEVICE_FEATURE_FLAG_DPAS_INTEL" }; const size_t intel_features_count = ARRAY_SIZE(intel_features_str); static const char* sources[] = { "#define GWO(type) global type* restrict\n", "#define GRO(type) global const type* restrict\n", "#define BODY int i = get_global_id(0); out[i] = in1[i] + in2[i]\n", "#define _KRN(T, N) kernel void sum##N(GWO(T##N) out, GRO(T##N) in1, GRO(T##N) in2) { BODY; }\n", "#define KRN(N) _KRN(float, N)\n", "KRN()\n/* KRN(2)\nKRN(4)\nKRN(8)\nKRN(16) */\n", }; const char *num_devs_header(const struct opt_out *output, cl_bool these_are_offline) { return output->mode == CLINFO_HUMAN ? (these_are_offline ? "Number of offine devices (AMD)" : "Number of devices") : (these_are_offline ? 
"#OFFDEVICES" : "#DEVICES"); } const char *not_specified(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? na_wrap : ""; } const char *no_plat(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "No platform" : "CL_INVALID_PLATFORM"; } const char *invalid_dev_type(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "Invalid device type for platform" : "CL_INVALID_DEVICE_TYPE"; } const char *invalid_dev_value(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "Invalid device type value for platform" : "CL_INVALID_VALUE"; } const char *no_dev_found(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "No devices found in platform" : "CL_DEVICE_NOT_FOUND"; } const char *no_dev_avail(const struct opt_out *output) { return output->mode == CLINFO_HUMAN ? "No devices available in platform" : "CL_DEVICE_NOT_AVAILABLE"; } /* OpenCL context interop names */ typedef struct cl_interop_name { cl_uint from; cl_uint to; /* 5 because that's the largest we know of, * 2 because it's HUMAN, RAW */ const char *value[5][2]; } cl_interop_name; static const cl_interop_name cl_interop_names[] = { { /* cl_khr_gl_sharing */ CL_GL_CONTEXT_KHR, CL_CGL_SHAREGROUP_KHR, { { "GL", "CL_GL_CONTEXT_KHR" }, { "EGL", "CL_EGL_DISPALY_KHR" }, { "GLX", "CL_GLX_DISPLAY_KHR" }, { "WGL", "CL_WGL_HDC_KHR" }, { "CGL", "CL_CGL_SHAREGROUP_KHR" } } }, { /* cl_khr_dx9_media_sharing */ CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_ADAPTER_DXVA_KHR, { { "D3D9 (KHR)", "CL_CONTEXT_ADAPTER_D3D9_KHR" }, { "D3D9Ex (KHR)", "CL_CONTEXT_ADAPTER_D3D9EX_KHR" }, { "DXVA (KHR)", "CL_CONTEXT_ADAPTER_DXVA_KHR" } } }, { /* cl_khr_d3d10_sharing */ CL_CONTEXT_D3D10_DEVICE_KHR, CL_CONTEXT_D3D10_DEVICE_KHR, { { "D3D10", "CL_CONTEXT_D3D10_DEVICE_KHR" } } }, { /* cl_khr_d3d11_sharing */ CL_CONTEXT_D3D11_DEVICE_KHR, CL_CONTEXT_D3D11_DEVICE_KHR, { { "D3D11", "CL_CONTEXT_D3D11_DEVICE_KHR" } } }, /* cl_intel_dx9_media_sharing is split in two because the 
allowed values are not consecutive */ { /* cl_intel_dx9_media_sharing part 1 */ CL_CONTEXT_D3D9_DEVICE_INTEL, CL_CONTEXT_D3D9_DEVICE_INTEL, { { "D3D9 (INTEL)", "CL_CONTEXT_D3D9_DEVICE_INTEL" } } }, { /* cl_intel_dx9_media_sharing part 2 */ CL_CONTEXT_D3D9EX_DEVICE_INTEL, CL_CONTEXT_DXVA_DEVICE_INTEL, { { "D3D9Ex (INTEL)", "CL_CONTEXT_D3D9EX_DEVICE_INTEL" }, { "DXVA (INTEL)", "CL_CONTEXT_DXVA_DEVICE_INTEL" } } }, { /* cl_intel_va_api_media_sharing */ CL_CONTEXT_VA_API_DISPLAY_INTEL, CL_CONTEXT_VA_API_DISPLAY_INTEL, { { "VA-API", "CL_CONTEXT_VA_API_DISPLAY_INTEL" } } } }; const size_t num_known_interops = ARRAY_SIZE(cl_interop_names); #define INDENT " " #define I0_STR "%-48s " #define I1_STR " %-46s " #define I2_STR " %-44s " /* New line and a full padding */ static const char full_padding[] = "\n" INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT INDENT; static const char empty_str[] = ""; static const char spc_str[] = " "; static const char times_str[] = "x"; static const char comma_str[] = ", "; static const char vbar_str[] = " | "; const char *cur_sfx = empty_str; /* parse a CL_DEVICE_VERSION or CL_PLATFORM_VERSION info to determine the OpenCL version. * Returns an unsigned integer in the form major*10 + minor */ cl_uint getOpenCLVersion(const char *version) { cl_uint ret = 10; long parse = 0; const char *from = version; char *next = NULL; parse = strtol(from, &next, 10); if (next != from) { ret = parse*10; // skip the dot TODO should we actually check for the dot? 
from = ++next; parse = strtol(from, &next, 10); if (next != from) ret += parse; } return ret; } #define SPLIT_CL_VERSION(ver) ((ver)/10), ((ver)%10) /* OpenCL 3.0 introduced “proper” versioning, in the form of a major.minor.patch struct * packed into a single cl_uint (type aliased to cl_version) */ struct unpacked_cl_version { cl_uint major; cl_uint minor; cl_uint patch; }; struct unpacked_cl_version unpack_cl_version(cl_uint version) { struct unpacked_cl_version ret; ret.major = (version >> 22); ret.minor = (version >> 12) & ((1<<10)-1); ret.patch = version & ((1<<12)-1); return ret; } void strbuf_version(const char *what, struct _strbuf *str, const char *before, cl_uint version, const char *after) { struct unpacked_cl_version u = unpack_cl_version(version); strbuf_append(what, str, "%s%" PRIu32 ".%" PRIu32 ".%" PRIu32 "%s", before, u.major, u.minor, u.patch, after); } void set_common_separator(const struct opt_out *output) { set_separator(output->json || output->mode == CLINFO_HUMAN ? comma_str : vbar_str); } void strbuf_name_version(const char *what, struct _strbuf *str, const cl_name_version *ext, size_t num_exts, const struct opt_out *output) { realloc_strbuf(str, num_exts*(CL_NAME_VERSION_MAX_NAME_SIZE + 256), "extension versions"); set_separator(output->mode == CLINFO_HUMAN ? full_padding : output->json ? comma_str : spc_str); if (output->json) { strbuf_append_str(what, str, "{"); } for (size_t i = 0; i < num_exts; ++i) { const cl_name_version *e = ext + i; if (i > 0) strbuf_append_str(what, str, sep); if (output->json || output->mode == CLINFO_HUMAN) { struct unpacked_cl_version u = unpack_cl_version(e->version); strbuf_append(what, str, output->json ? 
"\"%s\" : { \"raw\" : %" PRIu32 ", \"version\" : \"%d.%d.%d\" }" : "%-65s%#8" PRIx32 " (%d.%d.%d)", e->name, e->version, u.major, u.minor, u.patch); } else { strbuf_append(what, str, "%s:%#" PRIx32, e->name, e->version); } } if (output->json) strbuf_append_str(what, str, " }"); } void strbuf_named_uint(const char *what, struct _strbuf *str, const cl_uint *ext, size_t num_exts, const struct opt_out *output, const char* const* human_str, const char* const* raw_str, const size_t count, const size_t offset) { const char *quote = output->json ? "\"" : ""; const char * const * name_str = output->mode == CLINFO_HUMAN ? human_str : raw_str; set_common_separator(output); if (output->json) strbuf_append_str_len(what, str, "[ ", 2); for (size_t cursor = 0; cursor < num_exts; ++cursor) { /* add separator for values past the first */ if (cursor > 0) strbuf_append_str(what, str, sep); cl_uint val = ext[cursor]; cl_bool known = (val >= offset && val < offset + count); if (known) strbuf_append(what, str, "%s%s%s", quote, name_str[val - offset], quote); else strbuf_append(what, str, "%s%#" PRIx32 "%s", quote, val, quote); } if (output->json) strbuf_append_str_len(what, str, " ]", 2); } void strbuf_ext_mem(const char *what, struct _strbuf *str, const cl_external_memory_handle_type_khr *ext, size_t num_exts, const struct opt_out *output) { strbuf_named_uint(what, str, ext, num_exts, output, ext_mem_handle_str, ext_mem_handle_raw_str, ext_mem_handle_count, ext_mem_handle_offset); } void strbuf_semaphore_type(const char *what, struct _strbuf *str, const cl_semaphore_type_khr *ext, size_t num_exts, const struct opt_out *output) { strbuf_named_uint(what, str, ext, num_exts, output, semaphore_type_str, semaphore_type_raw_str, semaphore_type_count, semaphore_type_offset); } void strbuf_ext_semaphore_handle(const char *what, struct _strbuf *str, const cl_external_semaphore_handle_type_khr *ext, size_t num_exts, const struct opt_out *output) { strbuf_named_uint(what, str, ext, num_exts, 
output, semaphore_handle_str, semaphore_handle_raw_str, semaphore_handle_count, semaphore_handle_offset); } /* print strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero, * affixing cur_sfx */ static inline void show_strbuf(const struct _strbuf *strbuf, const char *pname, int skip, cl_int err) { printf("%s" I1_STR "%s%s\n", line_pfx, pname, (skip ? skip_leading_ws(strbuf->buf) : strbuf->buf), err ? empty_str : cur_sfx); } /* print a JSON string version of NULL-terminated string str, escaping \ and " and wrapping it all in " */ static inline void json_stringify(const char *str) { putchar('"'); while (*str) { if (*str == '\\' || *str == '"') putchar('\\'); putchar(*str); ++str; } putchar('"'); } /* print JSON version of strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero, * quoting and escaping as string if is_string is nonzero */ static inline void json_strbuf(const struct _strbuf *strbuf, const char *pname, cl_uint n, cl_bool is_string) { printf("%s\"%s\" : ", (n > 0 ? 
comma_str : spc_str), pname); if (is_string) json_stringify(strbuf->buf); else fputs(strbuf->buf, stdout); } void platform_info_str(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out* UNUSED(output)) { GET_STRING_LOC(ret, loc, clGetPlatformInfo, loc->plat, loc->param.plat); ret->needs_escaping = CL_TRUE; } void platform_info_ulong(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u64), &ret->value.u64, NULL), loc, "get %s"); CHECK_SIZE(ret, loc, ret->value.u64, clGetPlatformInfo, loc->plat, loc->param.plat); strbuf_append(loc->pname, &ret->str, "%" PRIu64, ret->value.u64); } void platform_info_sz(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.s), &ret->value.s, NULL), loc, "get %s"); CHECK_SIZE(ret, loc, ret->value.s, clGetPlatformInfo, loc->plat, loc->param.plat); strbuf_append(loc->pname, &ret->str, "%" PRIuS, ret->value.s); } void platform_info_version(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, sizeof(ret->value.u32), &ret->value.u32, NULL), loc, "get %s"); CHECK_SIZE(ret, loc, ret->value.u32, clGetPlatformInfo, loc->plat, loc->param.plat); if (!ret->err) { strbuf_append(loc->pname, &ret->str, output->json ? "{ \"raw\" : %" PRIu32 ", \"version\" :" : "%#" PRIx32, ret->value.u32); if (output->json || output->mode == CLINFO_HUMAN) { strbuf_version(loc->pname, &ret->str, output->json ? " \"" : " (", ret->value.u32, output->json ? 
"\" }" : ")"); } } } void platform_info_ext_version(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { cl_name_version *ext = NULL; size_t nusz = 0; ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, 0, NULL, &nusz), loc, "get %s size"); if (!ret->err) { REALLOC(ext, nusz, loc->sname); ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, nusz, ext, NULL), loc, "get %s"); } if (!ret->err) { size_t num_exts = nusz / sizeof(*ext); strbuf_name_version(loc->pname, &ret->str, ext, num_exts, output); } free(ext); } void platform_info_ext_mem(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { cl_external_memory_handle_type_khr *ext = NULL; size_t nusz = 0; ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, 0, NULL, &nusz), loc, "get %s size"); if (!ret->err) { REALLOC(ext, nusz, loc->sname); ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, nusz, ext, NULL), loc, "get %s"); } if (!ret->err) { size_t num_exts = nusz / sizeof(*ext); strbuf_ext_mem(loc->pname, &ret->str, ext, num_exts, output); } free(ext); } void platform_info_semaphore_types(struct platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { cl_semaphore_type_khr *ext = NULL; size_t nusz = 0; ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, 0, NULL, &nusz), loc, "get %s size"); if (!ret->err) { REALLOC(ext, nusz, loc->sname); ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, nusz, ext, NULL), loc, "get %s"); } if (!ret->err) { size_t num_exts = nusz / sizeof(*ext); strbuf_semaphore_type(loc->pname, &ret->str, ext, num_exts, output); } free(ext); } void platform_info_ext_semaphore_handles(struct 
platform_info_ret *ret, const struct info_loc *loc, const struct platform_info_checks* UNUSED(chk), const struct opt_out *output) { cl_external_semaphore_handle_type_khr *ext = NULL; size_t nusz = 0; ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, 0, NULL, &nusz), loc, "get %s size"); if (!ret->err) { REALLOC(ext, nusz, loc->sname); ret->err = REPORT_ERROR_LOC(ret, clGetPlatformInfo(loc->plat, loc->param.plat, nusz, ext, NULL), loc, "get %s"); } if (!ret->err) { size_t num_exts = nusz / sizeof(*ext); strbuf_ext_semaphore_handle(loc->pname, &ret->str, ext, num_exts, output); } free(ext); } struct platform_info_traits { cl_platform_info param; // CL_PLATFORM_* const char *sname; // "CL_PLATFORM_*" const char *pname; // "Platform *" const char *sfx; // suffix for the output in non-raw mode /* pointer to function that retrieves the parameter */ void (*show_func)(struct platform_info_ret *, const struct info_loc *, const struct platform_info_checks *, const struct opt_out *); /* pointer to function that checks if the parameter should be retrieved */ cl_bool (*check_func)(const struct platform_info_checks *); }; cl_bool khr_icd_p(const struct platform_info_checks *chk) { return chk->has_khr_icd; } cl_bool plat_is_12(const struct platform_info_checks *chk) { return !(chk->plat_version < 12); } cl_bool plat_is_20(const struct platform_info_checks *chk) { return !(chk->plat_version < 20); } cl_bool plat_is_21(const struct platform_info_checks *chk) { return !(chk->plat_version < 21); } cl_bool plat_is_30(const struct platform_info_checks *chk) { return !(chk->plat_version < 30); } cl_bool plat_has_amd_object_metadata(const struct platform_info_checks *chk) { return chk->has_amd_object_metadata; } cl_bool plat_has_ext_ver(const struct platform_info_checks *chk) { return plat_is_30(chk) || chk->has_extended_versioning; } cl_bool plat_has_ext_mem(const struct platform_info_checks *chk) { return chk->has_external_memory; } cl_bool 
plat_has_semaphore(const struct platform_info_checks *chk) { return chk->has_semaphore; } cl_bool plat_has_external_semaphore(const struct platform_info_checks *chk) { return chk->has_external_semaphore; } #define PINFO_COND(symbol, name, sfx, typ, funcptr) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, &funcptr } #define PINFO(symbol, name, sfx, typ) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, NULL } struct platform_info_traits pinfo_traits[] = { PINFO(CL_PLATFORM_NAME, "Name", NULL, str), PINFO(CL_PLATFORM_VENDOR, "Vendor", NULL, str), PINFO(CL_PLATFORM_VERSION, "Version", NULL, str), PINFO(CL_PLATFORM_PROFILE, "Profile", NULL, str), PINFO(CL_PLATFORM_EXTENSIONS, "Extensions", NULL, str), PINFO_COND(CL_PLATFORM_EXTENSIONS_WITH_VERSION, "Extensions with Version", NULL, ext_version, plat_has_ext_ver), PINFO_COND(CL_PLATFORM_NUMERIC_VERSION, "Numeric Version", NULL, version, plat_has_ext_ver), PINFO_COND(CL_PLATFORM_ICD_SUFFIX_KHR, "Extensions function suffix", NULL, str, khr_icd_p), PINFO_COND(CL_PLATFORM_MAX_KEYS_AMD, "Max metadata object keys (AMD)", NULL, sz, plat_has_amd_object_metadata), PINFO_COND(CL_PLATFORM_HOST_TIMER_RESOLUTION, "Host timer resolution", "ns", ulong, plat_is_21), PINFO_COND(CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, "External memory handle types", NULL, ext_mem, plat_has_ext_mem), PINFO_COND(CL_PLATFORM_SEMAPHORE_TYPES_KHR, "Semaphore types", NULL, semaphore_types, plat_has_semaphore), PINFO_COND(CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, "External semaphore import types", NULL, ext_semaphore_handles, plat_has_external_semaphore), PINFO_COND(CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, "External semaphore export types", NULL, ext_semaphore_handles, plat_has_external_semaphore), }; /* Collect (and optionally show) information on a specific platform, * initializing relevant arrays and optionally showing the collected * information */ void gatherPlatformInfo(struct platform_list *plist, 
cl_uint p, const struct opt_out *output) { size_t len = 0; cl_uint n = 0; /* number of platform properties shown, for JSON */ struct platform_data *pdata = plist->pdata + p; struct platform_info_checks *pinfo_checks = plist->platform_checks + p; struct platform_info_ret ret; struct info_loc loc; pinfo_checks->plat_version = 10; INIT_RET(ret, "platform"); reset_loc(&loc, __func__); loc.plat = plist->platform[p]; for (loc.line = 0; loc.line < ARRAY_SIZE(pinfo_traits); ++loc.line) { const struct platform_info_traits *traits = pinfo_traits + loc.line; cl_bool requested; /* checked is true if there was no condition to check for, or if the * condition was satisfied */ int checked = !(traits->check_func && !traits->check_func(pinfo_checks)); if (output->cond == COND_PROP_CHECK && !checked) continue; loc.sname = traits->sname; loc.pname = (output->mode == CLINFO_HUMAN ? traits->pname : traits->sname); loc.param.plat = traits->param; cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str; reset_strbuf(&ret.str); reset_strbuf(&ret.err_str); ret.needs_escaping = CL_FALSE; traits->show_func(&ret, &loc, pinfo_checks, output); /* The property is skipped if this was a conditional property, * unsatisfied, there was an error retrieving it and cond_prop_mode is not * COND_PROP_SHOW. */ if (ret.err && !checked && output->cond != COND_PROP_SHOW) continue; /* The property gets printed if we are not just listing, * or if the user requested a property and this one matches. 
* Otherwise, we're just gathering information */ requested = is_requested_prop(output, loc.sname); if (output->detailed || requested) { if (output->json) { json_strbuf(RET_BUF(ret), loc.pname, n++, ret.err || ret.needs_escaping); } else { show_strbuf(RET_BUF(ret), loc.pname, CL_FALSE, ret.err); } } if (ret.err) continue; /* post-processing */ switch (traits->param) { case CL_PLATFORM_NAME: /* Store name for future reference */ len = strlen(ret.str.buf); ALLOC(pdata->pname, len+1, "platform name copy"); /* memcpy instead of strncpy since we already have the len * and memcpy is possibly more optimized */ memcpy(pdata->pname, ret.str.buf, len); pdata->pname[len] = '\0'; /* We print the platform name here in the JSON + brief case */ if (output->json && output->brief) json_stringify(pdata->pname); break; case CL_PLATFORM_VERSION: /* compute numeric value for OpenCL version */ pinfo_checks->plat_version = getOpenCLVersion(ret.str.buf + 7); break; case CL_PLATFORM_EXTENSIONS: pinfo_checks->has_khr_icd = !!strstr(ret.str.buf, "cl_khr_icd"); pinfo_checks->has_amd_object_metadata = !!strstr(ret.str.buf, "cl_amd_object_metadata"); pinfo_checks->has_external_memory = !!strstr(ret.str.buf, "cl_khr_external_memory"); pinfo_checks->has_semaphore = !!strstr(ret.str.buf, "cl_khr_semaphore"); pinfo_checks->has_external_semaphore = !!strstr(ret.str.buf, "cl_khr_external_semaphore"); pdata->has_amd_offline = !!strstr(ret.str.buf, "cl_amd_offline_devices"); break; case CL_PLATFORM_ICD_SUFFIX_KHR: /* Store ICD suffix for future reference */ len = strlen(ret.str.buf); ALLOC(pdata->sname, len+1, "platform ICD suffix copy"); /* memcpy instead of strncpy since we already have the len * and memcpy is possibly more optimized */ memcpy(pdata->sname, ret.str.buf, len); pdata->sname[len] = '\0'; default: /* do nothing */ break; } } if (pinfo_checks->plat_version > plist->max_plat_version) plist->max_plat_version = pinfo_checks->plat_version; /* if no CL_PLATFORM_ICD_SUFFIX_KHR, use P### as 
short/symbolic name */ if (!pdata->sname) { #define SNAME_MAX 32 ALLOC(pdata->sname, SNAME_MAX+1, "platform symbolic name"); snprintf(pdata->sname, SNAME_MAX, "P%" PRIu32 "", p); } len = strlen(pdata->sname); if (len > plist->max_sname_len) plist->max_sname_len = len; ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, 0, NULL, &pdata->ndevs); if (ret.err == CL_DEVICE_NOT_FOUND) pdata->ndevs = 0; else CHECK_ERROR(ret.err, "number of devices"); plist->ndevs_total += pdata->ndevs; plist->dev_offset[p] = p ? plist->dev_offset[p-1] + (pdata-1)->ndevs : 0; plist_devs_reserve(plist, plist->ndevs_total); if (pdata->ndevs > 0) { ret.err = clGetDeviceIDs(loc.plat, CL_DEVICE_TYPE_ALL, pdata->ndevs, plist->all_devs + plist->dev_offset[p], NULL); } if (pdata->ndevs > plist->max_devs) plist->max_devs = pdata->ndevs; UNINIT_RET(ret); } /* * Device properties/extensions used in traits checks, and relevant functions * TODO add version control for 3.0+ platforms */ struct device_info_checks { const struct platform_info_checks *pinfo_checks; cl_device_type devtype; cl_device_mem_cache_type cachetype; cl_device_local_mem_type lmemtype; cl_bool image_support; cl_bool compiler_available; cl_bool arm_register_alloc_support; cl_bool arm_warp_count_support; char has_half[12]; char has_double[24]; char has_nv[29]; char has_amd[30]; char has_intel[32]; char has_amd_svm[11]; char has_arm_svm[29]; char has_intel_usm[31]; char has_external_memory[23]; char has_semaphore[17]; char has_external_semaphore[26]; char has_arm_core_id[15]; char has_arm_job_slots[26]; char has_arm_scheduling_controls[27]; char has_fission[22]; char has_atomic_counters[26]; char has_image2d_buffer[27]; char has_il_program[18]; char has_intel_queue_families[32]; char has_intel_local_thread[30]; char has_intel_AME[36]; char has_intel_AVC_ME[43]; char has_intel_planar_yuv[20]; char has_intel_required_subgroup_size[32]; char has_altera_dev_temp[29]; char has_p2p[23]; char has_pci_bus_info[20]; char has_spir[12]; char 
has_qcom_ext_host_ptr[21]; char has_simultaneous_sharing[30]; char has_subgroup_named_barrier[30]; char has_command_buffer[25]; char has_mutable_dispatch[27]; char has_terminate_context[25]; char has_terminate_arm[37]; char has_extended_versioning[27]; char has_cxx_for_opencl[22]; char has_device_uuid[19]; char has_float_atomics[21]; cl_uint dev_version; cl_uint p2p_num_devs; }; #define DEFINE_EXT_CHECK(ext) cl_bool dev_has_##ext(const struct device_info_checks *chk) \ { \ return !!(chk->has_##ext[0]); \ } DEFINE_EXT_CHECK(half) DEFINE_EXT_CHECK(double) DEFINE_EXT_CHECK(nv) DEFINE_EXT_CHECK(amd) DEFINE_EXT_CHECK(amd_svm) DEFINE_EXT_CHECK(arm_svm) DEFINE_EXT_CHECK(intel_usm) DEFINE_EXT_CHECK(external_memory) DEFINE_EXT_CHECK(semaphore) DEFINE_EXT_CHECK(external_semaphore) DEFINE_EXT_CHECK(arm_core_id) DEFINE_EXT_CHECK(arm_job_slots) DEFINE_EXT_CHECK(arm_scheduling_controls) DEFINE_EXT_CHECK(fission) DEFINE_EXT_CHECK(atomic_counters) DEFINE_EXT_CHECK(il_program) DEFINE_EXT_CHECK(intel) DEFINE_EXT_CHECK(intel_queue_families) DEFINE_EXT_CHECK(intel_local_thread) DEFINE_EXT_CHECK(intel_AME) DEFINE_EXT_CHECK(intel_AVC_ME) DEFINE_EXT_CHECK(intel_planar_yuv) DEFINE_EXT_CHECK(intel_required_subgroup_size) DEFINE_EXT_CHECK(altera_dev_temp) DEFINE_EXT_CHECK(p2p) DEFINE_EXT_CHECK(pci_bus_info) DEFINE_EXT_CHECK(spir) DEFINE_EXT_CHECK(qcom_ext_host_ptr) DEFINE_EXT_CHECK(simultaneous_sharing) DEFINE_EXT_CHECK(subgroup_named_barrier) DEFINE_EXT_CHECK(command_buffer) DEFINE_EXT_CHECK(mutable_dispatch) DEFINE_EXT_CHECK(terminate_context) DEFINE_EXT_CHECK(terminate_arm) DEFINE_EXT_CHECK(extended_versioning) DEFINE_EXT_CHECK(cxx_for_opencl) DEFINE_EXT_CHECK(device_uuid) DEFINE_EXT_CHECK(float_atomics) /* In the version checks we negate the opposite conditions * instead of double-negating the actual condition */ // device supports 1.1 cl_bool dev_is_11(const struct device_info_checks *chk) { return !(chk->dev_version < 11); } // device supports 1.2 cl_bool dev_is_12(const struct 
device_info_checks *chk) { return !(chk->dev_version < 12); } // device supports 2.0 cl_bool dev_is_20(const struct device_info_checks *chk) { return !(chk->dev_version < 20); } // device supports 2.1 cl_bool dev_is_21(const struct device_info_checks *chk) { return !(chk->dev_version < 21); } // device does not support 2.0 cl_bool dev_not_20(const struct device_info_checks *chk) { return !(chk->dev_version >= 20); } // device supports 3.0 cl_bool dev_is_30(const struct device_info_checks *chk) { return !(chk->dev_version < 30); } // device has extended versioning: 3.0 or has_extended_versioning cl_bool dev_has_ext_ver(const struct device_info_checks *chk) { return dev_is_30(chk) || dev_has_extended_versioning(chk); } cl_bool dev_is_gpu(const struct device_info_checks *chk) { return !!(chk->devtype & CL_DEVICE_TYPE_GPU); } cl_bool dev_is_gpu_amd(const struct device_info_checks *chk) { return dev_is_gpu(chk) && dev_has_amd(chk); } /* Device supports cl_amd_device_attribute_query v4 */ cl_bool dev_has_amd_v4(const struct device_info_checks *chk) { /* We don't actually have a criterion to check if the device * supports a specific version of an extension, so for the time * being rely on them being GPU devices with cl_amd_device_attribute_query * and the platform supporting OpenCL 2.0 or later * TODO FIXME tune criteria */ return dev_is_gpu(chk) && dev_has_amd(chk) && plat_is_20(chk->pinfo_checks); } /* Device supports cl_intel_device_attribute_query and is a GPU */ cl_bool dev_is_gpu_intel(const struct device_info_checks *chk) { return dev_is_gpu(chk) && dev_has_intel(chk); } /* Device supports cl_arm_core_id v2 */ cl_bool dev_has_arm_core_id_v2(const struct device_info_checks *chk) { /* We don't actually have a criterion to check if the device * supports a specific version of an extension, so for the time * being rely on them having cl_arm_core_id and the platform * supporting OpenCL 1.2 or later * TODO FIXME tune criteria */ return dev_has_arm_core_id(chk) && 
plat_is_12(chk->pinfo_checks); } /* Device supports register allocation queries */ cl_bool dev_has_arm_register_alloc(const struct device_info_checks *chk) { return dev_has_arm_scheduling_controls(chk) && chk->arm_register_alloc_support; } /* Device supports warp */ cl_bool dev_has_arm_warp_count_support(const struct device_info_checks *chk) { return dev_has_arm_scheduling_controls(chk) && chk->arm_warp_count_support; } cl_bool dev_has_svm(const struct device_info_checks *chk) { return dev_is_20(chk) || dev_has_amd_svm(chk); } cl_bool dev_has_partition(const struct device_info_checks *chk) { return dev_is_12(chk) || dev_has_fission(chk); } cl_bool dev_has_cache(const struct device_info_checks *chk) { return chk->cachetype != CL_NONE; } cl_bool dev_has_lmem(const struct device_info_checks *chk) { return chk->lmemtype != CL_NONE; } cl_bool dev_has_il(const struct device_info_checks *chk) { return dev_is_21(chk) || dev_has_il_program(chk); } cl_bool dev_has_images(const struct device_info_checks *chk) { return chk->image_support; } cl_bool dev_has_images_12(const struct device_info_checks *chk) { return dev_has_images(chk) && dev_is_12(chk); } cl_bool dev_has_images_20(const struct device_info_checks *chk) { return dev_has_images(chk) && dev_is_20(chk); } cl_bool dev_has_image2d_buffer(const struct device_info_checks *chk) { return dev_has_images_20(chk) || !!(chk->has_image2d_buffer[0]); } cl_bool dev_has_compiler(const struct device_info_checks *chk) { return chk->compiler_available; } cl_bool dev_has_compiler_11(const struct device_info_checks *chk) { return dev_is_11(chk) && dev_has_compiler(chk); } cl_bool dev_has_p2p_devs(const struct device_info_checks *chk) { return dev_has_p2p(chk) && chk->p2p_num_devs > 0; } cl_bool dev_has_half_atomics(const struct device_info_checks *chk) { return dev_has_float_atomics(chk) && dev_has_half(chk); } cl_bool dev_has_double_atomics(const struct device_info_checks *chk) { return dev_has_float_atomics(chk) && 
dev_has_double(chk); } void identify_device_extensions(const char *extensions, struct device_info_checks *chk) { #define _HAS_EXT(ext) (strstr(extensions, ext)) #define CPY_EXT(what, ext) do { \ strncpy(chk->has_##what, has+1, sizeof(ext)); \ chk->has_##what[sizeof(ext)-1] = '\0'; \ } while (0) #define CHECK_EXT(what, ext) do { \ has = _HAS_EXT(" " #ext " "); \ if (has) CPY_EXT(what, #ext); \ } while(0) char *has; CHECK_EXT(half, cl_khr_fp16); CHECK_EXT(spir, cl_khr_spir); CHECK_EXT(double, cl_khr_fp64); if (!dev_has_double(chk)) CHECK_EXT(double, cl_amd_fp64); if (!dev_has_double(chk)) CHECK_EXT(double, cl_APPLE_fp64_basic_ops); CHECK_EXT(nv, cl_nv_device_attribute_query); CHECK_EXT(amd, cl_amd_device_attribute_query); CHECK_EXT(intel, cl_intel_device_attribute_query); CHECK_EXT(amd_svm, cl_amd_svm); CHECK_EXT(arm_svm, cl_arm_shared_virtual_memory); CHECK_EXT(intel_usm, cl_intel_unified_shared_memory); CHECK_EXT(external_memory, cl_khr_external_memory); CHECK_EXT(semaphore, cl_khr_semaphore); CHECK_EXT(external_semaphore, cl_khr_external_semaphore); CHECK_EXT(arm_core_id, cl_arm_core_id); CHECK_EXT(arm_job_slots, cl_arm_job_slot_selection); CHECK_EXT(arm_scheduling_controls, cl_arm_scheduling_controls); CHECK_EXT(fission, cl_ext_device_fission); CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64); if (dev_has_atomic_counters(chk)) CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32); CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer); CHECK_EXT(il_program, cl_khr_il_program); CHECK_EXT(intel_queue_families, cl_intel_command_queue_families); CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread); CHECK_EXT(intel_AME, cl_intel_advanced_motion_estimation); CHECK_EXT(intel_AVC_ME, cl_intel_device_side_avc_motion_estimation); CHECK_EXT(intel_planar_yuv, cl_intel_planar_yuv); CHECK_EXT(intel_required_subgroup_size, cl_intel_required_subgroup_size); CHECK_EXT(altera_dev_temp, cl_altera_device_temperature); CHECK_EXT(p2p, cl_amd_copy_buffer_p2p); 
CHECK_EXT(pci_bus_info, cl_khr_pci_bus_info); CHECK_EXT(qcom_ext_host_ptr, cl_qcom_ext_host_ptr); CHECK_EXT(simultaneous_sharing, cl_intel_simultaneous_sharing); CHECK_EXT(subgroup_named_barrier, cl_khr_subgroup_named_barrier); CHECK_EXT(command_buffer, cl_khr_command_buffer); CHECK_EXT(mutable_dispatch, cl_khr_mutable_dispatch); CHECK_EXT(terminate_context, cl_khr_terminate_context); CHECK_EXT(terminate_arm, cl_arm_controlled_kernel_termination); CHECK_EXT(extended_versioning, cl_khr_extended_versioning); CHECK_EXT(cxx_for_opencl, cl_ext_cxx_for_opencl); CHECK_EXT(device_uuid, cl_khr_device_uuid); CHECK_EXT(float_atomics, cl_ext_float_atomics); } /* * Device info print functions */ #define _GET_VAL(ret, loc, val) \ ret->err = REPORT_ERROR_LOC(ret, \ clGetDeviceInfo((loc)->dev, (loc)->param.dev, sizeof(val), &(val), NULL), \ loc, "get %s"); \ CHECK_SIZE(ret, loc, val, clGetDeviceInfo, (loc)->dev, (loc)->param.dev); #define _GET_VAL_VALUES(ret, loc) \ REALLOC(val, numval, loc->sname); \ ret->err = REPORT_ERROR_LOC(ret, \ clGetDeviceInfo(loc->dev, loc->param.dev, szval, val, NULL), \ loc, "get %s"); \ if (ret->err) { free(val); val = NULL; } \ #define _GET_VAL_ARRAY(ret, loc) \ ret->err = REPORT_ERROR_LOC(ret, \ clGetDeviceInfo(loc->dev, loc->param.dev, 0, NULL, &szval), \ loc, "get number of %s"); \ numval = szval/sizeof(*val); \ if (!ret->err && numval > 0) { \ _GET_VAL_VALUES(ret, loc) \ } #define GET_VAL(ret, loc, field) do { \ _GET_VAL(ret, (loc), ret->value.field) \ } while (0) #define GET_VAL_ARRAY(ret, loc) do { \ _GET_VAL_ARRAY(ret, (loc)) \ } while (0) #define DEFINE_DEVINFO_FETCH(type, field) \ type \ device_fetch_##type(struct device_info_ret *ret, \ const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), \ const struct opt_out *output) \ { \ GET_VAL(ret, loc, field); \ return ret->value.field; \ } DEFINE_DEVINFO_FETCH(size_t, s) DEFINE_DEVINFO_FETCH(cl_bool, b) DEFINE_DEVINFO_FETCH(cl_uint, u32) DEFINE_DEVINFO_FETCH(cl_version, u32) 
DEFINE_DEVINFO_FETCH(cl_ulong, u64) DEFINE_DEVINFO_FETCH(cl_bitfield, u64) DEFINE_DEVINFO_FETCH(cl_device_type, devtype) DEFINE_DEVINFO_FETCH(cl_device_mem_cache_type, cachetype) DEFINE_DEVINFO_FETCH(cl_device_local_mem_type, lmemtype) DEFINE_DEVINFO_FETCH(cl_device_topology_amd, devtopo_amd) DEFINE_DEVINFO_FETCH(cl_device_pci_bus_info_khr, devtopo_khr) DEFINE_DEVINFO_FETCH(cl_device_affinity_domain, affinity_domain) DEFINE_DEVINFO_FETCH(cl_device_fp_config, fpconfig) DEFINE_DEVINFO_FETCH(cl_device_fp_atomic_capabilities_ext, fp_atomic_caps) DEFINE_DEVINFO_FETCH(cl_command_queue_properties, qprop) DEFINE_DEVINFO_FETCH(cl_device_exec_capabilities, execap) DEFINE_DEVINFO_FETCH(cl_device_svm_capabilities, svmcap) DEFINE_DEVINFO_FETCH(cl_device_terminate_capability_khr, termcap) #define DEV_FETCH_LOC(type, var, loc) \ type var = device_fetch_##type(ret, loc, chk, output) #define DEV_FETCH(type, var) DEV_FETCH_LOC(type, var, loc) #define FMT_VAL(loc, ret, fmt, val) if (!ret->err) strbuf_append(loc->pname, &ret->str, fmt, val) #define DEFINE_DEVINFO_SHOW(how, type, field, fmt) \ void \ device_info_##how(struct device_info_ret *ret, \ const struct info_loc *loc, const struct device_info_checks* chk, \ const struct opt_out *output) \ { \ DEV_FETCH(type, val); \ if (!ret->err) FMT_VAL(loc, ret, fmt, val); \ } DEFINE_DEVINFO_SHOW(int, cl_uint, u32, "%" PRIu32) DEFINE_DEVINFO_SHOW(hex, cl_uint, u32, output->json ? 
"%" PRIu32 : "%#" PRIx32) DEFINE_DEVINFO_SHOW(long, cl_ulong, u64, "%" PRIu64) DEFINE_DEVINFO_SHOW(sz, size_t, s, "%" PRIuS) void device_info_str(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out* UNUSED(output)) { GET_STRING_LOC(ret, loc, clGetDeviceInfo, loc->dev, loc->param.dev); ret->needs_escaping = CL_TRUE; } void device_info_bool(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_bool, val); if (!ret->err) { const char * const * str = (output->mode == CLINFO_HUMAN ? bool_str : output->json ? bool_json_str : bool_raw_str); strbuf_append(loc->pname, &ret->str, "%s", str[val]); } } void device_info_bits(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { DEV_FETCH(cl_uint, val); if (!ret->err) strbuf_append(loc->pname, &ret->str, "%" PRIu32 " bits (%" PRIu32 " bytes)", val, val/8); } void device_info_version(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, u32); if (!ret->err) { strbuf_append(loc->pname, &ret->str, output->json ? "{ \" raw \" : %" PRIu32 ", \"version\" :" : "%#" PRIx32, ret->value.u32); if (output->json || output->mode == CLINFO_HUMAN) { strbuf_version(loc->pname, &ret->str, output->json ? " \"" : " (", ret->value.u32, output->json ? 
"\" }" : ")"); } } } void device_info_ext_version(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_name_version *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { strbuf_name_version(loc->pname, &ret->str, val, numval, output); } free(val); } void device_info_ext_mem(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_external_memory_handle_type_khr *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { strbuf_ext_mem(loc->pname, &ret->str, val, numval, output); } free(val); } void device_info_semaphore_types(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_semaphore_type_khr *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { strbuf_semaphore_type(loc->pname, &ret->str, val, numval, output); } free(val); } void device_info_ext_semaphore_handles(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_external_semaphore_handle_type_khr *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { strbuf_ext_semaphore_handle(loc->pname, &ret->str, val, numval, output); } free(val); } void strbuf_mem(const char *what, struct _strbuf *str, cl_ulong val) { double dbl = (double)val; size_t sfx = 0; while (dbl > 1024 && sfx < memsfx_end) { dbl /= 1024; ++sfx; } strbuf_append(what, str, " (%.4lg%s)", dbl, memsfx[sfx]); } void device_info_mem(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, u64); if (!ret->err) { strbuf_append(loc->pname, &ret->str, "%" PRIu64, ret->value.u64); if (output->mode == CLINFO_HUMAN && 
ret->value.u64 > 1024) strbuf_mem(loc->pname, &ret->str, ret->value.u64); } } void device_info_mem_int(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, u32); if (!ret->err) { strbuf_append(loc->pname, &ret->str, "%" PRIu32, ret->value.u32); if (output->mode == CLINFO_HUMAN && ret->value.u32 > 1024) strbuf_mem(loc->pname, &ret->str, ret->value.u32); } } void device_info_mem_sz(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, s); if (!ret->err) { strbuf_append(loc->pname, &ret->str, "%" PRIuS, ret->value.s); if (output->mode == CLINFO_HUMAN && ret->value.s > 1024) strbuf_mem(loc->pname, &ret->str, ret->value.s); } } void device_info_free_mem_amd(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { // Apparently, with the introduction of ROCm, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD // returns 1 or 2 values depending on how it's called: if it's called with a // szval < 2*sizeof(size_t), it will only return 1 value, otherwise it will return 2. // At least now these are documented in the ROCm source code: the first value // is the total amount of free memory, and the second is the size of the largest // free block. So let's just manually ask for both values GET_VAL(ret, loc, u64v2); if (!ret->err) { size_t cursor = 0; if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " [", 2); for (cursor = 0; cursor < 2; ++cursor) { cl_ulong v = ret->value.u64v2.s[cursor]; if (cursor > 0) strbuf_append_str(loc->pname, &ret->str, output->json ? 
comma_str : spc_str); strbuf_append(loc->pname, &ret->str, "%" PRIuS, v); if (output->mode == CLINFO_HUMAN) strbuf_mem(loc->pname, &ret->str, v*UINT64_C(1024)); } if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " ]", 2); } } void device_info_time_offset(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, u64); if (!ret->err) { time_t time = ret->value.u64/UINT64_C(1000000000); strbuf_append(loc->pname, &ret->str, "%" PRIu64 "ns (", ret->value.u64); strbuf_append_str(loc->pname, &ret->str, ctime(&time)); /* overwrite ctime's newline with the closing parenthesis */ ret->str.buf[ret->str.end - 1] = ')'; } } void device_info_intptr(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_int *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { size_t counter = 0; set_separator(output->mode == CLINFO_HUMAN ? comma_str : output->json ? comma_str : spc_str); if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " [", 2); for (counter = 0; counter < numval; ++counter) { if (counter > 0) strbuf_append_str(loc->pname, &ret->str, sep); strbuf_append(loc->pname, &ret->str, "%" PRId32, val[counter]); } if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " ]", 2); // TODO: ret->value.??? = val; } free(val); } void device_info_szptr_sep(struct device_info_ret *ret, const char *human_sep, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { size_t counter = 0; set_separator(output->mode == CLINFO_HUMAN ? human_sep : output->json ? 
comma_str : spc_str); if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " [", 2); for (counter = 0; counter < numval; ++counter) { if (counter > 0) strbuf_append_str(loc->pname, &ret->str, sep); strbuf_append(loc->pname, &ret->str, "%" PRIuS, val[counter]); } if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " ]", 2); // TODO: ret->value.??? = val; } free(val); } void device_info_szptr_times(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* chk, const struct opt_out *output) { device_info_szptr_sep(ret, times_str, loc, chk, output); } void device_info_szptr_comma(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* chk, const struct opt_out *output) { device_info_szptr_sep(ret, comma_str, loc, chk, output); } void getWGsizes(struct device_info_ret *ret, const struct info_loc *loc, size_t *wgm, size_t wgm_sz, const struct opt_out* UNUSED(output)) { cl_int log_err; cl_context_properties ctxpft[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)loc->plat, 0, 0 }; cl_uint cursor = 0; cl_context ctx = NULL; cl_program prg = NULL; cl_kernel krn = NULL; ret->err = CL_SUCCESS; ctx = clCreateContext(ctxpft, 1, &loc->dev, NULL, NULL, &ret->err); if (REPORT_ERROR(&ret->err_str, ret->err, "create context")) goto out; prg = clCreateProgramWithSource(ctx, ARRAY_SIZE(sources), sources, NULL, &ret->err); if (REPORT_ERROR(&ret->err_str, ret->err, "create program")) goto out; ret->err = clBuildProgram(prg, 1, &loc->dev, NULL, NULL, NULL); log_err = REPORT_ERROR(&ret->err_str, ret->err, "build program"); /* for a program build failure, dump the log to stderr before bailing */ if (log_err == CL_BUILD_PROGRAM_FAILURE) { struct _strbuf logbuf; init_strbuf(&logbuf, "program build log"); GET_STRING(&logbuf, ret->err, clGetProgramBuildInfo, CL_PROGRAM_BUILD_LOG, "CL_PROGRAM_BUILD_LOG", prg, loc->dev); if (ret->err == CL_SUCCESS) { fflush(stdout); fflush(stderr); fputs("=== 
CL_PROGRAM_BUILD_LOG ===\n", stderr); fputs(logbuf.buf, stderr); fflush(stderr); } free_strbuf(&logbuf); } if (ret->err) goto out; for (cursor = 0; cursor < wgm_sz; ++cursor) { strbuf_append(__func__, &ret->str, "sum%u", 1<str.buf[3] = 0; // scalar kernel is called 'sum' krn = clCreateKernel(prg, ret->str.buf, &ret->err); reset_strbuf(&ret->str); if (REPORT_ERROR(&ret->err_str, ret->err, "create kernel")) goto out; ret->err = clGetKernelWorkGroupInfo(krn, loc->dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(*wgm), wgm + cursor, NULL); if (REPORT_ERROR(&ret->err_str, ret->err, "get kernel info")) goto out; clReleaseKernel(krn); krn = NULL; } out: if (krn) clReleaseKernel(krn); if (prg) clReleaseProgram(prg); if (ctx) clReleaseContext(ctx); } void device_info_wg(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { /* preferred workgroup size multiple for each kernel * have not found a platform where the WG multiple changes, * but keep this flexible (this can grow up to 5) */ #define NUM_KERNELS 1 size_t wgm[NUM_KERNELS] = {0}; getWGsizes(ret, loc, wgm, NUM_KERNELS, output); if (!ret->err) { strbuf_append("get WG sizes", &ret->str, "%" PRIuS, wgm[0]); } ret->value.s = wgm[0]; } void device_info_img_sz_2d(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; size_t width = 0, height = 0; _GET_VAL(ret, loc, height); /* HEIGHT */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE2D_MAX_WIDTH); _GET_VAL(ret, &loc2, width); if (!ret->err) { strbuf_append("image size 2D", &ret->str, "%" PRIuS "x%" PRIuS, width, height); } } ret->value.u64v.s[0] = width; ret->value.u64v.s[1] = height; } void device_info_img_sz_intel_planar_yuv(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct 
info_loc loc2 = *loc; size_t width = 0, height = 0; _GET_VAL(ret, loc, height); /* HEIGHT */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL); _GET_VAL(ret, &loc2, width); if (!ret->err) { strbuf_append("image size planar YUV", &ret->str, "%" PRIuS "x%" PRIuS, width, height); } } ret->value.u64v.s[0] = width; ret->value.u64v.s[1] = height; } void device_info_img_sz_3d(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; size_t width = 0, height = 0, depth = 0; _GET_VAL(ret, loc, height); /* HEIGHT */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_WIDTH); _GET_VAL(ret, &loc2, width); if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_IMAGE3D_MAX_DEPTH); _GET_VAL(ret, &loc2, depth); if (!ret->err) { strbuf_append("image size 3D", &ret->str, "%" PRIuS "x%" PRIuS "x%" PRIuS, width, height, depth); } } } ret->value.u64v.s[0] = width; ret->value.u64v.s[1] = height; ret->value.u64v.s[2] = depth; } void strbuf_bitfield(const char *what, struct _strbuf *str, cl_bitfield bits, const char *bits_name, const char * const *bit_str, size_t bit_str_count, const struct opt_out *output) { const char *quote = output->json ? "\"" : ""; /* number of matches so far, for separator placement */ cl_uint count = 0; /* iterator */ cl_uint i = 0; /* leftovers bits */ cl_bitfield known_mask, extra; set_common_separator(output); if (output->json) strbuf_append(what, str, "{ \"raw\" : %" PRIu64 ", \"%s\" : [ ", bits, bits_name); if (bits) { for (i = 0; i < bit_str_count; ++i) { if (bits & ((cl_bitfield)(1) << i)) { strbuf_append(what, str, "%s%s%s%s", (count > 0 ? sep : ""), quote, bit_str[i], quote); ++count; } } /* check for extra bits */ known_mask = ((cl_bitfield)(1) << bit_str_count) - 1; extra = bits & ~known_mask; if (extra) { strbuf_append(what, str, "%s%s%#" PRIx64 "%s", (count > 0 ? 
sep : ""), quote, extra, quote); } } if (output->json) strbuf_append_str(what, str, " ] }"); } void device_info_bitfield(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output, const cl_bitfield bits, const size_t bit_str_count, /* number of entries in bit_str */ const char * const * bit_str, /* array of strings describing the bits */ const char * bits_name) /* JSON name for this bitfield */ { strbuf_bitfield(loc->pname, &ret->str, bits, bits_name, bit_str, bit_str_count, output); } /* This could use device_info_bitfield, but we prefer to go through fields in reverse, * so we just dup the code */ void device_info_devtype(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, devtype); if (!ret->err) { const char *quote = output->json ? "\"" : ""; const char * const *devstr = (output->mode == CLINFO_HUMAN ? device_type_str : device_type_raw_str); cl_uint i = (cl_uint)actual_devtype_count; /* number of matches so far, for separator placement */ cl_uint count = 0; /* leftovers bits */ cl_device_type known_mask, extra; set_common_separator(output); if (output->json) strbuf_append(loc->pname, &ret->str, "{ \"raw\" : %" PRIu64 ", \"type\" : [ ", ret->value.devtype); /* iterate over device type strings, appending their textual form * to ret->str */ for (; i > 0; --i) { /* assemble CL_DEVICE_TYPE_* from index i */ cl_device_type cur = (cl_device_type)(1) << (i-1); if (ret->value.devtype & cur) { /* match: add separator if not first match */ strbuf_append(loc->pname, &ret->str, "%s%s%s%s", (count > 0 ? sep : ""), quote, devstr[i], quote); ++count; } } /* check for extra bits */ known_mask = ((cl_device_type)(1) << actual_devtype_count) - 1; extra = ret->value.devtype & ~known_mask; if (extra) { strbuf_append(loc->pname, &ret->str, "%s%s%#" PRIx64 "%s", (count > 0 ? 
sep : ""), quote, extra, quote); } if (output->json) strbuf_append_str(loc->pname, &ret->str, " ] }"); } } void device_info_cachetype(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, cachetype); if (!ret->err) { const char * const *ar = (output->mode == CLINFO_HUMAN ? cache_type_str : cache_type_raw_str); strbuf_append_str(loc->pname, &ret->str, ar[ret->value.cachetype]); ret->needs_escaping = CL_TRUE; } } void device_info_lmemtype(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, lmemtype); if (!ret->err) { const char * const *ar = (output->mode == CLINFO_HUMAN ? lmem_type_str : lmem_type_raw_str); strbuf_append_str(loc->pname, &ret->str, ar[ret->value.lmemtype]); ret->needs_escaping = CL_TRUE; } } void device_info_atomic_caps(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, bits); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.bits, atomic_cap_count, (output->mode == CLINFO_HUMAN ? atomic_cap_str : atomic_cap_raw_str), "capabilities"); } } void device_info_device_enqueue_caps(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, bits); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.bits, device_enqueue_cap_count, (output->mode == CLINFO_HUMAN ? device_enqueue_cap_str : device_enqueue_cap_raw_str), "capabilities"); } } /* cl_arm_core_id */ void device_info_core_ids(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_ulong val; GET_VAL(ret, loc, u64); val = ret->value.u64; if (!ret->err) { const char *quote = output->json ? 
"\"" : ""; /* The value is a bitfield where each set bit corresponds to a core ID * value that can be returned by the device-side function. We print them * here as ranges, such as 0-4, 8-12 */ int range_start = -1; int cur_bit = 0; if (output->json) strbuf_append(loc->pname, &ret->str, "{ \"raw\" : %" PRIu64 ", \"core_ids\" : [ ", ret->value.u64); set_separator(empty_str); #define CORE_ID_END 64 do { /* Find the start of the range */ while ((cur_bit < CORE_ID_END) && !((val >> cur_bit) & 1)) ++cur_bit; range_start = cur_bit++; /* Find the end of the range */ while ((cur_bit < CORE_ID_END) && ((val >> cur_bit) & 1)) ++cur_bit; /* print the range [range_start, cur_bit[ */ if (range_start >= 0 && range_start < CORE_ID_END) { strbuf_append(loc->pname, &ret->str, "%s%s%d", sep, quote, range_start); if (cur_bit - range_start > 1) strbuf_append(loc->pname, &ret->str, "-%d", cur_bit - 1); set_separator(comma_str); if (output->json) strbuf_append_str(loc->pname, &ret->str, quote); } } while (cur_bit < CORE_ID_END); if (output->json) strbuf_append_str(loc->pname, &ret->str, " ] }"); } } /* cl_arm_job_slot_selection */ void device_info_job_slots(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_uint val; GET_VAL(ret, loc, u32); val = ret->value.u32; if (!ret->err) { const char *quote = output->json ? "\"" : ""; /* The value is a bitfield where each set bit corresponds to an available job slot. 
* We print them here as ranges, such as 0-4, 8-12 */ int range_start = -1; int cur_bit = 0; if (output->json) strbuf_append(loc->pname, &ret->str, "{ \"raw\" : %" PRIu32 ", \"slots\" : [ ", ret->value.u32); set_separator(empty_str); #define JOB_SLOT_END 32 do { /* Find the start of the range */ while ((cur_bit < JOB_SLOT_END) && !((val >> cur_bit) & 1)) ++cur_bit; range_start = cur_bit++; /* Find the end of the range */ while ((cur_bit < JOB_SLOT_END) && ((val >> cur_bit) & 1)) ++cur_bit; /* print the range [range_start, cur_bit[ */ if (range_start >= 0 && range_start < JOB_SLOT_END) { strbuf_append(loc->pname, &ret->str, "%s%s%d", sep, quote, range_start); if (cur_bit - range_start > 1) strbuf_append(loc->pname, &ret->str, "-%d", cur_bit - 1); set_separator(comma_str); if (output->json) strbuf_append_str(loc->pname, &ret->str, quote); } } while (cur_bit < JOB_SLOT_END); if (output->json) strbuf_append_str(loc->pname, &ret->str, " ] }"); } } void devtopo_pci_str(struct device_info_ret *ret, const cl_device_pci_bus_info_khr *devtopo) { strbuf_append("devtopo", &ret->str, "PCI-E, %04x:%02x:%02x.%u", devtopo->pci_domain, devtopo->pci_bus, devtopo->pci_device, devtopo->pci_function); ret->value.devtopo_khr = *devtopo; } void device_info_devtopo_khr(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, devtopo_khr); /* TODO how to do this in CLINFO_RAW mode */ if (!ret->err) { devtopo_pci_str(ret, &ret->value.devtopo_khr); /* TODO JSONify */ ret->needs_escaping = CL_TRUE; } } /* stringify a cl_device_topology_amd */ void devtopo_amd_str(struct device_info_ret *ret, const cl_device_topology_amd *devtopo) { cl_device_pci_bus_info_khr devtopo_info; switch (devtopo->raw.type) { case 0: /* leave empty */ break; case CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD: devtopo_info.pci_domain = 0; devtopo_info.pci_bus = devtopo->pcie.bus; devtopo_info.pci_device = devtopo->pcie.device; 
devtopo_info.pci_function = devtopo->pcie.function; devtopo_pci_str(ret, &devtopo_info); break; default: strbuf_append("devtopo", &ret->str, "", devtopo->raw.type, devtopo->raw.data[0], devtopo->raw.data[1], devtopo->raw.data[2], devtopo->raw.data[3], devtopo->raw.data[4]); } } void device_info_devtopo_amd(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, devtopo_amd); /* TODO how to do this in CLINFO_RAW mode */ if (!ret->err) { devtopo_amd_str(ret, &ret->value.devtopo_amd); /* TODO JSONify */ ret->needs_escaping = CL_TRUE; } } /* we assemble a clinfo_device_topology_pci struct from the NVIDIA info */ void device_info_devtopo_nv(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; cl_device_pci_bus_info_khr devtopo; DEV_FETCH(cl_uint, val); /* CL_DEVICE_PCI_BUS_ID_NV */ if (!ret->err) { devtopo.pci_bus = val & 0xff; RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PCI_SLOT_ID_NV); _GET_VAL(ret, &loc2, val); if (!ret->err) { cl_int safe_err; devtopo.pci_device = (val >> 3) & 0xff; devtopo.pci_function = val & 7; /* CL_DEVICE_PCI_DOMAIN_ID_NV is not supported in older drivers, * but we have no way to check other than querying, and recovering * in the CL_INVALID_VALUE case */ RESET_LOC_PARAM(loc2, dev, CL_DEVICE_PCI_DOMAIN_ID_NV); safe_err = clGetDeviceInfo(loc2.dev, CL_DEVICE_PCI_DOMAIN_ID_NV, sizeof(val), &val, NULL); if (safe_err == CL_SUCCESS) { devtopo.pci_domain = val; } else if (safe_err == CL_INVALID_VALUE) { devtopo.pci_domain = 0; } else { REPORT_ERROR_LOC(ret, safe_err, &loc2, "get CL_DEVICE_PCI_DOMAIN_ID_NV"); } if (!ret->err) devtopo_pci_str(ret, &devtopo); } } } /* NVIDIA Compute Capability */ void device_info_cc_nv(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct 
info_loc loc2 = *loc; cl_uint major = 0, minor = 0; _GET_VAL(ret, loc, major); /* MAJOR */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV); _GET_VAL(ret, &loc2, minor); if (!ret->err) { strbuf_append("NV CC", &ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor); } } ret->value.u32v.s[0] = major; ret->value.u32v.s[1] = minor; } /* AMD GFXIP */ void device_info_gfxip_amd(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; cl_uint major = 0, minor = 0; _GET_VAL(ret, loc, major); /* MAJOR */ if (!ret->err) { RESET_LOC_PARAM(loc2, dev, CL_DEVICE_GFXIP_MINOR_AMD); _GET_VAL(ret, &loc2, minor); if (!ret->err) { strbuf_append("AMD GFXIP", &ret->str, "%" PRIu32 ".%" PRIu32 "", major, minor); } } ret->value.u32v.s[0] = major; ret->value.u32v.s[1] = minor; } /* Intel feature capabilities */ void device_info_intel_features(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, bits); device_info_bitfield(ret, loc, chk, output, ret->value.bits, intel_features_count, (output->mode == CLINFO_HUMAN ? intel_features_str : intel_features_raw_str), "features_intel"); } /* Device Partition, CLINFO_HUMAN header */ void device_info_partition_header(struct device_info_ret *ret, const struct info_loc *UNUSED(loc), const struct device_info_checks *chk, const struct opt_out* UNUSED(output)) { cl_bool is_12 = dev_is_12(chk); cl_bool has_fission = dev_has_fission(chk); strbuf_append("dev partition", &ret->str, "(%s%s%s%s)", (is_12 ? core : empty_str), (is_12 && has_fission ? comma_str : empty_str), chk->has_fission, (!(is_12 || has_fission) ? 
na : empty_str)); ret->err = CL_SUCCESS; } /* Device partition properties */ void device_info_partition_types(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t numval = 0, szval = 0, cursor = 0; cl_device_partition_property *val = NULL; const char * const *ptstr = (output->mode == CLINFO_HUMAN ? partition_type_str : partition_type_raw_str); GET_VAL_ARRAY(ret, loc); if (!ret->err) { const char *quote = output->json ? "\"" : ""; set_common_separator(output); if (output->json) strbuf_append_str_len(loc->pname, &ret->str, "[ ", 2); for (cursor = 0; cursor < numval; ++cursor) { int str_idx = -1; /* add separator for values past the first */ if (cursor > 0) strbuf_append_str(loc->pname, &ret->str, sep); switch (val[cursor]) { case 0: str_idx = 0; break; case CL_DEVICE_PARTITION_EQUALLY: str_idx = 1; break; case CL_DEVICE_PARTITION_BY_COUNTS: str_idx = 2; break; case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: str_idx = 3; break; case CL_DEVICE_PARTITION_BY_NAMES_INTEL: str_idx = 4; break; default: strbuf_append(loc->pname, &ret->str, "%sby (%#" PRIxPTR ")%s", quote, val[cursor], quote); break; } if (str_idx >= 0) { /* string length, minus _EXT */ size_t slen = strlen(ptstr[str_idx]); if (output->mode == CLINFO_RAW && str_idx > 0) slen -= 4; strbuf_append_str(loc->pname, &ret->str, quote); strbuf_append_str_len(loc->pname, &ret->str, ptstr[str_idx], slen); strbuf_append_str(loc->pname, &ret->str, quote); } } if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " ]", 2); // TODO ret->value.??? = val } free(val); } void device_info_partition_types_ext(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t numval = 0, szval = 0, cursor = 0; cl_device_partition_property_ext *val = NULL; const char * const *ptstr = (output->mode == CLINFO_HUMAN ? 
partition_type_str : partition_type_raw_str); GET_VAL_ARRAY(ret, loc); if (!ret->err) { const char *quote = output->json ? "\"" : ""; set_common_separator(output); if (output->json) strbuf_append_str_len(loc->pname, &ret->str, "[ ", 1); for (cursor = 0; cursor < numval; ++cursor) { int str_idx = -1; /* add separator for values past the first */ if (cursor > 0) strbuf_append_str(loc->pname, &ret->str, sep); switch (val[cursor]) { case 0: str_idx = 0; break; case CL_DEVICE_PARTITION_EQUALLY_EXT: str_idx = 1; break; case CL_DEVICE_PARTITION_BY_COUNTS_EXT: str_idx = 2; break; case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: str_idx = 3; break; case CL_DEVICE_PARTITION_BY_NAMES_EXT: str_idx = 4; break; default: strbuf_append(loc->pname, &ret->str, "%sby (%#" PRIx64 ")%s", quote, val[cursor], quote); break; } if (str_idx >= 0) { strbuf_append(loc->pname, &ret->str, "%s%s%s", quote, ptstr[str_idx], quote); } } if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " ]", 2); // TODO ret->value.??? = val } free(val); } /* Device partition affinity domains */ void device_info_partition_affinities(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, affinity_domain); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.affinity_domain, affinity_domain_count, (output->mode == CLINFO_HUMAN ? affinity_domain_str : affinity_domain_raw_str), "domain"); } } void device_info_partition_affinities_ext(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { size_t numval = 0, szval = 0, cursor = 0; cl_device_partition_property_ext *val = NULL; const char * const *ptstr = (output->mode == CLINFO_HUMAN ? affinity_domain_ext_str : affinity_domain_raw_ext_str); GET_VAL_ARRAY(ret, loc); if (!ret->err) { const char *quote = output->json ? 
"\"" : ""; set_common_separator(output); if (output->json) strbuf_append_str_len(loc->pname, &ret->str, "[ ", 2); for (cursor = 0; cursor < numval; ++cursor) { int str_idx = -1; /* add separator for values past the first */ if (cursor > 0) strbuf_append_str(loc->pname, &ret->str, sep); switch (val[cursor]) { case CL_AFFINITY_DOMAIN_NUMA_EXT: str_idx = 0; break; case CL_AFFINITY_DOMAIN_L4_CACHE_EXT: str_idx = 1; break; case CL_AFFINITY_DOMAIN_L3_CACHE_EXT: str_idx = 2; break; case CL_AFFINITY_DOMAIN_L2_CACHE_EXT: str_idx = 3; break; case CL_AFFINITY_DOMAIN_L1_CACHE_EXT: str_idx = 4; break; case CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT: str_idx = 5; break; default: strbuf_append(loc->pname, &ret->str, "%s (%#" PRIx64 ")%s", quote, val[cursor], quote); break; } if (str_idx >= 0) { strbuf_append(loc->pname, &ret->str, "%s%s%s", quote, ptstr[str_idx], quote); } } if (output->json) strbuf_append_str_len(loc->pname, &ret->str, " ]", 2); // TODO: ret->value.??? = val } free(val); } /* Preferred / native vector widths */ void device_info_vecwidth(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { struct info_loc loc2 = *loc; cl_uint preferred = 0, native = 0; _GET_VAL(ret, loc, preferred); if (!ret->err) { /* we get called with PREFERRED, NATIVE is at +0x30 offset, except for HALF, * which is at +0x08 */ loc2.param.dev += (loc2.param.dev == CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF ? 0x08 : 0x30); /* TODO update loc2.sname */ _GET_VAL(ret, &loc2, native); if (!ret->err) { const char *ext = (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF ? chk->has_half : (loc2.param.dev == CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE ? chk->has_double : NULL)); strbuf_append(loc->pname, &ret->str, "%8u / %-8u", preferred, native); if (ext) strbuf_append(loc->pname, &ret->str, " (%s)", *ext ? 
ext : na); } } ret->value.u32v.s[0] = preferred; ret->value.u32v.s[1] = native; } /* Floating-point configurations */ void device_info_fpconf(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { /* When in HUMAN output, we are called unconditionally, * so we have to do some manual checks ourselves */ const cl_bool get_it = (output->mode != CLINFO_HUMAN) || (loc->param.dev == CL_DEVICE_SINGLE_FP_CONFIG) || (loc->param.dev == CL_DEVICE_HALF_FP_CONFIG && dev_has_half(chk)) || (loc->param.dev == CL_DEVICE_DOUBLE_FP_CONFIG && dev_has_double(chk)); GET_VAL(ret, loc, fpconfig); /* Sanitize! */ if (ret->err && !get_it) { ret->err = CL_SUCCESS; ret->value.fpconfig = 0; } if (output->json) strbuf_append(loc->pname, &ret->str, "{ \"raw\" : %" PRIu64 ", \"config\" : [ ", ret->value.fpconfig); if (!ret->err) { cl_uint i = 0; cl_uint count = 0; const char * const *fpstr = (output->mode == CLINFO_HUMAN ? fp_conf_str : fp_conf_raw_str); set_common_separator(output); if (output->mode == CLINFO_HUMAN) { const char *why = na; switch (loc->param.dev) { case CL_DEVICE_HALF_FP_CONFIG: if (get_it) why = chk->has_half; break; case CL_DEVICE_SINGLE_FP_CONFIG: why = core; break; case CL_DEVICE_DOUBLE_FP_CONFIG: if (get_it) why = chk->has_double; break; default: /* "this can't happen" (unless OpenCL starts supporting _other_ floating-point formats, maybe) */ fprintf(stderr, "unsupported floating-point configuration parameter %s\n", loc->pname); } /* show 'why' it's being shown */ strbuf_append(loc->pname, &ret->str, "(%s)", why); } if (get_it) { const char *quote = output->json ? "\"" : ""; size_t num_flags = fp_conf_count; /* The last flag, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only considered * in the single-precision case. 
half and double don't consider it, * so we skip it altogether */ if (loc->param.dev != CL_DEVICE_SINGLE_FP_CONFIG) num_flags -= 1; for (i = 0; i < num_flags; ++i) { cl_device_fp_config cur = (cl_device_fp_config)(1) << i; cl_bool present = !!(ret->value.fpconfig & cur); if (output->mode == CLINFO_HUMAN) { strbuf_append(loc->pname, &ret->str, "\n%s" I2_STR "%s", line_pfx, fpstr[i], bool_str[present]); } else if (present) { strbuf_append(loc->pname, &ret->str, "%s%s%s%s", (count > 0 ? sep : ""), quote, fpstr[i], quote); ++count; } } } } if (output->json) strbuf_append_str(loc->pname, &ret->str, " ] }"); } /* Floating-point atomic capabilities ext */ void device_info_fp_atomic_caps(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks * UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, fp_atomic_caps); if (output->json) strbuf_append(loc->pname, &ret->str, "{ \"raw\" : %" PRIu64 ", \"config\" : [ ", ret->value.fp_atomic_caps); if (!ret->err) { cl_uint i = 0; cl_uint offset = 0; cl_uint count = 0; const char * const *fpstr = (output->mode == CLINFO_HUMAN ? fp_atomic_caps_str : fp_atomic_caps_raw_str); set_common_separator(output); const char *quote = output->json ? "\"" : ""; const size_t num_flags = fp_atomic_caps_count; for (offset = 0; offset < 32; offset += 16) { for (i = 0; i < num_flags; ++i) { cl_device_fp_atomic_capabilities_ext cur = (cl_device_fp_atomic_capabilities_ext)(1) << i; cl_bool present = !!(ret->value.fpconfig & cur); if (output->mode == CLINFO_HUMAN) { strbuf_append(loc->pname, &ret->str, "\n%s" I2_STR "%s", line_pfx, fpstr[i], bool_str[present]); } else if (present) { strbuf_append(loc->pname, &ret->str, "%s%s%s%s", (count > 0 ? 
sep : ""), quote, fpstr[i], quote); ++count; } } } } if (output->json) strbuf_append_str(loc->pname, &ret->str, " ] }"); } /* Queue properties */ void device_info_qprop(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { GET_VAL(ret, loc, qprop); if (!ret->err) { const char * const *qpstr = (output->mode == CLINFO_HUMAN ? queue_prop_str : queue_prop_raw_str); if (output->mode != CLINFO_HUMAN) { device_info_bitfield(ret, loc, chk, output, ret->value.qprop, queue_prop_count, qpstr, "queue_prop"); } else { /* output->mode == CLINFO_HUMAN */ for (cl_uint i = 0; i < queue_prop_count; ++i) { cl_command_queue_properties cur = (cl_command_queue_properties)(1) << i; cl_bool present =!!(ret->value.qprop & cur); strbuf_append(loc->pname, &ret->str, "\n%s" I2_STR "%s", line_pfx, qpstr[i], bool_str[present]); } /* TODO FIXME extra bits? */ if (loc->param.dev == CL_DEVICE_QUEUE_PROPERTIES && dev_has_intel_local_thread(chk)) strbuf_append(loc->pname, &ret->str, "\n%s" I2_STR "%s", line_pfx, "Local thread execution (Intel)", bool_str[CL_TRUE]); } } } void device_info_command_buffer_caps(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { GET_VAL(ret, loc, cmdbufcap); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.cmdbufcap, command_buffer_count, (output->mode == CLINFO_RAW ? command_buffer_raw_str : command_buffer_str), "capabilities"); } } void device_info_mutable_dispatch_caps(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { GET_VAL(ret, loc, cmdbufcap); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.cmdbufcap, mutable_dispatch_count, (output->mode == CLINFO_RAW ? 
mutable_dispatch_raw_str : mutable_dispatch_str), "capabilities"); } } void device_info_intel_usm_cap(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { GET_VAL(ret, loc, svmcap); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.svmcap, intel_usm_cap_count, (output->mode == CLINFO_RAW ? intel_usm_cap_raw_str : intel_usm_cap_str), "capabilities"); } } /* Device queue family properties */ void strbuf_intel_queue_family(const char *what, struct _strbuf *str, const cl_queue_family_properties_intel *fams, size_t num_fams, const struct opt_out *output) { realloc_strbuf(str, num_fams*(CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL + 512), "queue families"); if (output->json) { strbuf_append_str(what, str, "{"); } for (size_t i = 0; i < num_fams; ++i) { const cl_queue_family_properties_intel *fam = fams + i; set_separator(output->mode == CLINFO_HUMAN ? full_padding : output->json ? comma_str : spc_str); if (i > 0) strbuf_append_str(what, str, sep); if (output->json || output->mode == CLINFO_HUMAN) { strbuf_append(what, str, output->json ? "\"%s\" : { \"count\" : %u" : "%-65s(%u)", fam->name, fam->count); } else { strbuf_append(what, str, "%s:%u:", fam->name, fam->count); } if (output->json) strbuf_append(what, str, ", \"proprerties\" : "); else if (output->mode == CLINFO_HUMAN) strbuf_append(what, str, "\n%115s", "Queue properties" INDENT); strbuf_bitfield(what, str, fam->properties, "properties", output->mode == CLINFO_RAW ? queue_prop_raw_str : queue_prop_str, queue_prop_count, output); if (output->json) strbuf_append(what, str, ", \"capabilities\" : "); else if (output->mode == CLINFO_HUMAN) strbuf_append(what, str, "\n%115s", "Capabilities" INDENT); else strbuf_append(what, str, ":"); strbuf_bitfield(what, str, fam->properties, "capabilities", output->mode == CLINFO_RAW ? 
intel_queue_cap_raw_str : intel_queue_cap_str, intel_queue_cap_count, output); if (output->json) strbuf_append(what, str, "}"); } if (output->json) strbuf_append_str(what, str, " }"); } void device_info_qfamily_prop(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_queue_family_properties_intel *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { strbuf_intel_queue_family(loc->pname, &ret->str, val, numval, output); // TODO: ret->value.??? = val; } free(val); } /* Execution capabilities */ void device_info_execap(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, execap); if (!ret->err) { const char * const *qpstr = (output->mode == CLINFO_HUMAN ? execap_str : execap_raw_str); if (output->mode != CLINFO_HUMAN) { device_info_bitfield(ret, loc, chk, output, ret->value.execap, execap_count, qpstr, "type"); } else { /* output->mode == CLINFO_HUMAN */ for (cl_uint i = 0; i < execap_count; ++i) { cl_device_exec_capabilities cur = (cl_device_exec_capabilities)(1) << i; cl_bool present =!!(ret->value.execap & cur); strbuf_append(loc->pname, &ret->str, "\n%s" I2_STR "%s", line_pfx, qpstr[i], bool_str[present]); } } } } /* Arch bits and endianness (HUMAN) */ void device_info_arch(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { struct info_loc loc2 = *loc; DEV_FETCH(cl_uint, bits); RESET_LOC_PARAM(loc2, dev, CL_DEVICE_ENDIAN_LITTLE); if (!ret->err) { DEV_FETCH_LOC(cl_bool, val, &loc2); if (!ret->err) { strbuf_append(loc->pname, &ret->str, "%" PRIu32 ", %s", bits, endian_str[val]); } } } /* SVM capabilities */ void device_info_svm_cap(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out *output) { const cl_bool 
is_20 = dev_is_20(chk); const cl_bool checking_core = (loc->param.dev == CL_DEVICE_SVM_CAPABILITIES); const cl_bool has_amd_svm = (checking_core && dev_has_amd_svm(chk)); GET_VAL(ret, loc, svmcap); if (!ret->err) { const char * const *scstr = (output->mode == CLINFO_HUMAN ? svm_cap_str : svm_cap_raw_str); if (output->mode != CLINFO_HUMAN) { device_info_bitfield(ret, loc, chk, output, ret->value.svmcap, svm_cap_count, scstr, "capabilities"); } else { /* output->mode == CLINFO_HUMAN */ if (checking_core) { /* show 'why' it's being shown */ strbuf_append(loc->pname, &ret->str, "(%s%s%s)", (is_20 ? core : empty_str), (is_20 && has_amd_svm ? comma_str : empty_str), chk->has_amd_svm); } for (cl_uint i = 0; i < svm_cap_count; ++i) { cl_device_svm_capabilities cur = (cl_device_svm_capabilities)(1) << i; cl_bool present = !!(ret->value.svmcap & cur); strbuf_append(loc->pname, &ret->str, "\n%s" I2_STR "%s", line_pfx, scstr[i], bool_str[present]); } } } } /* Device terminate capability */ void device_info_terminate_capability(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, termcap); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.termcap, terminate_capability_count, (output->mode == CLINFO_HUMAN ? terminate_capability_str : terminate_capability_raw_str), "terminate"); } } /* Device terminate capability */ void device_info_terminate_arm(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, termcap); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.termcap, terminate_capability_arm_count, (output->mode == CLINFO_HUMAN ? 
terminate_capability_arm_str : terminate_capability_arm_raw_str), "terminate"); } } /* ARM scheduling controls */ void device_info_arm_scheduling_controls(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { GET_VAL(ret, loc, sched_controls); if (!ret->err) { device_info_bitfield(ret, loc, chk, output, ret->value.sched_controls, arm_scheduling_controls_count, (output->mode == CLINFO_HUMAN ? arm_scheduling_controls_str : arm_scheduling_controls_raw_str), "scheduling controls"); } } void device_info_p2p_dev_list(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks *chk, const struct opt_out* UNUSED(output)) { // Contrary to most array values in OpenCL, the AMD platform does not support querying // CL_DEVICE_P2P_DEVICES_AMD with a NULL ptr to get the number of results. // The user is assumed to have queried for the CL_DEVICE_NUM_P2P_DEVICES_AMD first, // and to have allocated the return array beforehand. cl_device_id *val = NULL; size_t numval = chk->p2p_num_devs, szval = numval*sizeof(*val); _GET_VAL_VALUES(ret, loc); if (!ret->err) { size_t cursor = 0; strbuf_append_str_len(loc->pname, &ret->str, "[ ", 2); set_common_separator(output); for (cursor = 0; cursor < numval; ++cursor) { strbuf_append(loc->pname, &ret->str, "%s%p", (cursor > 0 ? sep : ""), (void*)val[cursor]); } strbuf_append_str_len(loc->pname, &ret->str, " ]", 2); // TODO: ret->value.??? = val; } free(val); } void device_info_interop_list(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_uint *val = NULL; size_t szval = 0, numval = 0; GET_VAL_ARRAY(ret, loc); if (!ret->err) { size_t cursor = 0; const cl_interop_name *interop_name_end = cl_interop_names + num_known_interops; cl_uint human_raw = output->mode - CLINFO_HUMAN; const char *groupsep = (output->mode == CLINFO_HUMAN ? 
comma_str : vbar_str); cl_bool first = CL_TRUE; szval = 0; for (cursor = 0; cursor < numval; ++cursor) { cl_uint current = val[cursor]; if (!current && cursor < numval - 1) { /* A null value is used as group terminator, but we only print it * if it's not the final one */ strbuf_append_str(loc->pname, &ret->str, groupsep); first = CL_TRUE; } if (current) { cl_bool found = CL_FALSE; const cl_interop_name *n = cl_interop_names; if (!first) { strbuf_append_str(loc->pname, &ret->str, " "); } while (n < interop_name_end) { if (current >= n->from && current <= n->to) { found = CL_TRUE; break; } ++n; } if (found) { cl_uint i = current - n->from; strbuf_append(loc->pname, &ret->str, "%s", n->value[i][human_raw]); } else { strbuf_append(loc->pname, &ret->str, "%#" PRIx32, val[cursor]); } first = CL_FALSE; } } // TODO: ret->value.??? = val; } // TODO JSONify ret->needs_escaping = CL_TRUE; free(val); } void device_info_uuid(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_uchar uuid[CL_UUID_SIZE_KHR]; _GET_VAL(ret, loc, uuid); if (!ret->err) { strbuf_append(loc->pname, &ret->str, "%02x%02x%02x%02x-" "%02x%02x-" "%02x%02x-" "%02x%02x-" "%02x%02x%02x%02x%02x%02x", uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]); } ret->needs_escaping = CL_TRUE; } void device_info_luid(struct device_info_ret *ret, const struct info_loc *loc, const struct device_info_checks* UNUSED(chk), const struct opt_out *output) { cl_uchar uuid[CL_LUID_SIZE_KHR]; _GET_VAL(ret, loc, uuid); if (!ret->err) { /* TODO not sure this is the correct representation for LUIDs? 
*/ strbuf_append(loc->pname, &ret->str, "%02x%02x-%02x%02x%02x%02x%02x%02x", uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7]); } ret->needs_escaping = CL_TRUE; } /* * Device info traits */ /* A CL_FALSE param means "just print pname" */ struct device_info_traits { enum output_modes output_mode; cl_device_info param; // CL_DEVICE_* const char *sname; // "CL_DEVICE_*" const char *pname; // "Device *" const char *sfx; // suffix for the output in non-raw mode /* pointer to function that retrieves the parameter */ void (*show_func)(struct device_info_ret *, const struct info_loc *, const struct device_info_checks *, const struct opt_out *); /* pointer to function that checks if the parameter should be retrieved */ cl_bool (*check_func)(const struct device_info_checks *); }; #define DINFO_SFX(symbol, name, sfx, typ) symbol, #symbol, name, sfx, device_info_##typ #define DINFO(symbol, name, typ) symbol, #symbol, name, NULL, device_info_##typ struct device_info_traits dinfo_traits[] = { { CLINFO_BOTH, DINFO(CL_DEVICE_NAME, "Device Name", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR, "Device Vendor", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR_ID, "Device Vendor ID", hex), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_VERSION, "Device Version", str), NULL }, /* This has to be made before calling NUMERIC_VERSION , since to know if it's supported * we need to know about the extensions */ { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS, "Device Extensions", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS_WITH_VERSION, "Device Extensions with Version", ext_version), dev_has_ext_ver }, { CLINFO_BOTH, DINFO(CL_DEVICE_UUID_KHR, "Device UUID", uuid), dev_has_device_uuid }, { CLINFO_BOTH, DINFO(CL_DRIVER_UUID_KHR, "Driver UUID", uuid), dev_has_device_uuid }, { CLINFO_BOTH, DINFO(CL_DEVICE_LUID_VALID_KHR, "Valid Device LUID", bool), dev_has_device_uuid }, { CLINFO_BOTH, DINFO(CL_DEVICE_LUID_KHR, "Device LUID", luid), dev_has_device_uuid }, { 
CLINFO_BOTH, DINFO(CL_DEVICE_NODE_MASK_KHR, "Device Node Mask", hex), dev_has_device_uuid }, { CLINFO_BOTH, DINFO(CL_DEVICE_NUMERIC_VERSION, "Device Numeric Version", version), dev_has_ext_ver }, { CLINFO_BOTH, DINFO(CL_DRIVER_VERSION, "Driver Version", str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_VERSION, "Device OpenCL C Version", str), dev_is_11 }, { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR, "Device OpenCL C Numeric Version", version), dev_has_extended_versioning }, { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_ALL_VERSIONS, "Device OpenCL C all versions", ext_version), dev_is_30 }, { CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_FEATURES, "Device OpenCL C features", ext_version), dev_is_30 }, { CLINFO_BOTH, DINFO(CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT, "Device C++ for OpenCL Numeric Version", version), dev_has_cxx_for_opencl }, { CLINFO_BOTH, DINFO(CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, "Latest conformance test passed", str), dev_is_30 }, { CLINFO_BOTH, DINFO(CL_DEVICE_TYPE, "Device Type", devtype), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_BOARD_NAME_AMD, "Device Board Name (AMD)", str), dev_has_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_PCIE_ID_AMD, "Device PCI-e ID (AMD)", hex), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_TOPOLOGY_AMD, "Device Topology (AMD)", devtopo_amd), dev_has_amd }, /* Device Topology (NV) is multipart, so different for HUMAN and RAW */ { CLINFO_HUMAN, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device Topology (NV)", devtopo_nv), dev_has_nv }, { CLINFO_RAW, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device PCI bus (NV)", int), dev_has_nv }, { CLINFO_RAW, DINFO(CL_DEVICE_PCI_SLOT_ID_NV, "Device PCI slot (NV)", int), dev_has_nv }, { CLINFO_RAW, DINFO(CL_DEVICE_PCI_DOMAIN_ID_NV, "Device PCI domain (NV)", int), dev_has_nv }, /* Device Topology / PCI bus info (KHR) */ { CLINFO_BOTH, DINFO(CL_DEVICE_PCI_BUS_INFO_KHR, "Device PCI bus info (KHR)", devtopo_khr), dev_has_pci_bus_info }, { CLINFO_BOTH, DINFO(CL_DEVICE_PROFILE, "Device Profile", 
str), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE, "Device Available", bool), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_COMPILER_AVAILABLE, "Compiler Available", bool), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_LINKER_AVAILABLE, "Linker Available", bool), dev_is_12 }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_COMPUTE_UNITS, "Max compute units", int), NULL }, { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, "Available core IDs (ARM)", core_ids), dev_has_arm_core_id_v2 }, { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, "Available core IDs (ARM)", long), dev_has_arm_core_id_v2 }, { CLINFO_HUMAN, DINFO(CL_DEVICE_JOB_SLOTS_ARM, "Available job slots (ARM)", job_slots), dev_has_arm_job_slots }, { CLINFO_RAW, DINFO(CL_DEVICE_JOB_SLOTS_ARM, "Available job slots (ARM)", int), dev_has_arm_job_slots }, { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute unit (AMD)", int), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_WIDTH_AMD, "SIMD width (AMD)", int), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)", int), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz", int), NULL }, /* Device Compute Capability (NV) is multipart, so different for HUMAN and RAW */ { CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, "Compute Capability (NV)", cc_nv), dev_has_nv }, { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, INDENT "Compute Capability Major (NV)", int), dev_has_nv }, { CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, INDENT "Compute Capability Minor (NV)", int), dev_has_nv }, /* GFXIP (AMD) is multipart, so different for HUMAN and RAW */ /* TODO: find a better human-friendly name than GFXIP; v3 of the cl_amd_device_attribute_query * extension specification calls it “core engine GFXIP”, which honestly is not better than * our name choice. 
*/ { CLINFO_HUMAN, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, "Graphics IP (AMD)", gfxip_amd), dev_is_gpu_amd }, { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, INDENT "Graphics IP MAJOR (AMD)", int), dev_is_gpu_amd }, { CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MINOR_AMD, INDENT "Graphics IP MINOR (AMD)", int), dev_is_gpu_amd }, /* Device IP version (Intel) */ { CLINFO_BOTH, DINFO(CL_DEVICE_IP_VERSION_INTEL, "Device IP (Intel)", version), dev_is_gpu_intel }, { CLINFO_BOTH, DINFO(CL_DEVICE_ID_INTEL, "Device ID (Intel)", int), dev_is_gpu_intel }, { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SLICES_INTEL, "Slices (Intel)", int), dev_is_gpu_intel }, { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL, "Sub-slices per slice (Intel)", int), dev_is_gpu_intel }, { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL, "EUs per sub-slice (Intel)", int), dev_is_gpu_intel }, { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_THREADS_PER_EU_INTEL, "Threads per EU (Intel)", int), dev_is_gpu_intel }, { CLINFO_BOTH, DINFO(CL_DEVICE_FEATURE_CAPABILITIES_INTEL, "Feature capabilities (Intel)", intel_features), dev_is_gpu_intel }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_CORE_TEMPERATURE_ALTERA, "Core Temperature (Altera)", " C", int), dev_has_altera_dev_temp }, /* Device partition support: summary is only presented in HUMAN case */ { CLINFO_HUMAN, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, "Device Partition", partition_header), dev_has_partition }, { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices", int), dev_is_12 }, { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_PROPERTIES, INDENT "Supported partition types", partition_types), dev_is_12 }, { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, INDENT "Supported affinity domains", partition_affinities), dev_is_12 }, { CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_TYPES_EXT, INDENT "Supported partition types (ext)", partition_types_ext), dev_has_fission }, { CLINFO_BOTH, DINFO(CL_DEVICE_AFFINITY_DOMAINS_EXT, INDENT "Supported affinity 
domains (ext)", partition_affinities_ext), dev_has_fission }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "Max work item dimensions", int), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_SIZES, "Max work item sizes", szptr_times), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE, "Max work group size", sz), NULL }, /* cl_amd_device_attribute_query v4 */ { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD, "Preferred work group size (AMD)", sz), dev_has_amd_v4 }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, "Max work group size (AMD)", sz), dev_has_amd_v4 }, { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple (device)", sz), dev_is_30 }, { CLINFO_BOTH, DINFO(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple (kernel)", wg), dev_has_compiler_11 }, { CLINFO_BOTH, DINFO(CL_DEVICE_WARP_SIZE_NV, "Warp size (NV)", int), dev_has_nv }, { CLINFO_BOTH, DINFO(CL_DEVICE_WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)", int), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NUM_SUB_GROUPS, "Max sub-groups per work group", int), dev_is_21 }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, "Max named sub-group barriers", int), dev_has_subgroup_named_barrier }, { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_SIZES_INTEL, "Sub-group sizes (Intel)", szptr_comma), dev_has_intel_required_subgroup_size }, /* Preferred/native vector widths: header is only presented in HUMAN case, that also pairs * PREFERRED and NATIVE in a single line */ #define DINFO_VECWIDTH(Type, type) \ { CLINFO_HUMAN, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, vecwidth), NULL }, \ { CLINFO_RAW, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }, \ { CLINFO_RAW, DINFO(CL_DEVICE_NATIVE_VECTOR_WIDTH_##Type, INDENT #type, int), dev_is_11 } { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred / native vector sizes", str), NULL }, DINFO_VECWIDTH(CHAR, char), 
DINFO_VECWIDTH(SHORT, short), DINFO_VECWIDTH(INT, int), DINFO_VECWIDTH(LONG, long), DINFO_VECWIDTH(HALF, half), /* this should be excluded for 1.0 */ DINFO_VECWIDTH(FLOAT, float), DINFO_VECWIDTH(DOUBLE, double), /* Floating point configurations */ #define DINFO_FPCONF(Type, type, cond) \ { CLINFO_HUMAN, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), NULL }, \ { CLINFO_RAW, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), cond } DINFO_FPCONF(HALF, Half, dev_has_half), DINFO_FPCONF(SINGLE, Single, NULL), DINFO_FPCONF(DOUBLE, Double, dev_has_double), /* Address bits and endianness are written together for HUMAN, separate for RAW */ { CLINFO_HUMAN, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", arch), NULL }, { CLINFO_RAW, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", int), NULL }, { CLINFO_RAW, DINFO(CL_DEVICE_ENDIAN_LITTLE, "Little Endian", bool), NULL }, /* External memory */ { CLINFO_BOTH, DINFO(CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, "External memory handle types", ext_mem), dev_has_external_memory }, // TODO should only be queried if extension version >= 0.9.3 { CLINFO_BOTH, DINFO(CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR, "External memory assume linear img handle types", ext_mem), dev_has_external_memory }, /* Semaphores */ { CLINFO_BOTH, DINFO(CL_DEVICE_SEMAPHORE_TYPES_KHR, "Semaphore types", semaphore_types), dev_has_semaphore }, { CLINFO_BOTH, DINFO(CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, "External semaphore import types", ext_semaphore_handles), dev_has_external_semaphore }, { CLINFO_BOTH, DINFO(CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, "External semaphore export types", ext_semaphore_handles), dev_has_external_semaphore }, /* Global memory */ { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size", mem), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, "Global free memory (AMD)", free_mem_amd), 
dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)", int), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)", int), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", bytes_str, int), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_ERROR_CORRECTION_SUPPORT, "Error Correction support", bool), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "Max memory allocation", mem), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_HOST_UNIFIED_MEMORY, "Unified memory for Host and Device", bool), dev_is_11 }, { CLINFO_BOTH, DINFO(CL_DEVICE_INTEGRATED_MEMORY_NV, "Integrated memory (NV)", bool), dev_has_nv }, { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES, "Shared Virtual Memory (SVM) capabilities", svm_cap), dev_has_svm }, { CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES_ARM, "Shared Virtual Memory (SVM) capabilities (ARM)", svm_cap), dev_has_arm_svm }, { CLINFO_HUMAN, DINFO_SFX(CL_FALSE, "Unified Shared Memory (USM)", "(cl_intel_unified_shared_memory)", str), dev_has_intel_usm }, { CLINFO_BOTH, DINFO(CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, "Host USM capabilities (Intel)", intel_usm_cap), dev_has_intel_usm }, { CLINFO_BOTH, DINFO(CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, "Device USM capabilities (Intel)", intel_usm_cap), dev_has_intel_usm }, { CLINFO_BOTH, DINFO(CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, "Single-Device USM caps (Intel)", intel_usm_cap), dev_has_intel_usm }, { CLINFO_BOTH, DINFO(CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, "Cross-Device USM caps (Intel)", intel_usm_cap), dev_has_intel_usm }, { CLINFO_BOTH, DINFO(CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, "Shared System USM caps (Intel)", intel_usm_cap), dev_has_intel_usm }, /* Alignment */ { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "Minimum alignment for any data type", bytes_str, 
int), NULL }, { CLINFO_HUMAN, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", bits), NULL }, { CLINFO_RAW, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", int), NULL }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PAGE_SIZE_QCOM, "Page size (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, "External memory padding (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr }, /* Atomics alignment, with HUMAN-only header */ { CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred alignment for atomics", str), dev_is_20 }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", bytes_str, int), dev_is_20 }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", bytes_str, int), dev_is_20 }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, INDENT "Local", bytes_str, int), dev_is_20 }, /* 3.0+ Atomic memory and fence capabilities */ { CLINFO_BOTH, DINFO(CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, "Atomic memory capabilities", atomic_caps), dev_is_30 }, { CLINFO_BOTH, DINFO(CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, "Atomic fence capabilities", atomic_caps), dev_is_30 }, /* Floating point atomic capabilities */ #define DINFO_FP_ATOMIC(Type, type, cond) \ { CLINFO_HUMAN, DINFO(CL_DEVICE_##Type##_FP_ATOMIC_CAPABILITIES_EXT, #type "-precision Floating-point atomic capabilities", fp_atomic_caps), cond }, \ { CLINFO_RAW, DINFO(CL_DEVICE_##Type##_FP_ATOMIC_CAPABILITIES_EXT, #type "-precision Floating-point atomic capabilities", fp_atomic_caps), cond } DINFO_FP_ATOMIC(HALF, Half, dev_has_half_atomics), DINFO_FP_ATOMIC(SINGLE, Single, dev_has_float_atomics), DINFO_FP_ATOMIC(DOUBLE, Double, dev_has_double_atomics), /* Global variables. 
TODO some 1.2 devices respond to this too */ { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable", mem), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars", mem), dev_is_20 }, /* Global memory cache */ { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "Global Memory cache type", cachetype), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "Global Memory cache size", mem), dev_has_cache }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "Global Memory cache line size", " bytes", int), dev_has_cache }, /* Image support */ { CLINFO_BOTH, DINFO(CL_DEVICE_IMAGE_SUPPORT, "Image support", bool), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_SAMPLERS, INDENT "Max number of samplers per kernel", int), dev_has_images }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, INDENT "Max size for 1D images from buffer", pixels_str, sz), dev_has_images_12 }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", images_str, sz), dev_has_images_12 }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers", bytes_str, sz), dev_has_image2d_buffer }, { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers", pixels_str, sz), dev_has_image2d_buffer }, /* Image dimensions are split for RAW, combined for HUMAN */ { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image size", pixels_str, img_sz_2d), dev_has_images }, { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image height", sz), dev_has_images }, { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_WIDTH, INDENT "Max 2D image width", sz), dev_has_images }, { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image size", pixels_str, img_sz_2d), dev_has_intel_planar_yuv }, { CLINFO_RAW, 
DINFO(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image height", sz), dev_has_intel_planar_yuv }, { CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, INDENT "Max planar YUV image width", sz), dev_has_intel_planar_yuv }, { CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image size", pixels_str, img_sz_3d), dev_has_images }, { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image height", sz), dev_has_images }, { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_WIDTH, INDENT "Max 3D image width", sz), dev_has_images }, { CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_DEPTH, INDENT "Max 3D image depth", sz), dev_has_images }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args", int), dev_has_images }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args", int), dev_has_images }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args", int), dev_has_images_20 }, /* Pipes */ { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_SUPPORT, "Pipe support", bool), dev_is_30 }, /* TODO FIXME: the above should be true if dev is [2.0, 3.0[, and the next properties should be nested */ { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PIPE_ARGS, "Max number of pipe args", int), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", int), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_PACKET_SIZE, "Max pipe packet size", mem_int), dev_is_20 }, /* Local memory */ { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_TYPE, "Local memory type", lmemtype), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE, "Local memory size", mem), dev_has_lmem }, { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory size per CU (AMD)", mem), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)", int), dev_is_gpu_amd }, { CLINFO_BOTH, 
DINFO(CL_DEVICE_REGISTERS_PER_BLOCK_NV, "Registers per block (NV)", int), dev_has_nv }, /* Constant memory */ { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_ARGS, "Max number of constant args", int), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size", mem), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD, "Preferred constant buffer size (AMD)", mem_sz), dev_has_amd_v4 }, /* Generic address space support */ { CLINFO_BOTH, DINFO(CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, "Generic address space support", bool), dev_is_30}, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PARAMETER_SIZE, "Max size of kernel argument", mem), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters", sz), dev_has_atomic_counters }, /* Queue properties */ { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_PROPERTIES, "Queue properties", qprop), dev_not_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, "Queue properties (on host)", qprop), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, "Device enqueue capabilities", device_enqueue_caps), dev_is_30 }, /* TODO FIXME: the above should be true if dev is [2.0, 3.0[, and the next properties should be nested */ { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, "Queue properties (on device)", qprop), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, INDENT "Preferred size", mem), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, INDENT "Max size", mem), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_QUEUES, "Max queues on device", int), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_EVENTS, "Max events on device", int), dev_is_20 }, { CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, "Device queue families", qfamily_prop), dev_has_intel_queue_families }, /* Command buffers */ { CLINFO_BOTH, DINFO(CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR, "Command 
buffer capabilities", command_buffer_caps), dev_has_command_buffer }, { CLINFO_BOTH, DINFO(CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR, INDENT "Required queue properties for command buffer", qprop), dev_has_command_buffer }, { CLINFO_BOTH, DINFO(CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, "Mutable dispatch capabilities", mutable_dispatch_caps), dev_has_mutable_dispatch }, /* Terminate context */ { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x, "Terminate capability (1.2 define)", terminate_capability), dev_has_terminate_context }, { CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR, "Terminate capability (2.x and later)", terminate_capability), dev_has_terminate_context }, { CLINFO_BOTH, DINFO(CL_DEVICE_CONTROLLED_TERMINATION_CAPABILITIES_ARM, "Controlled termination caps. (ARM)", terminate_arm), dev_has_terminate_arm }, /* Interop */ { CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interop", bool), dev_is_12 }, { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, "Number of simultaneous interops (Intel)", int), dev_has_simultaneous_sharing }, { CLINFO_BOTH, DINFO(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, "Simultaneous interops", interop_list), dev_has_simultaneous_sharing }, /* P2P buffer copy */ { CLINFO_BOTH, DINFO(CL_DEVICE_NUM_P2P_DEVICES_AMD, "Number of P2P devices (AMD)", int), dev_has_p2p }, { CLINFO_BOTH, DINFO(CL_DEVICE_P2P_DEVICES_AMD, "P2P devices (AMD)", p2p_dev_list), dev_has_p2p_devs }, /* Profiling resolution */ { CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns", sz), NULL }, { CLINFO_HUMAN, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", time_offset), dev_has_amd }, { CLINFO_RAW, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", long), dev_has_amd }, /* Kernel execution capabilities */ { CLINFO_BOTH, DINFO(CL_DEVICE_EXECUTION_CAPABILITIES, "Execution 
capabilities", execap), NULL }, { CLINFO_BOTH, DINFO(CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, INDENT "Non-uniform work-groups", bool), dev_is_30 }, { CLINFO_BOTH, DINFO(CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, INDENT "Work-group collective functions", bool), dev_is_30 }, { CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, INDENT "Sub-group independent forward progress", bool), dev_is_21 }, { CLINFO_BOTH, DINFO(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, INDENT "Thread trace supported (AMD)", bool), dev_is_gpu_amd }, { CLINFO_BOTH, DINFO(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, INDENT "Kernel execution timeout (NV)", bool), dev_has_nv }, { CLINFO_BOTH, DINFO(CL_DEVICE_GPU_OVERLAP_NV, INDENT "Concurrent copy and kernel execution (NV)", bool), dev_has_nv }, { CLINFO_BOTH, DINFO(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT INDENT "Number of async copy engines", int), dev_has_nv }, { CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, INDENT "Number of async queues (AMD)", int), dev_has_amd_v4 }, /* TODO FIXME undocumented, experimental */ { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, INDENT "Max real-time compute queues (AMD)", int), dev_has_amd_v4 }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, INDENT "Max real-time compute units (AMD)", int), dev_has_amd_v4 }, { CLINFO_BOTH, DINFO(CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, INDENT "Scheduling controls (ARM)", arm_scheduling_controls), dev_has_arm_scheduling_controls }, { CLINFO_BOTH, DINFO(CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM, INDENT "Supported reg allocs (ARM)", intptr), dev_has_arm_register_alloc }, { CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WARP_COUNT_ARM, INDENT "Max warps/CU (ARM)", int), dev_has_arm_warp_count_support }, /* TODO: this should tell if it's being done due to the device being 2.1 or due to it having the extension */ { CLINFO_BOTH, DINFO(CL_DEVICE_IL_VERSION, INDENT "IL version", str), dev_has_il }, { CLINFO_BOTH, 
DINFO(CL_DEVICE_ILS_WITH_VERSION, INDENT "ILs with version", ext_version), dev_has_ext_ver },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", str), dev_has_spir },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PRINTF_BUFFER_SIZE, "printf() buffer size", mem_sz), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS, "Built-in kernels", str), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, "Built-in kernels with version", ext_version), dev_has_ext_ver },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_ME_VERSION_INTEL, "Motion Estimation accelerator version (Intel)", int), dev_has_intel_AME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_VERSION_INTEL, INDENT "Device-side AVC Motion Estimation version", int), dev_has_intel_AVC_ME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, INDENT INDENT "Supports texture sampler use", bool), dev_has_intel_AVC_ME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, INDENT INDENT "Supports preemption", bool), dev_has_intel_AVC_ME },
};

/* Process all the device info in the traits, except if param_whitelist is not NULL,
 * in which case only those in the whitelist will be processed.
 * If present, the whitelist should be sorted in the order of appearance of the parameters
 * in the traits table, and terminated by the value CL_FALSE
 *
 * Note that the whitelist is only honoured when output->cond is COND_PROP_CHECK
 * or output->brief is set; otherwise it is ignored even if non-NULL.
 *
 * dev is the device to query, and p the index of its platform in plist
 * (used for both plist->platform and plist->platform_checks).
 *
 * The table is walked in order because the results of some properties
 * (version, extensions, device type, cache/local memory type, image and
 * compiler support, number of P2P devices, ARM scheduling controls) are
 * stored in chk, and are then visible to the check functions of later rows.
 *
 * The CL_DEVICE_EXTENSIONS and CL_DEVICE_EXTENSIONS_WITH_VERSION strings
 * are not printed when met: they are copied aside and printed after the
 * loop, so they always come last in the output. */
void
printDeviceInfo(cl_device_id dev, const struct platform_list *plist, cl_uint p,
	const cl_device_info *param_whitelist, /* list of device info to process, or NULL */
	const struct opt_out *output)
{
	/* space-padded copy of the extensions string, and length of the unpadded string */
	char *extensions = NULL;
	size_t ext_len = 0;
	char *versioned_extensions = NULL;
	/* pointers to the traits for CL_DEVICE_EXTENSIONS and CL_DEVICE_EXTENSIONS_WITH_VERSION */
	const struct device_info_traits *extensions_traits = NULL;
	const struct device_info_traits *versioned_extensions_traits = NULL;

	struct device_info_checks chk;
	struct device_info_ret ret;
	struct info_loc loc;

	cl_uint n = 0; /* number of device properties shown, for JSON */

	/* start from all-zero checks, with the device version set to 10
	 * until CL_DEVICE_VERSION has been parsed */
	memset(&chk, 0, sizeof(chk));
	chk.pinfo_checks = plist->platform_checks + p;
	chk.dev_version = 10;

	INIT_RET(ret, "device");

	reset_loc(&loc, __func__);
	loc.plat = plist->platform[p];
	loc.dev = dev;

	/* loc.line doubles as the index of the current row in dinfo_traits */
	for (loc.line = 0; loc.line < ARRAY_SIZE(dinfo_traits); ++loc.line) {

		const struct device_info_traits *traits = dinfo_traits + loc.line;
		cl_bool requested;

		/* checked is true if there was no condition to check for, or if the
		 * condition was satisfied */
		int checked = !(traits->check_func && !traits->check_func(&chk));

		/* the property is labelled with its pretty name in HUMAN mode,
		 * and with its symbolic name otherwise */
		loc.sname = traits->sname;
		loc.pname = (output->mode == CLINFO_HUMAN ?
			traits->pname : traits->sname);
		loc.param.dev = traits->param;

		/* Whitelist check: finish if done traversing the list,
		 * skip current param if it's not the right one */
		if ((output->cond == COND_PROP_CHECK || output->brief) && param_whitelist) {
			if (*param_whitelist == CL_FALSE)
				break;
			if (traits->param != *param_whitelist)
				continue;
			/* matched: move on to the next whitelist entry, even if the
			 * row ends up being skipped by one of the checks below */
			++param_whitelist;
		}

		/* skip if it's not for this output mode */
		if (!(output->mode & traits->output_mode))
			continue;

		if (output->cond == COND_PROP_CHECK && !checked)
			continue;

		/* the suffix is only used in HUMAN mode */
		cur_sfx = (output->mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;

		reset_strbuf(&ret.str);
		reset_strbuf(&ret.err_str);
		ret.needs_escaping = CL_FALSE;

		/* Handle headers */
		if (traits->param == CL_FALSE) {
			ret.err = CL_SUCCESS;
			show_strbuf(&ret.str, loc.pname, 0, ret.err);
			continue;
		}

		traits->show_func(&ret, &loc, &chk, output);

		/* Do not print this property if the user requested one and this does not match */
		requested = is_selected_prop(output, loc.sname);

		if (traits->param == CL_DEVICE_EXTENSIONS) {
			/* make a backup of the extensions string, regardless of
			 * errors and requested, because we need the information
			 * to fetch further information */
			const char *msg = RET_BUF(ret)->buf;
			ext_len = strlen(msg);
			extensions_traits = traits;
			/* pad with spaces: this will make it easier to check for extension presence
			 * without erroneously matching substrings by simply padding the extension name
			 * with spaces.
			 */
			ALLOC(extensions, ext_len+3, "extensions");
			memcpy(extensions + 1, msg, ext_len);
			extensions[0] = ' ';
			extensions[ext_len+1] = ' ';
			extensions[ext_len+2] = '\0';
		} else if (traits->param == CL_DEVICE_EXTENSIONS_WITH_VERSION) {
			if (ret.err && !checked && output->cond != COND_PROP_SHOW)
				continue;
			/* This will be displayed at the end, after we display the output of CL_DEVICE_EXTENSIONS */
			const char *msg = RET_BUF(ret)->buf;
			/* the whole buffer (sz bytes) is copied, not just the string in it */
			const size_t len = RET_BUF(ret)->sz;
			if (!requested)
				continue;
			versioned_extensions_traits = traits;
			ALLOC(versioned_extensions, len, "versioned extensions");
			memcpy(versioned_extensions, msg, len);
		} else if (requested) {
			if (ret.err) {
				/* if there was an error retrieving the property,
				 * skip if it wasn't expected to work and we
				 * weren't asked to show everything regardless of
				 * error */
				if (!checked && output->cond != COND_PROP_SHOW)
					continue;

			} else {
				/* on success, but empty result, show (n/a) */
				if (ret.str.buf[0] == '\0') {
					reset_strbuf(&ret.str);
					strbuf_append_str(loc.pname, &ret.str, not_specified(output));
				}
			}
			/* four output flavours: brief JSON, brief text, full JSON, full text */
			if (output->brief && output->json)
				json_stringify(RET_BUF(ret)->buf);
			else if (output->brief)
				printf("%s%s\n", line_pfx, RET_BUF(ret)->buf);
			else if (output->json)
				json_strbuf(RET_BUF(ret), loc.pname, n++, ret.err || ret.needs_escaping);
			else
				show_strbuf(RET_BUF(ret), loc.pname, 0, ret.err);
		}

		/* a failed query has nothing to contribute to the checks */
		if (ret.err)
			continue;

		/* store the results that the check functions of later rows rely on */
		switch (traits->param) {
		case CL_DEVICE_VERSION:
			/* compute numeric value for OpenCL version */
			/* NOTE(review): the first 7 characters are skipped, presumably
			 * the "OpenCL " prefix of the version string -- confirm */
			chk.dev_version = getOpenCLVersion(ret.str.buf + 7);
			break;
		case CL_DEVICE_EXTENSIONS:
			identify_device_extensions(extensions, &chk);
			/* the copy is only kept if it will be printed after the loop */
			if (!requested) {
				free(extensions);
				extensions = NULL;
			}
			break;
		case CL_DEVICE_TYPE:
			chk.devtype = ret.value.devtype;
			break;
		case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
			chk.cachetype = ret.value.cachetype;
			break;
		case CL_DEVICE_LOCAL_MEM_TYPE:
			chk.lmemtype = ret.value.lmemtype;
			break;
		case CL_DEVICE_IMAGE_SUPPORT:
			chk.image_support = ret.value.b;
			break;
		case CL_DEVICE_COMPILER_AVAILABLE:
			chk.compiler_available = ret.value.b;
			break;
		case CL_DEVICE_NUM_P2P_DEVICES_AMD:
			chk.p2p_num_devs = ret.value.u32;
			break;
		case CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM:
			chk.arm_register_alloc_support = !!(ret.value.sched_controls & CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM);
			// TODO warp count support should check for extension version >= 0.4
			chk.arm_warp_count_support = !!(ret.value.sched_controls);
			break;
		default:
			/* do nothing */
			break;
		}
	}

	// and finally the extensions, if we retrieved them
	if (extensions) {
		// undo the padding
		/* (the trailing space is cut here, the leading one is skipped
		 * by printing from extensions + 1) */
		extensions[ext_len + 1] = '\0';
		if (output->json) {
			printf("%s\"%s\" : ", (n > 0 ? comma_str : spc_str),
				(output->mode == CLINFO_HUMAN ?
				 extensions_traits->pname : extensions_traits->sname));
			json_stringify(extensions + 1);
			++n;
		} else
			printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
					extensions_traits->pname :
					extensions_traits->sname),
				extensions + 1);
	}
	if (versioned_extensions) {
		if (output->json) {
			printf("%s\"%s\" : ", (n > 0 ? comma_str : spc_str),
				(output->mode == CLINFO_HUMAN ?
				 versioned_extensions_traits->pname :
				 versioned_extensions_traits->sname));
			/* written as-is rather than through json_stringify */
			fputs(versioned_extensions, stdout);
			++n;
		} else {
			printf("%s" I1_STR "%s\n", line_pfx, (output->mode == CLINFO_HUMAN ?
					versioned_extensions_traits->pname :
					versioned_extensions_traits->sname),
				versioned_extensions);
		}
	}
	free(extensions);
	free(versioned_extensions);
	extensions = NULL;
	UNINIT_RET(ret);
}

/* list of allowed properties for AMD offline devices */
/* everything else seems to be set to 0, and all the other string properties
 * actually segfault the driver */

/* As for any whitelist given to printDeviceInfo, the entries follow the order
 * of the rows in dinfo_traits, and the list is terminated by CL_FALSE */
static const cl_device_info amd_offline_info_whitelist[] = {
	CL_DEVICE_NAME,
	/* These are present, but all the same, so just skip them:
	CL_DEVICE_VENDOR,
	CL_DEVICE_VENDOR_ID,
	CL_DEVICE_VERSION,
	CL_DRIVER_VERSION,
	CL_DEVICE_OPENCL_C_VERSION,
	*/
	CL_DEVICE_EXTENSIONS,
	CL_DEVICE_TYPE,
	CL_DEVICE_GFXIP_MAJOR_AMD,
	CL_DEVICE_GFXIP_MINOR_AMD,
	CL_DEVICE_MAX_WORK_GROUP_SIZE,
	CL_FALSE
};

/* whitelist for the brief device listing: only the device name is shown */
static const cl_device_info list_info_whitelist[] = {
	CL_DEVICE_NAME,
	CL_FALSE
};

/* return a list of offline devices from the AMD extension */
/* A context is created on platform p with the CL_CONTEXT_OFFLINE_DEVICES_AMD
 * property set, and the devices are then fetched from that context.
 * The device array is allocated here (ALLOC) and returned to the caller.
 * If any step fails, the error is left in ret->err, the context (if any)
 * is released, the array is freed and NULL is returned. */
cl_device_id *
fetchOfflineDevicesAMD(const struct platform_list *plist, cl_uint p,
	/* the number of devices will be returned in ret->value.u32,
	 * the associated context in ret->base.ctx;
	 */
	struct device_info_ret *ret)
{
	cl_platform_id pid = plist->platform[p];
	cl_device_id *device = NULL;
	cl_uint num_devs = 0;
	cl_context ctx = NULL;

	/* zero-terminated list of (property, value) pairs */
	cl_context_properties ctxpft[] = {
		CL_CONTEXT_PLATFORM, (cl_context_properties)pid,
		CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)CL_TRUE,
		0
	};

	ctx = clCreateContextFromType(ctxpft, CL_DEVICE_TYPE_ALL,
		NULL, NULL, &ret->err);
	REPORT_ERROR(&ret->err_str, ret->err, "create context");

	if (!ret->err) {
		ret->err = REPORT_ERROR(&ret->err_str,
			clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES,
				sizeof(num_devs), &num_devs, NULL),
			"get num devs");
	}

	if (!ret->err) {
		ALLOC(device, num_devs, "offline devices");

		ret->err = REPORT_ERROR(&ret->err_str,
			clGetContextInfo(ctx, CL_CONTEXT_DEVICES,
num_devs*sizeof(*device), device, NULL), "get devs"); } if (ret->err) { if (ctx) clReleaseContext(ctx); free(device); device = NULL; } else { ret->value.u32 = num_devs; ret->base.ctx = ctx; } return device; } void printPlatformName(const struct platform_list *plist, cl_uint p, struct _strbuf *str, const struct opt_out *output) { const struct platform_data *pdata = plist->pdata + p; const char *brief_prefix = (output->mode == CLINFO_HUMAN ? "Platform #" : ""); const char *title = (output->mode == CLINFO_HUMAN ? pinfo_traits[0].pname : pinfo_traits[0].sname); const int prefix_width = -line_pfx_len*(!output->brief); if (output->brief) { strbuf_append(__func__, str, "%s%" PRIu32 ": ", brief_prefix, p); } else if (output->mode == CLINFO_RAW) { strbuf_append(__func__, str, "[%s/*]", pdata->sname); } sprintf(line_pfx, "%*s", prefix_width, str->buf); reset_strbuf(str); if (output->brief) printf("%s%s\n", line_pfx, pdata->pname); else printf("%s" I1_STR "%s\n", line_pfx, title, pdata->pname); } void printPlatformDevices(const struct platform_list *plist, cl_uint p, const cl_device_id *device, cl_uint ndevs, struct _strbuf *str, const struct opt_out *output, cl_bool these_are_offline) { const struct platform_data *pdata = plist->pdata + p; const cl_device_info *param_whitelist = output->brief ? list_info_whitelist : these_are_offline ? amd_offline_info_whitelist : NULL; cl_uint d; if (output->json) printf("%s\"%s\" : [", (these_are_offline ? comma_str : spc_str), (these_are_offline ? 
"offline" : "online")); else if (output->detailed) printf("%s" I0_STR "%" PRIu32 "\n", line_pfx, num_devs_header(output, these_are_offline), ndevs); for (d = 0; d < ndevs; ++d) { const cl_device_id dev = device[d]; if (!is_selected_device(output, p, d)) continue; if (output->brief) { const cl_bool last_device = (d == ndevs - 1 && output->mode != CLINFO_RAW && (!output->offline || !pdata->has_amd_offline || these_are_offline)); if (output->json) { /* nothing to do */ } else if (output->mode == CLINFO_RAW) sprintf(line_pfx, "%" PRIu32 "%c%" PRIu32 ": ", p, these_are_offline ? '*' : '.', d); else sprintf(line_pfx, " +-- %sDevice #%" PRIu32 ": ", these_are_offline ? "Offline " : "", d); if (last_device) line_pfx[1] = '`'; } else if (line_pfx_len > 0) { cl_int sd = (these_are_offline ? -1 : 1)*(cl_int)d; strbuf_append(__func__, str, "[%s/%" PRId32 "]", pdata->sname, sd); sprintf(line_pfx, "%*s", -line_pfx_len, str->buf); reset_strbuf(str); } if (output->json) printf("%s%s", (d > 0 ? comma_str : spc_str), (output->brief ? "" : "{")); printDeviceInfo(dev, plist, p, param_whitelist, output); if (output->json) { if (!output->brief) printf(" }"); } else if (output->detailed && d < pdata[p].ndevs - 1) puts(""); fflush(stdout); fflush(stderr); } if (output->json) fputs(" ]", stdout); } void showDevices(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms + (output->null_platform ? 1 : 0); const cl_uint maxdevs = plist->max_devs; const struct platform_data *pdata = plist->pdata; cl_uint p; struct _strbuf str; init_strbuf(&str, __func__); if (output->mode == CLINFO_RAW) { if (output->brief) strbuf_append(__func__, &str, "%" PRIu32 ".%" PRIu32 ": ", num_platforms, maxdevs); else strbuf_append(__func__, &str, "[%*s/%" PRIu32 "] ", plist->max_sname_len, "", maxdevs); } else { if (output->brief) strbuf_append(__func__, &str, " +-- %sDevice #%" PRIu32 ": ", (output->offline ? 
"Offline " : ""), maxdevs); /* TODO we have no prefix in HUMAN detailed output mode, * consider adding one */ } if (str.buf[0]) { line_pfx_len = (int)(strlen(str.buf) + 1); REALLOC(line_pfx, line_pfx_len, "line prefix"); reset_strbuf(&str); } for (p = 0; p < num_platforms; ++p) { /* skip non-selected platforms altogether */ if (!is_selected_platform(output, p)) continue; /* Open the JSON devices list for this platform */ if (output->json) printf("%s{", p > 0 ? comma_str : spc_str); /* skip platform header if only printing specfic properties, */ else if (!output->num_selected_props) printPlatformName(plist, p, &str, output); printPlatformDevices(plist, p, get_platform_devs(plist, p), pdata[p].ndevs, &str, output, CL_FALSE); if (output->offline && pdata[p].has_amd_offline) { struct device_info_ret ret; cl_device_id *devs = NULL; INIT_RET(ret, "offline device"); if (output->detailed) puts(""); devs = fetchOfflineDevicesAMD(plist, p, &ret); if (ret.err) { puts(ret.err_str.buf); } else { printPlatformDevices(plist, p, devs, ret.value.u32, &str, output, CL_TRUE); clReleaseContext(ret.base.ctx); free(devs); } UNINIT_RET(ret); } /* Close JSON object for this platform */ if (output->json) fputs(" }", stdout); else if (output->detailed) puts(""); } free_strbuf(&str); } /* check the behavior of clGetPlatformInfo() when given a NULL platform ID */ void checkNullGetPlatformName(const struct opt_out *output) { struct device_info_ret ret; struct info_loc loc; INIT_RET(ret, "null ctx"); reset_loc(&loc, __func__); RESET_LOC_PARAM(loc, plat, CL_PLATFORM_NAME); ret.err = clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ret.str.sz, ret.str.buf, NULL); if (ret.err == CL_INVALID_PLATFORM) { strbuf_append(__func__, &ret.err_str, no_plat(output)); } else { loc.line = __LINE__ + 1; REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s"); } printf(I1_STR "%s\n", "clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)", RET_BUF(ret)->buf); UNINIT_RET(ret); } /* check the behavior of clGetDeviceIDs() when given a 
NULL platform ID; * return the index of the default platform in our array of platform IDs, * or num_platforms (which is an invalid platform index) in case of errors * or no platform or device found. */ cl_uint checkNullGetDevices(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms; const struct platform_data *pdata = plist->pdata; const cl_platform_id *platform = plist->platform; struct device_info_ret ret; struct info_loc loc; cl_uint i = 0; /* generic iterator */ cl_device_id dev = NULL; /* sample device */ cl_platform_id plat = NULL; /* detected platform */ cl_uint found = 0; /* number of platforms found */ cl_uint pidx = num_platforms; /* index of the platform found */ cl_uint numdevs = 0; INIT_RET(ret, "null get devices"); reset_loc(&loc, __func__); loc.sname = "device IDs"; ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 0, NULL, &numdevs); /* TODO we should check other CL_DEVICE_TYPE_* combinations, since a smart * implementation might give you a different default platform for GPUs * and for CPUs. * Of course the “no devices” case would then need to be handled differently. * The logic might be maintained similarly, provided we also gather * the number of devices of each type for each platform, although it's * obviously more likely to have multiple platforms with no devices * of a given type. */ switch (ret.err) { case CL_INVALID_PLATFORM: strbuf_append_str(__func__, &ret.err_str, no_plat(output)); break; case CL_DEVICE_NOT_FOUND: /* No devices were found, see if there are platforms with * no devices, and if there's only one, assume this is the * one being used as default by the ICD loader */ for (i = 0; i < num_platforms; ++i) { if (pdata[i].ndevs == 0) { ++found; if (found > 1) break; else { plat = platform[i]; pidx = i; } } } switch (found) { case 0: strbuf_append_str(__func__, &ret.err_str, (output->mode == CLINFO_HUMAN ? 
"" : "CL_DEVICE_NOT_FOUND | CL_INVALID_PLATFORM")); break; case 1: strbuf_append(__func__, &ret.err_str, "%s%s%s%s", no_dev_found(output), (output->mode == CLINFO_HUMAN ? " [" : " | "), (output->mode == CLINFO_HUMAN ? pdata[pidx].pname : pdata[pidx].sname), (output->mode == CLINFO_HUMAN ? "?]" : "?")); break; default: /* found > 1 */ strbuf_append_str(__func__, &ret.err_str, (output->mode == CLINFO_HUMAN ? "" : "CL_DEVICE_NOT_FOUND | ????")); break; } break; default: loc.line = __LINE__+1; if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get number of %s")) break; /* Determine platform by looking at the CL_DEVICE_PLATFORM of * one of the devices */ ret.err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 1, &dev, NULL); loc.line = __LINE__+1; if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM); ret.err = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL); loc.line = __LINE__+1; if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; for (i = 0; i < num_platforms; ++i) { if (platform[i] == plat) { pidx = i; strbuf_append(__func__, &ret.str, "%s [%s]", (output->mode == CLINFO_HUMAN ? 
"Success" : "CL_SUCCESS"), pdata[i].sname); break; } } if (i == num_platforms) { ret.err = CL_INVALID_PLATFORM; strbuf_append(__func__, &ret.err_str, "", (void*)plat); } } printf(I1_STR "%s\n", "clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)", RET_BUF(ret)->buf); UNINIT_RET(ret); return pidx; } void checkNullCtx(struct device_info_ret *ret, const struct platform_list *plist, cl_uint pidx, const char *which, const struct opt_out *output) { const cl_device_id *dev = plist->all_devs + plist->dev_offset[pidx]; struct info_loc loc; cl_context ctx = clCreateContext(NULL, 1, dev, NULL, NULL, &ret->err); reset_loc(&loc, __func__); loc.sname = which; loc.line = __LINE__+2; if (!REPORT_ERROR_LOC(ret, ret->err, &loc, "create context with device from %s platform")) strbuf_append(__func__, &ret->str, "%s [%s]", (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"), plist->pdata[pidx].sname); if (ctx) { clReleaseContext(ctx); ctx = NULL; } } /* check behavior of clCreateContextFromType() with NULL cl_context_properties */ void checkNullCtxFromType(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms; const struct platform_data *pdata = plist->pdata; const cl_platform_id *platform = plist->platform; size_t t; /* type iterator */ size_t i; /* generic iterator */ char def[1024]; cl_context ctx = NULL; size_t ndevs = 8; size_t szval = 0; size_t cursz = ndevs*sizeof(cl_device_id); cl_platform_id plat = NULL; cl_device_id *devs = NULL; struct device_info_ret ret; struct info_loc loc; const char *platname_prop = (output->mode == CLINFO_HUMAN ? pinfo_traits[0].pname : pinfo_traits[0].sname); const char *devname_prop = (output->mode == CLINFO_HUMAN ? 
dinfo_traits[0].pname : dinfo_traits[0].sname); reset_loc(&loc, __func__); INIT_RET(ret, "null ctx from type"); ALLOC(devs, ndevs, "context devices"); for (t = 1; t < devtype_count; ++t) { /* we skip 0 */ loc.sname = device_type_raw_str[t]; strbuf_append(__func__, &ret.str, "clCreateContextFromType(NULL, %s)", loc.sname); sprintf(def, I1_STR, ret.str.buf); reset_strbuf(&ret.str); loc.line = __LINE__+1; ctx = clCreateContextFromType(NULL, devtype[t], NULL, NULL, &ret.err); switch (ret.err) { case CL_INVALID_PLATFORM: strbuf_append_str(__func__, &ret.err_str, no_plat(output)); break; case CL_DEVICE_NOT_FOUND: strbuf_append_str(__func__, &ret.err_str, no_dev_found(output)); break; case CL_INVALID_DEVICE_TYPE: /* e.g. _CUSTOM device on 1.1 platform */ strbuf_append_str(__func__, &ret.err_str, invalid_dev_type(output)); break; case CL_INVALID_VALUE: /* This is what apple returns for the case above */ strbuf_append_str(__func__, &ret.err_str, invalid_dev_type(output)); break; case CL_DEVICE_NOT_AVAILABLE: strbuf_append_str(__func__, &ret.err_str, no_dev_avail(output)); break; default: if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "create context from type %s")) break; /* get the devices */ loc.sname = "CL_CONTEXT_DEVICES"; loc.line = __LINE__+2; ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &szval); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s size")) break; if (szval > cursz) { REALLOC(devs, szval, "context devices"); cursz = szval; } loc.line = __LINE__+1; ret.err = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, cursz, devs, NULL); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; ndevs = szval/sizeof(cl_device_id); if (ndevs < 1) { ret.err = CL_DEVICE_NOT_FOUND; strbuf_append_str(__func__, &ret.err_str, ""); } /* get the platform from the first device */ RESET_LOC_PARAM(loc, dev, CL_DEVICE_PLATFORM); loc.line = __LINE__+1; ret.err = clGetDeviceInfo(*devs, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, 
"get %s")) break; loc.plat = plat; for (i = 0; i < num_platforms; ++i) { if (platform[i] == plat) break; } if (i == num_platforms) { ret.err = CL_INVALID_PLATFORM; strbuf_append(__func__, &ret.err_str, "", (void*)plat); break; } else { strbuf_append(__func__, &ret.str, "%s (%" PRIuS ")", (output->mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"), ndevs); strbuf_append(__func__, &ret.str, "\n" I2_STR "%s", platname_prop, pdata[i].pname); } for (i = 0; i < ndevs; ++i) { size_t szname = 0; /* for each device, show the device name */ /* TODO some other unique ID too, e.g. PCI address, if available? */ strbuf_append(__func__, &ret.str, "\n" I2_STR, devname_prop); RESET_LOC_PARAM(loc, dev, CL_DEVICE_NAME); loc.dev = devs[i]; loc.line = __LINE__+1; ret.err = clGetDeviceInfo(devs[i], CL_DEVICE_NAME, ret.str.sz - ret.str.end, ret.str.buf + ret.str.end, &szname); if (REPORT_ERROR_LOC(&ret, ret.err, &loc, "get %s")) break; ret.str.end += szname - 1; } if (i != ndevs) break; /* had an error earlier, bail */ } if (ctx) { clReleaseContext(ctx); ctx = NULL; } printf("%s%s\n", def, RET_BUF(ret)->buf); reset_strbuf(&ret.str); reset_strbuf(&ret.err_str); } free(devs); UNINIT_RET(ret); } /* check the behavior of NULL platform in clGetDeviceIDs (see checkNullGetDevices) * and in clCreateContext() */ void checkNullBehavior(const struct platform_list *plist, const struct opt_out *output) { const cl_uint num_platforms = plist->num_platforms; const struct platform_data *pdata = plist->pdata; cl_uint p = 0; struct device_info_ret ret; INIT_RET(ret, "null behavior"); printf("NULL platform behavior\n"); checkNullGetPlatformName(output); p = checkNullGetDevices(plist, output); /* If there's a default platform, and it has devices, try * creating a context with its first device and see if it works */ if (p == num_platforms) { ret.err = CL_INVALID_PLATFORM; strbuf_append(__func__, &ret.err_str, no_plat(output)); } else if (pdata[p].ndevs == 0) { ret.err = CL_DEVICE_NOT_FOUND; 
strbuf_append(__func__, &ret.err_str, no_dev_found(output)); } else { if (p < num_platforms) { checkNullCtx(&ret, plist, p, "default", output); } else { /* this shouldn't happen, but still ... */ ret.err = CL_OUT_OF_HOST_MEMORY; strbuf_append_str(__func__, &ret.err_str, ""); } } printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [default]", RET_BUF(ret)->buf); /* Look for a device from a non-default platform, if there are any */ if (p == num_platforms || num_platforms > 1) { cl_uint p2 = 0; reset_strbuf(&ret.str); reset_strbuf(&ret.err_str); while (p2 < num_platforms && (p2 == p || pdata[p2].ndevs == 0)) { p2++; } if (p2 < num_platforms) { checkNullCtx(&ret, plist, p2, "non-default", output); } else { ret.err = CL_DEVICE_NOT_FOUND; strbuf_append(__func__, &ret.err_str, ""); } printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [other]", RET_BUF(ret)->buf); } checkNullCtxFromType(plist, output); UNINIT_RET(ret); } /* Get properties of the ocl-icd loader, if available */ /* All properties are currently char[] */ /* Function pointer to the ICD loader info function */ typedef cl_int (*icdl_info_fn_ptr)(cl_icdl_info, size_t, void*, size_t*); icdl_info_fn_ptr clGetICDLoaderInfoOCLICD; /* We want to auto-detect the OpenCL version supported by the ICD loader. * To do this, we will progressively find symbols introduced in new APIs, * until a NULL symbol is found. 
*/ struct icd_loader_test { cl_uint version; const char *symbol; } icd_loader_tests[] = { { 11, "clCreateSubBuffer" }, { 12, "clCreateImage" }, { 20, "clSVMAlloc" }, { 21, "clGetHostTimer" }, { 22, "clSetProgramSpecializationConstant" }, { 30, "clSetContextDestructorCallback" }, { 0, NULL } }; void icdl_info_str(struct icdl_info_ret *ret, const struct info_loc *loc) { GET_STRING_LOC(ret, loc, clGetICDLoaderInfoOCLICD, loc->param.icdl); return; } struct icdl_info_traits { cl_icdl_info param; // CL_ICDL_* const char *sname; // "CL_ICDL_*" const char *pname; // "ICD loader *" }; static const char * const oclicdl_pfx = "OCLICD"; #define LINFO(symbol, name) { symbol, #symbol, "ICD loader " name } struct icdl_info_traits linfo_traits[] = { LINFO(CL_ICDL_NAME, "Name"), LINFO(CL_ICDL_VENDOR, "Vendor"), LINFO(CL_ICDL_VERSION, "Version"), LINFO(CL_ICDL_OCL_VERSION, "Profile") }; /* The ICD loader info function must be retrieved via clGetExtensionFunctionAddress, * which returns a void pointer. * ISO C forbids assignments between function pointers and void pointers, * but POSIX allows it. To compile without warnings even in -pedantic mode, * we take advantage of the fact that we _can_ do the conversion via * pointers-to-pointers. This is supported on most compilers, except * for some rather old GCC versions whose strict aliasing rules are * too strict. Disable strict aliasing warnings for these compilers. 
*/ #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46) #pragma GCC diagnostic ignored "-Wstrict-aliasing" #endif struct icdl_data oclIcdProps(const struct platform_list *plist, const struct opt_out *output) { const cl_uint max_plat_version = plist->max_plat_version; struct icdl_data icdl; /* clinfo may lag behind the OpenCL standard or loader version, * and we don't want to give a warning if we can't tell if the loader * correctly supports a version unknown to us */ cl_uint clinfo_highest_known_version = 0; /* Counter that'll be used to walk the icd_loader_tests */ int i = 0; /* We find the clGetICDLoaderInfoOCLICD extension address, which will be used * to query the ICD loader properties. * It should be noted that in this specific case we cannot replace the * call to clGetExtensionFunctionAddress with a call to the superseding function * clGetExtensionFunctionAddressForPlatform because the extension is in the * loader itself, not in a specific platform. */ void *ptrHack = clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD"); clGetICDLoaderInfoOCLICD = *(icdl_info_fn_ptr*)(&ptrHack); /* Initialize icdl_data ret versions */ icdl.detected_version = 10; icdl.reported_version = 0; /* Step #1: try to auto-detect the supported ICD loader version */ do { struct icd_loader_test check = icd_loader_tests[i]; if (check.symbol == NULL) break; if (dlsym(DL_MODULE, check.symbol) == NULL) break; clinfo_highest_known_version = icdl.detected_version = check.version; ++i; } while (1); /* Step #2: query properties from extension, if available */ if (clGetICDLoaderInfoOCLICD != NULL) { cl_uint n = 0; /* number of ICD loader properties shown, for JSON */ struct info_loc loc; struct icdl_info_ret ret; reset_loc(&loc, __func__); INIT_RET(ret, "ICD loader"); /* TODO think of a sensible header in CLINFO_RAW */ if (output->mode != CLINFO_RAW) puts("\nICD loader properties"); if (output->json) { fputs(", \"icd_loader\" : {", stdout); } else if (output->mode == CLINFO_RAW) { 
line_pfx_len = (int)(strlen(oclicdl_pfx) + 5); REALLOC(line_pfx, line_pfx_len, "line prefix OCL ICD"); strbuf_append(loc.pname, &ret.str, "[%s/*]", oclicdl_pfx); sprintf(line_pfx, "%*s", -line_pfx_len, ret.str.buf); reset_strbuf(&ret.str); } for (loc.line = 0; loc.line < ARRAY_SIZE(linfo_traits); ++loc.line) { const struct icdl_info_traits *traits = linfo_traits + loc.line; cl_bool requested; loc.sname = traits->sname; loc.pname = (output->mode == CLINFO_HUMAN ? traits->pname : traits->sname); loc.param.icdl = traits->param; cur_sfx = empty_str; reset_strbuf(&ret.str); reset_strbuf(&ret.err_str); icdl_info_str(&ret, &loc); /* Do not print this property if the user requested one and this does not match */ requested = is_selected_prop(output, loc.sname); if (requested) { if (output->json) json_strbuf(RET_BUF(ret), loc.pname, n++, CL_TRUE); else show_strbuf(RET_BUF(ret), loc.pname, 1, ret.err); } if (!ret.err && traits->param == CL_ICDL_OCL_VERSION) { icdl.reported_version = getOpenCLVersion(ret.str.buf + 7); } } if (output->json) printf("%s\"_detected_version\" : \"%" PRIu32 ".%" PRIu32 "\" }", (n > 0 ? comma_str : spc_str), SPLIT_CL_VERSION(icdl.detected_version)); UNINIT_RET(ret); } /* Step #3: show it */ if (output->mode == CLINFO_HUMAN) { // for the loader vs platform max version check we use the version we detected // if the reported version is known to us, and the reported version if it's higher // than the standard versions we know about cl_uint max_version_check = icdl.reported_version > clinfo_highest_known_version ? 
icdl.reported_version : icdl.detected_version; if (icdl.reported_version && icdl.reported_version <= clinfo_highest_known_version && icdl.reported_version != icdl.detected_version) { printf( "\tNOTE:\tyour OpenCL library declares to support OpenCL %" PRIu32 ".%" PRIu32 ",\n" "\t\tbut it seems to support up to OpenCL %" PRIu32 ".%" PRIu32 " %s.\n", SPLIT_CL_VERSION(icdl.reported_version), SPLIT_CL_VERSION(icdl.detected_version), icdl.detected_version < icdl.reported_version ? "only" : "too"); } if (max_version_check < max_plat_version) { printf( "\tNOTE:\tyour OpenCL library only supports OpenCL %" PRIu32 ".%" PRIu32 ",\n" "\t\tbut some installed platforms support OpenCL %" PRIu32 ".%" PRIu32 ".\n" "\t\tPrograms using %" PRIu32 ".%" PRIu32 " features may crash\n" "\t\tor behave unexpectedly\n", SPLIT_CL_VERSION(icdl.detected_version), SPLIT_CL_VERSION(max_plat_version), SPLIT_CL_VERSION(max_plat_version)); } } return icdl; } #if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46) #pragma GCC diagnostic warning "-Wstrict-aliasing" #endif void version(void) { puts("clinfo version 3.0.25.02.14"); } void add_selected_device(struct opt_out *output, cl_uint p, cl_uint d) { if (output->num_selected_devices == MAX_SELECTED_DEVICES) { fprintf(stderr, "too many device specifications (max: %u)\n", MAX_SELECTED_DEVICES); exit(1); } cl_uint2 *dst = output->selected_devices + output->num_selected_devices++; dst->s[0] = p; dst->s[1] = d; } void parse_device_spec(const char *str, struct opt_out *output) { int p, d, n; if (!str) { fprintf(stderr, "please specify a device in the form P:D where P is the platform number and D the device number\n"); exit(1); } n = sscanf(str, "%d:%d", &p, &d); if (n != 2 || p < 0 || d < 0) { fprintf(stderr, "invalid device specification '%s'\n", str); exit(1); } add_selected_device(output, p, d); } void free_output(struct opt_out * UNUSED(output)) { /* nothing to do until we implement proper memory management * for selected_devices and 
selected_props */ } void add_selected_prop(struct opt_out *output, const char* prop) { if (output->num_selected_props == MAX_SELECTED_PROPS) { fprintf(stderr, "too many properties specifications (max: %u)\n", MAX_SELECTED_PROPS); exit(1); } output->selected_props[output->num_selected_props++] = prop; } void parse_prop(const char *input, struct opt_out *output) { /* We normalize the property name by upcasing it and replacing the minus sign (-) * with the underscore (_). If any other character is found, we consider it an error */ size_t len = strlen(input); char *normalized; ALLOC(normalized, len+1, "normalized property name"); for (size_t i = 0; i < len; ++i) { char c = input[i]; if ( (c == '_') || ( c >= 'A' && c <= 'Z')) normalized[i] = c; else if (c >= 'a' && c <= 'z') normalized[i] = 'A' + (c - 'a'); else if (c == '-') normalized[i] = '_'; else { fprintf(stderr, "invalid property name substring '%s'\n", input); exit(1); } } add_selected_prop(output, normalized); } void usage(void) { version(); puts("Display properties of all available OpenCL platforms and devices"); puts("Usage: clinfo [options ...]\n"); puts("Options:"); puts("\t--all-props, -a\t\ttry all properties, only show valid ones"); puts("\t--always-all-props, -A\tshow all properties, even if invalid"); puts("\t--human\t\t\thuman-friendly output (default)"); puts("\t--raw\t\t\traw output"); puts("\t--json\t\t\toutput raw data in JSON format (experimental)"); puts("\t--offline\t\talso show offline devices"); puts("\t--null-platform\t\talso show the NULL platform devices"); puts("\t--list, -l\t\tonly list the platforms and devices by name"); puts("\t--prop prop-name\tonly list properties matching the given name"); puts("\t--device p:d, -d p:d\tonly show information about device number d from platform number p"); puts("\t--help, -h, -?\t\tshow usage"); puts("\t--version, -v\t\tshow version\n"); puts("Defaults to raw mode if invoked with a name that contains the string \"raw\""); } int main(int argc, char 
*argv[]) { cl_uint p; cl_int err; int a = 0; struct opt_out output; struct platform_list plist; init_plist(&plist); output.num_selected_devices = 0; output.num_selected_props = 0; output.mode = CLINFO_HUMAN; output.cond = COND_PROP_CHECK; output.brief = CL_FALSE; output.offline = CL_FALSE; output.null_platform = CL_FALSE; output.json = CL_FALSE; output.check_size = CL_FALSE; /* if there's a 'raw' in the program name, switch to raw output mode */ if (strstr(argv[0], "raw")) output.mode = CLINFO_RAW; /* process command-line arguments */ while (++a < argc) { if (!strcmp(argv[a], "-a") || !strcmp(argv[a], "--all-props")) output.cond = COND_PROP_TRY; else if (!strcmp(argv[a], "-A") || !strcmp(argv[a], "--always-all-props")) output.cond = COND_PROP_SHOW; else if (!strcmp(argv[a], "--raw")) output.mode = CLINFO_RAW; else if (!strcmp(argv[a], "--human")) output.mode = CLINFO_HUMAN; else if (!strcmp(argv[a], "--offline")) output.offline = CL_TRUE; else if (!strcmp(argv[a], "--null-platform")) output.null_platform = CL_TRUE; else if (!strcmp(argv[a], "--json")) output.json = CL_TRUE; else if (!strcmp(argv[a], "-l") || !strcmp(argv[a], "--list")) output.brief = CL_TRUE; else if (!strcmp(argv[a], "-d") || !strcmp(argv[a], "--device")) { ++a; parse_device_spec(argv[a], &output); } else if (!strncmp(argv[a], "-d", 2)) { parse_device_spec(argv[a] + 2, &output); } else if (!strcmp(argv[a], "--prop")) { ++a; parse_prop(argv[a], &output); } else if (!strcmp(argv[a], "-?") || !strcmp(argv[a], "-h") || !strcmp(argv[a], "--help")) { usage(); free_output(&output); return 0; } else if (!strcmp(argv[a], "--version") || !strcmp(argv[a], "-v")) { version(); free_output(&output); return 0; } else { fprintf(stderr, "ignoring unknown command-line parameter %s\n", argv[a]); } } /* If a property was specified, we only print in RAW mode. 
* Likewise, JSON format assumes RAW */ if (output.num_selected_props || output.json) output.mode = CLINFO_RAW; output.detailed = !output.brief && !output.num_selected_devices && !output.num_selected_props; err = clGetPlatformIDs(0, NULL, &plist.num_platforms); if (err != CL_PLATFORM_NOT_FOUND_KHR) CHECK_ERROR(err, "number of platforms"); if (output.detailed && !output.json) printf(I0_STR "%" PRIu32 "\n", (output.mode == CLINFO_HUMAN ? "Number of platforms" : "#PLATFORMS"), plist.num_platforms); cl_uint alloced_platforms = 0; if (plist.num_platforms) { alloced_platforms = alloc_plist(&plist, &output); err = clGetPlatformIDs(plist.num_platforms, plist.platform, NULL); CHECK_ERROR(err, "platform IDs"); } ALLOC(line_pfx, 1, "line prefix"); /* Open the JSON object and the JSON platforms list */ if (output.json) fputs("{ \"platforms\" : [", stdout); for (p = 0; p < alloced_platforms; ++p) { // skip non-selected platforms altogether if (!(is_selected_platform(&output, p))) { /* Update the dev_offset, otherwise the wrong devices will be picked * when using a specification such as -d 0:0 -d 2:0 */ if (p) { plist.dev_offset[p] = plist.dev_offset[p-1] + plist.pdata[p-1].ndevs; plist.pdata[p].ndevs = 0; } continue; } /* Open a JSON object for this platform */ if (output.json) printf("%s%s", (p > 0 ? comma_str : spc_str), (output.brief ? 
"" : "{")); gatherPlatformInfo(&plist, p, &output); /* Close JSON object for this platform */ if (output.json && !output.brief) fputs(" }", stdout); else if (output.detailed) puts(""); } /* Close JSON platforms list, open JSON devices list */ if (alloced_platforms) { if (output.json) fputs(" ], \"devices\" : [", stdout); showDevices(&plist, &output); } /* Close JSON devices list */ if (output.json) fputs(" ]", stdout); if (output.num_selected_props || (output.detailed && !output.num_selected_devices)) { if (output.mode != CLINFO_RAW && plist.num_platforms) checkNullBehavior(&plist, &output); oclIcdProps(&plist, &output); } /* Close the JSON object */ if (output.json) fputs(" }", stdout); free_plist(&plist); free(line_pfx); free_output(&output); return 0; } clinfo-3.0.25.02.14/src/ctx_prop.h000066400000000000000000000015771475367065600163510ustar00rootroot00000000000000/* List of OpenCL context properties used to interoperate with a different API */ #ifndef CTX_PROP #define CTX_PROP /* cl_khr_gl_sharing */ #define CL_GL_CONTEXT_KHR 0x2008 #define CL_EGL_DISPLAY_KHR 0x2009 #define CL_GLX_DISPLAY_KHR 0x200A #define CL_WGL_HDC_KHR 0x200B #define CL_CGL_SHAREGROUP_KHR 0x200C /* cl_khr_dx9_media_sharing */ #define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 #define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 #define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 /* cl_khr_d3d10_sharing */ #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 /* cl_khr_d3d11_sharing */ #define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D /* cl_intel_dx9_media_sharing */ #define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 #define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 #define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 /* cl_intel_va_api_media_sharing */ #define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097 #endif clinfo-3.0.25.02.14/src/error.h000066400000000000000000000032201475367065600156270ustar00rootroot00000000000000/* OpenCL error handling */ #ifndef ERROR_H #define ERROR_H #include #include "ext.h" #include "info_loc.h" #include "fmtmacros.h" #include 
"strbuf.h" cl_int check_ocl_error(cl_int err, const char *what, const char *func, int line) { if (err != CL_SUCCESS) { fflush(stdout); fflush(stderr); fprintf(stderr, "%s:%u: %s : error %d\n", func, line, what, err); fflush(stderr); } return err; } cl_int report_ocl_error_basic(struct _strbuf *str, cl_int err, const char *what, const char *func, int line) { if (err != CL_SUCCESS) { snprintf(str->buf, str->sz, "<%s:%d: %s : error %d>", func, line, what, err); } return err; } cl_int report_ocl_error_loc(struct _strbuf *str, cl_int err, const char *fmt, const struct info_loc *loc) { static char full_fmt[1024]; if (err != CL_SUCCESS) { snprintf(full_fmt, 1024, "<%s:%" PRIuS ": %s : error %d>", loc->function, loc->line, fmt, err); snprintf(str->buf, str->sz, full_fmt, loc->sname); } return err != CL_SUCCESS; } void report_size_mismatch(struct _strbuf *str, size_t req, size_t ours, const struct info_loc *loc) { snprintf(str->buf, str->sz, "<%s:%" PRIuS ": %s : size mismatch " "(requested %" PRIuS ", we offer %" PRIuS ")>", loc->function, loc->line, loc->sname, req, ours); } #define CHECK_ERROR(error, what) if (check_ocl_error(error, what, __func__, __LINE__)) exit(1) #define REPORT_ERROR(str, err, what) report_ocl_error_basic(str, err, what, __func__, __LINE__) #define REPORT_ERROR_LOC(ret, err, loc, what) report_ocl_error_loc(&((ret)->err_str), err, what, loc) #define REPORT_SIZE_MISMATCH(str, loc, req, ours) report_size_mismatch(str, req, ours, loc) #endif clinfo-3.0.25.02.14/src/ext.h000066400000000000000000000504271475367065600153110ustar00rootroot00000000000000/* Include OpenCL header, and define OpenCL extensions, since what is and is not * available in the official headers is very system-dependent */ #ifndef EXT_H #define EXT_H /* Khronos now provides unified headers for all OpenCL versions, and * it should be included after defining a target OpenCL version * (otherwise, the maximum version will simply be used, but a message * will be printed). 
*
 * TODO: until 3.0 gets finalized, we only target 2.2 because the 3.0
 * defines etc are still changing, so users may have an older version
 * of the 3.0 headers lying around, which may prevent clinfo from being
 * compilable.
 */
#define CL_TARGET_OPENCL_VERSION 220

/* We will use the deprecated clGetExtensionFunctionAddress,
 * so let the headers know that we don't care about it being deprecated.
 * The standard CL_USE_DEPRECATED_OPENCL_1_1_APIS define apparently
 * doesn't work for macOS, so we'll just tell the compiler to not
 * warn about deprecated functions.
 * A more correct solution would be to suppress the warning only around the
 * clGetExtensionFunctionAddress call, but honestly I just cleaned up that
 * piece of code. And I'm actually wondering if it even makes sense to
 * build that part of the code on macOS: does anybody actually use
 * ocl-icd as OpenCL dispatcher on macOS?
 */
/* NOTE(review): the two #include directives below carry no header name in
 * this copy of the file; presumably the <...> operand was lost when the text
 * was extracted -- confirm against the pristine source. */
#ifdef __APPLE__
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include
#else
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include
#endif

/* Very old headers will be missing these defines */
#ifndef CL_VERSION_1_1
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF	0x1034
#define CL_DEVICE_HOST_UNIFIED_MEMORY	0x1035
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR	0x1036
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT	0x1037
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT	0x1038
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG	0x1039
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT	0x103A
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE	0x103B
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF	0x103C
#define CL_DEVICE_OPENCL_C_VERSION	0x103D

#define CL_FP_SOFT_FLOAT	(1 << 6)

#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE	0x11B3
#endif

#ifndef CL_VERSION_1_2
#define CL_DEVICE_TYPE_CUSTOM	(1 << 4)

#define CL_DEVICE_LINKER_AVAILABLE	0x103E
#define CL_DEVICE_BUILT_IN_KERNELS	0x103F
#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE	0x1040
#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE	0x1041
#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES	0x1043
#define CL_DEVICE_PARTITION_PROPERTIES	0x1044
#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN	0x1045
#define CL_DEVICE_PARTITION_TYPE	0x1046
#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC	0x1048
#define CL_DEVICE_PRINTF_BUFFER_SIZE	0x1049
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT	0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT	0x104B

#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT	(1 << 7)

/* cl_device_partition_property */
#define CL_DEVICE_PARTITION_EQUALLY	0x1086
#define CL_DEVICE_PARTITION_BY_COUNTS	0x1087
#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END	0x0
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN	0x1088

/* cl_device_affinity_domain */
#define CL_DEVICE_AFFINITY_DOMAIN_NUMA	(1 << 0)
#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE	(1 << 1)
#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE	(1 << 2)
#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE	(1 << 3)
#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE	(1 << 4)
#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE	(1 << 5)
#endif

/* These two defines were introduced in the 1.2 headers
 * on 2012-11-30, so earlier versions don't have them
 * (e.g. Debian wheezy)
 */
#ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT	0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT	0x104B
#endif

/* 2.0 headers are not very common for the time being, so
 * let's copy the defines for the new CL_DEVICE_* properties
 * here.
 */
#ifndef CL_VERSION_2_0
#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS	0x104C
#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE	0x104D
#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES	0x102A
#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES	0x104E
#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE	0x104F
#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE	0x1050
#define CL_DEVICE_MAX_ON_DEVICE_QUEUES	0x1051
#define CL_DEVICE_MAX_ON_DEVICE_EVENTS	0x1052
#define CL_DEVICE_SVM_CAPABILITIES	0x1053
#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE	0x1054
#define CL_DEVICE_MAX_PIPE_ARGS	0x1055
#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS	0x1056
#define CL_DEVICE_PIPE_MAX_PACKET_SIZE	0x1057
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT	0x1058
#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT	0x1059
#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT	0x105A

#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER	(1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER	(1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM	(1 << 2)
#define CL_DEVICE_SVM_ATOMICS	(1 << 3)

typedef cl_bitfield cl_device_svm_capabilities;
#endif

#ifndef CL_VERSION_2_1
#define CL_PLATFORM_HOST_TIMER_RESOLUTION	0x0905
#define CL_DEVICE_IL_VERSION	0x105B
#define CL_DEVICE_MAX_NUM_SUB_GROUPS	0x105C
#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS	0x105D
#endif

#ifndef CL_VERSION_3_0
#define CL_PLATFORM_NUMERIC_VERSION	0x0906
#define CL_PLATFORM_EXTENSIONS_WITH_VERSION	0x0907
#define CL_DEVICE_NUMERIC_VERSION	0x105E
#define CL_DEVICE_EXTENSIONS_WITH_VERSION	0x1060
#define CL_DEVICE_ILS_WITH_VERSION	0x1061
#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION	0x1062
#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES	0x1063
#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES	0x1064
#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT	0x1065
#define CL_DEVICE_OPENCL_C_ALL_VERSIONS	0x1066
#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE	0x1067
#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT	0x1068
#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT	0x1069
#define CL_DEVICE_OPENCL_C_FEATURES	0x106F
#define CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES	0x1070
#define CL_DEVICE_PIPE_SUPPORT	0x1071
#define CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED	0x1072

typedef cl_bitfield cl_device_atomic_capabilities;
typedef cl_bitfield cl_device_device_enqueue_capabilities;
typedef cl_uint cl_version;

#define CL_NAME_VERSION_MAX_NAME_SIZE 64

typedef struct _cl_name_version {
	cl_version version;
	char name[CL_NAME_VERSION_MAX_NAME_SIZE];
} cl_name_version;

/* cl_device_atomic_capabilities */
#define CL_DEVICE_ATOMIC_ORDER_RELAXED	(1 << 0)
#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL	(1 << 1)
#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST	(1 << 2)
#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM	(1 << 3)
#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP	(1 << 4)
#define CL_DEVICE_ATOMIC_SCOPE_DEVICE	(1 << 5)
#define CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES	(1 << 6)

/* cl_device_device_enqueue_capabilities */
#define CL_DEVICE_QUEUE_SUPPORTED	(1 << 0)
#define CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT	(1 << 1)
#endif

/*
 * Extensions
 */

/* cl_khr_extended_versioning */
// the _KHR fields are the same as the unsuffixed from OpenCL 3
#define CL_PLATFORM_NUMERIC_VERSION_KHR	CL_PLATFORM_NUMERIC_VERSION
#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR	CL_PLATFORM_EXTENSIONS_WITH_VERSION
#define CL_DEVICE_NUMERIC_VERSION_KHR	CL_DEVICE_NUMERIC_VERSION
#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR	0x105F
#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR	CL_DEVICE_EXTENSIONS_WITH_VERSION
#define CL_DEVICE_ILS_WITH_VERSION_KHR	CL_DEVICE_ILS_WITH_VERSION
#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR	CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION

/* cl_khr_image2d_from_buffer */
// the _KHR fields are the same as the unsuffixed from OpenCL 2
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR	CL_DEVICE_IMAGE_PITCH_ALIGNMENT
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR	CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT

/* cl_khr_icd */
#define CL_PLATFORM_ICD_SUFFIX_KHR	0x0920
#define CL_PLATFORM_NOT_FOUND_KHR	-1001

/* cl_khr_kernel_clock */
#define CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR	0x1076
typedef cl_bitfield cl_device_kernel_clock_capabilities_khr;
#define CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR	(1 << 0)
#define CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR	(1 << 1)
#define CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR	(1 << 2)

/* cl_amd_object_metadata */
#define CL_PLATFORM_MAX_KEYS_AMD	0x403C

/* cl_khr_device_uuid extension */
#define CL_UUID_SIZE_KHR	16
#define CL_LUID_SIZE_KHR	8

#define CL_DEVICE_UUID_KHR	0x106A
#define CL_DRIVER_UUID_KHR	0x106B
#define CL_DEVICE_LUID_VALID_KHR	0x106C
#define CL_DEVICE_LUID_KHR	0x106D
#define CL_DEVICE_NODE_MASK_KHR	0x106E

/* cl_khr_fp64 */
#define CL_DEVICE_DOUBLE_FP_CONFIG	0x1032

/* cl_khr_fp16 */
#define CL_DEVICE_HALF_FP_CONFIG	0x1033

/* cl_khr_il_program */
#define CL_DEVICE_IL_VERSION_KHR	0x105B

/* cl_khr_command_buffer */
#define CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR	0x12A9
#define CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR	0x12AA
typedef cl_bitfield cl_device_command_buffer_capabilities_khr;

/* cl_khr_command_buffer_multi_device */
#define CL_PLATFORM_COMMAND_BUFFER_CAPABILITIES_KHR	0x0908
typedef cl_bitfield cl_platform_command_buffer_capabilities_khr;
#define CL_COMMAND_BUFFER_PLATFORM_UNIVERSAL_SYNC_KHR	(1 << 0)
#define CL_COMMAND_BUFFER_PLATFORM_REMAP_QUEUES_KHR	(1 << 1)
#define CL_COMMAND_BUFFER_PLATFORM_AUTOMATIC_REMAP_KHR	(1 << 2)

#define CL_DEVICE_COMMAND_BUFFER_NUM_SYNC_DEVICES_KHR	0x12AB
#define CL_DEVICE_COMMAND_BUFFER_SYNC_DEVICES_KHR	0x12AC

/* cl_khr_command_buffer_mutable_dispatch */
#define CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR	0x12B0
typedef cl_bitfield cl_mutable_dispatch_fields_khr;

/* cl_khr_terminate_context */
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x	0x200F
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR	0x2031

/* TODO: I cannot find official definitions for these,
 * so I'm currently extrapolating them from the specification
 */
typedef cl_bitfield cl_device_terminate_capability_khr;
#define CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR	(1<<0)

/* cl_khr_subgroup_named_barrier */
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR	0x2035

/* cl_khr_semaphore */
#define CL_PLATFORM_SEMAPHORE_TYPES_KHR	0x2036
#define CL_DEVICE_SEMAPHORE_TYPES_KHR	0x204C
typedef cl_uint cl_semaphore_type_khr;

/* cl_khr_external_semaphore */
#define CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR	0x2037
#define CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR	0x2038
#define CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR	0x204D
#define CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR	0x204E
typedef cl_uint cl_external_semaphore_handle_type_khr;

/* cl_khr_external_memory */
#define CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR	0x2044
#define CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR	0x204F
// introduced in 0.9.3, according to https://registry.khronos.org/OpenCL/sdk/3.0/docs/man/html/cl_khr_external_memory.html
#define CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR	0x2052
typedef cl_uint cl_external_memory_handle_type_khr;

/* cl_khr_pci_bus_info */
typedef struct _cl_device_pci_bus_info_khr {
	cl_uint pci_domain;
	cl_uint pci_bus;
	cl_uint pci_device;
	cl_uint pci_function;
} cl_device_pci_bus_info_khr;
#define CL_DEVICE_PCI_BUS_INFO_KHR	0x410F

/* cl_nv_device_attribute_query */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV	0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV	0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV	0x4002
#define CL_DEVICE_WARP_SIZE_NV	0x4003
#define CL_DEVICE_GPU_OVERLAP_NV	0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV	0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV	0x4006
#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV	0x4007
#define CL_DEVICE_PCI_BUS_ID_NV	0x4008
#define CL_DEVICE_PCI_SLOT_ID_NV	0x4009
#define CL_DEVICE_PCI_DOMAIN_ID_NV	0x400A

/* cl_ext_atomic_counters_{32,64} */
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT	0x4032

/* cl_ext_float_atomics */
typedef cl_bitfield cl_device_fp_atomic_capabilities_ext;
/* cl_device_fp_atomic_capabilities_ext */
#define CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT	(1 << 0)
#define CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT	(1 << 1)
#define CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT	(1 << 2)
#define CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT	(1 << 16)
#define CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT	(1 << 17)
#define CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT	(1 << 18)
/* cl_device_info */
#define CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT	0x4231
#define CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT	0x4232
#define CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT	0x4233

/* cl_amd_device_attribute_query */
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD	0x4036
#define CL_DEVICE_TOPOLOGY_AMD	0x4037
#define CL_DEVICE_BOARD_NAME_AMD	0x4038
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD	0x4039
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD	0x4040
#define CL_DEVICE_SIMD_WIDTH_AMD	0x4041
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD	0x4042
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD	0x4043
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD	0x4044
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD	0x4045
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD	0x4046
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD	0x4047
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD	0x4048
#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD	0x4049
#define CL_DEVICE_GFXIP_MAJOR_AMD	0x404A
#define CL_DEVICE_GFXIP_MINOR_AMD	0x404B
#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD	0x404C
/* These two are undocumented */
#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD	0x404D
#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD	0x404E
/* These were added in v4 of the extension, but have values lower
 * than the older ones, and spanning around the cl_ext_atomic_counters_*
 * define
 */
#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD	0x4030
#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD	0x4031
#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD	0x4033
#define CL_DEVICE_PCIE_ID_AMD	0x4034

#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD	1

typedef union
{
	struct { cl_uint type; cl_uint data[5]; } raw;
	struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} cl_device_topology_amd;
#endif

/* cl_amd_offline_devices */
#define CL_CONTEXT_OFFLINE_DEVICES_AMD	0x403F

/* cl_amd_copy_buffer_p2p */
#define CL_DEVICE_NUM_P2P_DEVICES_AMD	0x4088
#define CL_DEVICE_P2P_DEVICES_AMD	0x4089

/* cl_ext_cxx_for_opencl */
#define CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT	0x4230

/* cl_ext_device_fission */
#define cl_ext_device_fission	1

typedef cl_ulong cl_device_partition_property_ext;

#define CL_DEVICE_PARTITION_EQUALLY_EXT	0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT	0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT	0x4052
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL	0x4052 /* cl_intel_device_partition_by_names */
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT	0x4053

#define CL_DEVICE_PARENT_DEVICE_EXT	0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT	0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT	0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT	0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT	0x4058

#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT	0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT	0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT	0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT	0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT	0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT	0x100

/* cl_intel_advanced_motion_estimation */
#define CL_DEVICE_ME_VERSION_INTEL	0x407E

/* cl_intel_device_side_avc_motion_estimation */
#define CL_DEVICE_AVC_ME_VERSION_INTEL	0x410B
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL	0x410C
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL	0x410D

/* cl_intel_planar_yuv */
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL	0x417E
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL	0x417F

/* cl_intel_unified_shared_memory */
#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL	0x4190
#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL	0x4191
#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL	0x4192
#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL	0x4193
#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL	0x4194

/* cl_qcom_ext_host_ptr */
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM	0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM	0x40A1

/* cl_arm_shared_virtual_memory */
#define CL_DEVICE_SVM_CAPABILITIES_ARM	0x40B6
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM	CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM	CL_DEVICE_SVM_FINE_GRAIN_BUFFER
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM	CL_DEVICE_SVM_FINE_GRAIN_SYSTEM
#define CL_DEVICE_SVM_ATOMICS_ARM	CL_DEVICE_SVM_ATOMICS

/* cl_arm_core_id */
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM	0x40BF

/* cl_arm_controlled_kernel_termination */
#define CL_DEVICE_CONTROLLED_TERMINATION_CAPABILITIES_ARM	0x41EE

typedef cl_bitfield cl_device_controlled_termination_capabilities_arm;

#define CL_DEVICE_CONTROLLED_TERMINATION_SUCCESS_ARM	(1 << 0)
#define CL_DEVICE_CONTROLLED_TERMINATION_FAILURE_ARM	(1 << 1)
#define CL_DEVICE_CONTROLLED_TERMINATION_QUERY_ARM	(1 << 2)

/* cl_khr_spir */
#define CL_DEVICE_SPIR_VERSIONS	0x40E0

/* cl_altera_device_temperature */
#define CL_DEVICE_CORE_TEMPERATURE_ALTERA	0x40F3

/* cl_intel_simultaneous_sharing */
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL	0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL	0x4105

/* cl_intel_required_subgroup_size */
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL	0x4108

/* cl_intel_command_queue_families */
#define CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL	0x418B

typedef cl_bitfield cl_command_queue_capabilities_intel;

#define CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL	64
typedef struct _cl_queue_family_properties_intel {
	cl_command_queue_properties properties;
	cl_command_queue_capabilities_intel capabilities;
	cl_uint count;
	char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL];
} cl_queue_family_properties_intel;

/* cl_arm_job_slot_selection */
#define CL_DEVICE_JOB_SLOTS_ARM	0x41E0

/* cl_arm_scheduling_controls */

typedef cl_bitfield cl_device_scheduling_controls_capabilities_arm;

#define CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM	0x41E4

#define CL_DEVICE_SCHEDULING_KERNEL_BATCHING_ARM	(1 << 0)
#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM	(1 << 1)
#define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM	(1 << 2)
#define CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM	(1 << 3)
#define CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM	(1 << 4)
#define CL_DEVICE_SCHEDULING_WARP_THROTTLING_ARM	(1 << 5)
#define CL_DEVICE_SCHEDULING_COMPUTE_UNIT_BATCH_QUEUE_SIZE_ARM	(1 << 6)
#define CL_DEVICE_SCHEDULING_COMPUTE_UNIT_LIMIT_ARM	(1 << 7)

#define CL_DEVICE_MAX_WARP_COUNT_ARM	0x41EA
#define CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM	0x41EB

/* cl_intel_device_attribute_query */

typedef cl_bitfield cl_device_feature_capabilities_intel;

#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL	(1 << 0)
#define CL_DEVICE_FEATURE_FLAG_DPAS_INTEL	(1 << 1)

#define CL_DEVICE_IP_VERSION_INTEL	0x4250
#define CL_DEVICE_ID_INTEL	0x4251
#define CL_DEVICE_NUM_SLICES_INTEL	0x4252
#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL	0x4253
#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL	0x4254
#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL	0x4255
#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL	0x4256

/* clGetICDLoaderInfoOCLICD */
typedef enum {
	CL_ICDL_OCL_VERSION=1,
	CL_ICDL_VERSION=2,
	CL_ICDL_NAME=3,
	CL_ICDL_VENDOR=4,
} cl_icdl_info;

#endif
clinfo-3.0.25.02.14/src/fmtmacros.h000066400000000000000000000015111475367065600164720ustar00rootroot00000000000000/* cl_ulong is always a 64bit integer, so in a few places
   we want to use its shadow type uint64_t, and print the
   values using PRIu64. We'll similarly define one for
   size_t, to make support for non-standard/older compiler
   easier.
*/ #ifndef FMT_MACROS_H #define FMT_MACROS_H #ifdef _WIN32 /* TODO FIXME WIN64 support */ # include # include // size_t # define PRIu32 "I32u" # define PRId32 "I32d" # define PRIx32 "I32x" # define PRIX32 "I32X" # define PRIu64 "I64u" # define PRIx64 "I64x" # define PRIX64 "I64X" # define PRIuS "Iu" #if INTPTR_MAX <= INT32_MAX # define PRIXPTR PRIX32 # define PRIxPTR PRIx32 #else # define PRIXPTR PRIX64 # define PRIxPTR PRIx64 #endif #else # define __STDC_FORMAT_MACROS # include #endif // size_t print spec #ifndef PRIuS # define PRIuS "zu" #endif #endif clinfo-3.0.25.02.14/src/info_loc.h000066400000000000000000000012351475367065600162720ustar00rootroot00000000000000#ifndef INFO_LOC_H #define INFO_LOC_H #include "ext.h" struct info_loc { const char *function; const char *sname; // parameter symbolic name const char *pname; // parameter printable name size_t line; cl_platform_id plat; cl_device_id dev; union { cl_platform_info plat; cl_device_info dev; cl_icdl_info icdl; } param; }; static inline void reset_loc(struct info_loc *loc, const char *func) { loc->function = func; loc->sname = loc->pname = NULL; loc->line = 0; loc->plat = NULL; loc->dev = NULL; loc->param.plat = 0; } #define RESET_LOC_PARAM(_loc, _dev, _param) do { \ _loc.param._dev = _param; \ _loc.sname = #_param; \ } while (0) #endif clinfo-3.0.25.02.14/src/info_ret.h000066400000000000000000000045431475367065600163140ustar00rootroot00000000000000#ifndef INFO_RET_H #define INFO_RET_H #include "ext.h" #include "strbuf.h" /* Return type of the functions that gather platform info */ struct platform_info_ret { cl_int err; /* string representation of the value (if any) */ struct _strbuf str; /* error representation of the value (if any) */ struct _strbuf err_str; /* actual value, when not a string */ union { size_t s; cl_uint u32; cl_ulong u64; } value; /* Does this ret need escaping as JSON? 
*/
	cl_bool needs_escaping;
};

/* Return type of the functions that print device info */
struct device_info_ret {
	cl_int err;
	/* string representation of the value (if any) */
	struct _strbuf str;
	/* error representation of the value (if any) */
	struct _strbuf err_str;
	/* actual value, when not a string */
	union {
		size_t s;
		cl_long i64;
		cl_ulong u64;
		cl_ulong2 u64v2;
		cl_ulong4 u64v;
		cl_int i32;
		cl_uint u32;
		cl_uint4 u32v;
		cl_bitfield bits;
		cl_bool b;
		cl_device_type devtype;
		cl_device_mem_cache_type cachetype;
		cl_device_local_mem_type lmemtype;
		cl_device_topology_amd devtopo_amd;
		cl_device_pci_bus_info_khr devtopo_khr;
		cl_device_scheduling_controls_capabilities_arm sched_controls;
		cl_device_affinity_domain affinity_domain;
		cl_device_fp_config fpconfig;
		cl_device_fp_atomic_capabilities_ext fp_atomic_caps;
		cl_command_queue_properties qprop;
		cl_device_command_buffer_capabilities_khr cmdbufcap;
		cl_device_exec_capabilities execap;
		cl_device_svm_capabilities svmcap;
		cl_device_terminate_capability_khr termcap;
	} value;
	/* pointer base for array data or other auxiliary information */
	union {
		void *ptr; // TODO
		cl_context ctx; // associated context
	} base;
	/* Does this ret need escaping as JSON? */
	cl_bool needs_escaping;
};

/* Return type of the functions that gather ICD loader info */
struct icdl_info_ret {
	cl_int err;
	/* string representation of the value (if any) */
	struct _strbuf str;
	/* error representation of the value (if any) */
	struct _strbuf err_str;
};

/* Select the buffer holding the text to show for a *_info_ret: the error
 * buffer when err is non-zero, the value buffer otherwise.
 * RET_BUF takes the struct itself, RET_BUF_PTR a pointer to it.
 * The argument is not parenthesized in the expansion, so pass a plain
 * identifier. */
#define RET_BUF(ret) (ret.err ? &ret.err_str : &ret.str)
#define RET_BUF_PTR(ret) (ret->err ? &ret->err_str : &ret->str)
/* Initialize both string buffers of ret; msg must be a string literal, since
 * it is concatenated with the suffix to form the allocation description. */
#define INIT_RET(ret, msg) do { \
	init_strbuf(&ret.str, msg " info string values"); \
	init_strbuf(&ret.err_str, msg " info error values"); \
} while (0)

/* Release both string buffers of ret */
#define UNINIT_RET(ret) do { \
	free_strbuf(&ret.str); \
	free_strbuf(&ret.err_str); \
} while (0)

#endif
clinfo-3.0.25.02.14/src/memory.h000066400000000000000000000007501475367065600160130ustar00rootroot00000000000000/* Memory handling */

#ifndef MEMORY_H
#define MEMORY_H

/* NOTE(review): this #include has no header name in this copy of the file;
 * presumably the <...> operand was lost in extraction -- confirm against the
 * pristine source. */
#include

/* Terminate the program (exit status 1) with an "Out of memory" message on
 * stderr if var is a null pointer; what describes the failed allocation. */
#define CHECK_MEM(var, what) do { \
	if (!(var)) { \
		fprintf(stderr, "%s:%d: %s : Out of memory\n", \
				__func__, __LINE__, what); \
		exit(1); \
	} \
} while (0)

/* Allocate num zero-initialized elements of the type var points to,
 * aborting via CHECK_MEM on failure */
#define ALLOC(var, num, what) do { \
	var = calloc(num, sizeof(*(var))); \
	CHECK_MEM(var, what); \
} while (0)

/* Resize var to hold num elements of the type it points to, aborting via
 * CHECK_MEM on failure. The result is assigned straight back to var, which
 * is only acceptable because failure never returns to the caller. */
#define REALLOC(var, num, what) do { \
	var = realloc(var, (num)*sizeof(*(var))); \
	CHECK_MEM(var, what); \
} while (0)

#endif
clinfo-3.0.25.02.14/src/ms_support.h000066400000000000000000000031751475367065600167220ustar00rootroot00000000000000/* Missing functions and other misc stuff to support
 * the horrible MS C compiler
 *
 * TODO could be improved by version-checking for C99 support
 */

#ifndef MS_SUPPORT
#define MS_SUPPORT

// disable warning about unsafe strncpy vs strncpy_s usage
#pragma warning(disable : 4996)
// disable warning about constant conditional expressions
#pragma warning(disable : 4127)
// disable warning about non-constant aggregate initializer
#pragma warning(disable : 4204)
// disable warning about global shadowing
#pragma warning(disable : 4459)
// disable warning about parameter shadowing
#pragma warning(disable : 4457)

// Suppress warning about unused parameters. The macro definition
// _should_ work, but it doesn't on VS2012 (cl 17), may be a version thing
#define UNUSED(x) x __pragma(warning(suppress: 4100))
// TODO FIXME remove full-blown warning removal where not needed
#pragma warning(disable: 4100)

// No inline in MS C
#define inline __inline

// No snprintf in MS C, copy over implementation taken from
// stackoverflow

// NOTE(review): the two #include directives below have no header name in this
// copy of the file; presumably the <...> operands were lost in extraction.
#include
#include

// vsnprintf replacement with the C99 return convention: when the truncating
// write reports -1 (or when size is 0, in which case nothing is written),
// the length the full output would need is computed with _vscprintf and
// returned instead.
inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap)
{
	int count = -1;

	if (size != 0)
		count = _vsnprintf_s(str, size, _TRUNCATE, format, ap);
	if (count == -1)
		count = _vscprintf(format, ap);

	return count;
}

// snprintf replacement: variadic front-end to c99_vsnprintf
inline int c99_snprintf(char* str, size_t size, const char* format, ...)
{
	int count;
	va_list ap;

	va_start(ap, format);
	count = c99_vsnprintf(str, size, format, ap);
	va_end(ap);

	return count;
}

// from here on every use of snprintf resolves to the replacement above
#define snprintf c99_snprintf

// And no __func__ either
#define __func__ __FUNCTION__

#endif
clinfo-3.0.25.02.14/src/opt_out.h000066400000000000000000000060131475367065600161720ustar00rootroot00000000000000/* clinfo output options */

#ifndef OPT_OUT_H
#define OPT_OUT_H

/* NOTE(review): this #include has no header name in this copy of the file;
 * presumably the <...> operand was lost in extraction. */
#include
#include "ext.h"

/* Output flavours; the values are bit flags, so that CLINFO_BOTH is the
 * combination of the other two */
enum output_modes {
	CLINFO_HUMAN = 1, /* more human readable */
	CLINFO_RAW = 2, /* property-by-property */
	CLINFO_BOTH = CLINFO_HUMAN | CLINFO_RAW
};

/* Specify how we should handle conditional properties.
*/ enum cond_prop_modes { COND_PROP_CHECK = 0, /* default: check, skip if invalid */ COND_PROP_TRY = 1, /* try, don't print an error if invalid */ COND_PROP_SHOW = 2 /* try, print an error if invalid */ }; /* Output options */ struct opt_out { enum output_modes mode; enum cond_prop_modes cond; /* Specify that we should only print information about specific devices */ /* TODO proper memory management */ #define MAX_SELECTED_DEVICES 256 cl_uint2 selected_devices[MAX_SELECTED_DEVICES]; size_t num_selected_devices; /* Specify that we should only print information about a specific property */ /* TODO proper memory management */ #define MAX_SELECTED_PROPS 256 const char *selected_props[MAX_SELECTED_PROPS]; size_t num_selected_props; /* Specify if we should only be listing the platform and devices; * can be done in both human and raw mode, and only the platform * and device names (and number) will be shown * TODO check if terminal supports UTF-8 and use Unicode line-drawing * for the tree in list mode */ cl_bool brief; cl_bool detailed; // !brief cl_bool offline; cl_bool null_platform; /* JSON output for RAW */ cl_bool json; /* clGetDeviceInfo returns CL_INVALID_VALUE both for unknown properties * and when the destination variable is too small. 
Set the following to CL_TRUE * to check which one is the case */ cl_bool check_size; }; static inline cl_bool is_selected_platform(const struct opt_out *output, cl_uint p) { if (output->num_selected_devices == 0) return CL_TRUE; for (cl_uint i = 0; i < output->num_selected_devices; ++i) { if (p == output->selected_devices[i].s[0]) return CL_TRUE; } return CL_FALSE; } static inline cl_bool is_selected_device(const struct opt_out *output, cl_uint p, cl_uint d) { if (output->num_selected_devices == 0) return CL_TRUE; for (cl_uint i = 0; i < output->num_selected_devices; ++i) { const cl_uint2 cmp = output->selected_devices[i]; if (p == cmp.s[0] && d == cmp.s[1]) return CL_TRUE; } return CL_FALSE; } static inline cl_bool is_selected_prop(const struct opt_out *output, const char *prop) { if (output->num_selected_props == 0) return CL_TRUE; for (cl_uint i = 0; i < output->num_selected_props; ++i) { if (strstr(prop, output->selected_props[i])) return CL_TRUE; } return CL_FALSE; } static inline cl_bool is_requested_prop(const struct opt_out *output, const char *prop) { // NOTE the difference compared to the above: here we are checking if a specific property // was *requested*, so if none was explicitly requested we return false here. if (output->num_selected_props == 0) return CL_FALSE; for (cl_uint i = 0; i < output->num_selected_props; ++i) { if (strstr(prop, output->selected_props[i])) return CL_TRUE; } return CL_FALSE; } #endif clinfo-3.0.25.02.14/src/strbuf.h000066400000000000000000000071341475367065600160130ustar00rootroot00000000000000/* multi-purpose string _strbuf, will be initialized to be * at least 1024 bytes long. 
*/ #ifndef STRBUF_H #define STRBUF_H #include #include #include #include #include #include "memory.h" #include "fmtmacros.h" struct _strbuf { char *buf; size_t sz; /* allocated size */ size_t end; /* offset to terminating null byte */ }; static inline void realloc_strbuf(struct _strbuf *str, size_t nusz, const char* what) { if (nusz > str->sz) { REALLOC(str->buf, nusz, what); str->sz = nusz; } } static inline void reset_strbuf(struct _strbuf *str) { str->end = 0; if (str->buf) str->buf[0] = '\0'; } static inline void init_strbuf(struct _strbuf *str, const char *what) { str->sz = 0; str->buf = NULL; realloc_strbuf(str, 1024, what); reset_strbuf(str); } static inline void free_strbuf(struct _strbuf *str) { free(str->buf); str->buf = NULL; reset_strbuf(str); } static inline void strbuf_append(const char *what, struct _strbuf *str, const char *fmt, ...) { va_list ap; size_t room = str->sz - str->end - 1; size_t written = 0; /* write if we have room */ va_start(ap, fmt); written = vsnprintf(str->buf + str->end, room, fmt, ap); va_end(ap); /* if we would have written more, we need to expand the storage */ if (written >= room) { realloc_strbuf(str, str->end + written + 1, what); room = str->sz - str->end; /* and re-write */ va_start(ap, fmt); written = vsnprintf(str->buf + str->end, room, fmt, ap); va_end(ap); } str->end += written; } static inline void strbuf_append_str_len(const char *what, struct _strbuf *str, const char *to_append, /* string to append */ size_t len) /* length of string to append */ { size_t room = str->sz - str->end - 1; if (len >= room) { realloc_strbuf(str, str->end + len + 1, what); } /* copy up to the terminating NULL */ memcpy(str->buf + str->end, to_append, len); str->end += len; /* ensure we have a NULL in last position, since len may have been used * to override the original string length */ str->buf[str->end] = '\0'; } static inline void strbuf_append_str(const char *what, struct _strbuf *str, const char *to_append) { 
strbuf_append_str_len(what, str, to_append, strlen(to_append)); } #define GET_STRING(str, err, cmd, param, param_str, ...) do { \ size_t nusz; \ err = cmd(__VA_ARGS__, param, 0, NULL, &nusz); \ if (REPORT_ERROR(str, err, "get " param_str " size")) break; \ realloc_strbuf(str, nusz, #param); \ err = cmd(__VA_ARGS__, param, (str)->sz, (str)->buf, NULL); \ if (REPORT_ERROR(str, err, "get " param_str)) break; \ (str)->end = nusz; \ } while (0) #define GET_STRING_LOC(ret, loc, cmd, ...) do { \ size_t nusz; \ ret->err = REPORT_ERROR_LOC(ret, \ cmd(__VA_ARGS__, 0, NULL, &nusz), \ loc, "get %s size"); \ if (!ret->err) { \ realloc_strbuf(&ret->str, nusz, loc->sname); \ ret->err = REPORT_ERROR_LOC(ret, \ cmd(__VA_ARGS__, ret->str.sz, ret->str.buf, NULL), \ loc, "get %s"); \ } \ if (!ret->err) { \ ret->str.end = nusz; \ } \ } while (0) /* Skip leading whitespace in a string */ static inline const char* skip_leading_ws(const char *str) { const char *ret = str; while (isspace((unsigned char) *ret)) ++ret; return ret; } /* Separators: we want to be able to prepend separators as needed to _strbuf, * which we do only if halfway through the buffer. The callers should first * call a 'set_separator' and then use add_separator(&offset) to add it, where szval * is an offset inside the buffer, which will be incremented as needed */ const char *sep; size_t sepsz; void set_separator(const char* _sep) { sep = _sep; sepsz = strlen(sep); } #endif