pax_global_header 0000666 0000000 0000000 00000000064 13256213726 0014521 g ustar 00root root 0000000 0000000 52 comment=eb7ad3ab7d6be0f29d8ff98c32a0e8916719424a
clinfo-2.2.18.03.26/ 0000775 0000000 0000000 00000000000 13256213726 0013534 5 ustar 00root root 0000000 0000000 clinfo-2.2.18.03.26/.appveyor.yml 0000664 0000000 0000000 00000000530 13256213726 0016200 0 ustar 00root root 0000000 0000000 version: 2.2.18.03.22-{build}
# Build worker image
image: Visual Studio 2015
shallow_clone: true
# One build per target architecture; the value is passed on as %PLATFORM%
platform:
- x86
- x64
# Set up the compiler environment for the selected platform.
# NOTE(review): the path names "Microsoft Visual Studio 12.0" while the image is "Visual Studio 2015" - confirm this is the intended toolchain
init:
- cmd: C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM%
# Fetch the OpenCL headers and libraries (see fetch-opencl-dev-win.cmd)
install:
- cmd: fetch-opencl-dev-win.cmd %PLATFORM%
# make.cmd runs nmake on Makefile.win
build_script:
- cmd: make.cmd
# Smoke test: run the binary that was just built
test_script:
- cmd: clinfo
# Keep the executable as a downloadable build artifact
artifacts:
- path: clinfo.exe
clinfo-2.2.18.03.26/.gitignore 0000664 0000000 0000000 00000000022 13256213726 0015516 0 ustar 00root root 0000000 0000000 clinfo
.*.swp
*.o
clinfo-2.2.18.03.26/.travis.yml 0000664 0000000 0000000 00000000350 13256213726 0015643 0 ustar 00root root 0000000 0000000 os:
- linux
- osx
dist: trusty
# Install the OpenCL ICD loader development package from the universe repository
addons:
apt:
sources:
- sourceline: "deb http://archive.ubuntu.com/ubuntu trusty universe"
packages:
- ocl-icd-opencl-dev
language: c
# Build with each of these compilers
compiler:
- gcc
- clang
# Build, then run the result as a smoke test
script: make && ./clinfo
clinfo-2.2.18.03.26/GNUmakefile 0000664 0000000 0000000 00000000102 13256213726 0015577 0 ustar 00root root 0000000 0000000 # GNU Make specifics
# Kernel name as reported by uname (evaluated once); Makefile uses $(OS)
# to pick the OS-specific LDLIBS_$(OS) settings.
OS := $(shell uname -s)
# Everything else is shared with the generic Makefile.
include Makefile
clinfo-2.2.18.03.26/LICENSE 0000664 0000000 0000000 00000000473 13256213726 0014545 0 ustar 00root root 0000000 0000000 clinfo by Giuseppe Bilotta
To the extent possible under law, the person who associated CC0 with
clinfo has waived all copyright and related or neighboring rights
to clinfo.
You should have received a copy of the CC0 legalcode along with this
work. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>
clinfo-2.2.18.03.26/Makefile 0000664 0000000 0000000 00000002227 13256213726 0015177 0 ustar 00root root 0000000 0000000 # Headers
PROG = clinfo
MAN = man1/$(PROG).1

# Project headers $(PROG).o depends on. This list must name every header
# pulled in by src/$(PROG).c with #include "...", otherwise editing a
# header will not trigger a rebuild. Keep it in sync with HDR in
# Makefile.win.
HDR =	src/error.h \
	src/ext.h \
	src/ctx_prop.h \
	src/fmtmacros.h \
	src/memory.h \
	src/ms_support.h \
	src/strbuf.h

# Sources live in src/, so that $(PROG).c below resolves to src/$(PROG).c
VPATH = src

# User-overridable defaults, followed by the flags that are always added
CFLAGS ?= -g -pedantic -Werror
CFLAGS += -std=c99 -Wall -Wextra

SPARSE ?= sparse
SPARSEFLAGS=-Wsparse-all -Wno-decl

# BSD make does not define RM
RM ?= rm -f

# Installation paths and modes
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
BINMODE ?= 555
MANDIR ?= $(PREFIX)/man
MANMODE ?= 444

# Common library includes
LDLIBS = -lOpenCL -ldl

# OS-specific library includes
# (OS is set by GNUmakefile; when it is unset these expand to nothing)
LDLIBS_Darwin = -framework OpenCL
LDLIBS_Darwin_exclude = -lOpenCL

LDLIBS += $(LDLIBS_${OS})

# Remove -lOpenCL if OS is Darwin
LDLIBS := $(LDLIBS:$(LDLIBS_${OS}_exclude)=)

#
# Standard targets
#

# No recipes here: compiling and linking are left to make's built-in rules
$(PROG): $(PROG).o

$(PROG).o: $(PROG).c $(HDR)

clean:
	$(RM) $(PROG).o $(PROG)

$(BINDIR):
	install -d $@

$(MANDIR)/man1:
	install -d $@

$(BINDIR)/$(PROG): $(PROG) $(BINDIR)
	install -p -m $(BINMODE) $(PROG) $@

$(MANDIR)/$(MAN): $(MAN) $(MANDIR)/man1
	install -p -m $(MANMODE) $(MAN) $@

install: $(BINDIR)/$(PROG) $(MANDIR)/$(MAN)

# Static analysis of the main source with sparse
sparse: $(PROG).c
	$(SPARSE) $(CPPFLAGS) $(CFLAGS) $(SPARSEFLAGS) $^

.PHONY: clean sparse install
clinfo-2.2.18.03.26/Makefile.win 0000664 0000000 0000000 00000003150 13256213726 0015767 0 ustar 00root root 0000000 0000000 # TODO FIXME find a better way to detect the directory to use
# for OpenCL development files
# Pick the first non-empty candidate for the OpenCL SDK root: an explicit
# OPENCLDIR, then the Intel SDK, then the AMD SDK, then the directory nmake
# was started from, and finally the current directory.
!IF "$(OPENCLDIR)" == ""
OPENCLDIR = $(INTELOCLSDKROOT)
!ENDIF
!IF "$(OPENCLDIR)" == ""
OPENCLDIR = $(AMDAPPSDKROOT)
!ENDIF
!IF "$(OPENCLDIR)" == ""
OPENCLDIR = $(MAKEDIR)
!ENDIF
!IF "$(OPENCLDIR)" == ""
OPENCLDIR = .
!ENDIF
!MESSAGE OpenCL dir: $(OPENCLDIR)
# Project headers clinfo.obj depends on
HDR = src/error.h \
src/ext.h \
src/ctx_prop.h \
src/fmtmacros.h \
src/memory.h \
src/ms_support.h \
src/strbuf.h
CFLAGS = /GL /Ox /W4 /Zi /I"$(OPENCLDIR)\include" /nologo
LIBS = libOpenCL.a
# TODO there's most likely a better way to do the multiarch
# switching
# First guess: follow the architecture of the host processor
!IF "$(PROCESSOR_ARCHITECTURE)" == "AMD64"
ARCH=64
!ELSE
ARCH=32
!ENDIF
# An explicit PLATFORM (either letter case) overrides the guess above
# Platform=x64 in the 64-bit cross-platform build of my VS
!IF "$(PLATFORM)" == "x64" || "$(PLATFORM)" == "X64"
ARCH=64
!ELSE IF "$(PLATFORM)" == "x86" || "$(PLATFORM)" == "X86"
ARCH=32
!ENDIF
!MESSAGE Building for $(ARCH)-bit (processor architecture: $(PROCESSOR_ARCHITECTURE), platform: $(PLATFORM))
# Candidate library directories under the SDK root, per architecture
LIBPATH32 = /LIBPATH:"$(OPENCLDIR)\lib" /LIBPATH:"$(OPENCLDIR)\lib\x86"
LIBPATH64 = /LIBPATH:"$(OPENCLDIR)\lib\x64" /LIBPATH:"$(OPENCLDIR)\lib\x86_64" /LIBPATH:"$(OPENCLDIR)\lib\x86_amd64"
# And since we can't do $(LIBPATH$(ARCH)) with nmake ...
!IF "$(ARCH)" == "64"
LINKOPTS = /LTCG $(LIBPATH64) /nologo
!ELSE
LINKOPTS = /LTCG $(LIBPATH32) /nologo
!ENDIF
# Link step (first target, hence the default goal)
clinfo.exe: clinfo.obj
link $(LINKOPTS) $(LIBS) clinfo.obj /out:clinfo.exe
# Compile step
clinfo.obj: src/clinfo.c $(HDR)
$(CC) $(CFLAGS) /c src/clinfo.c /Foclinfo.obj
clean:
del /F /Q clinfo.exe clinfo.obj
# NOTE(review): .PHONY is a GNU/BSD make convention; confirm nmake gives it any special meaning
.PHONY: clean
clinfo-2.2.18.03.26/README.md 0000664 0000000 0000000 00000005126 13256213726 0015017 0 ustar 00root root 0000000 0000000 # What is this?
clinfo is a simple command-line application that enumerates all possible
(known) properties of the OpenCL platform and devices available on the
system.
Inspired by AMD's program of the same name, it is coded in pure C and it
tries to output all possible information, including those provided by
platform-specific extensions, trying not to crash on unsupported
properties (e.g. 1.2 properties on 1.1 platforms).
# Usage
clinfo [options...]
Commonly used options are `-l` to show a synthetic summary of the
available devices (without properties), and `-a`, to try and show
properties even if `clinfo` would otherwise think they aren't supported
by the platform or device.
Refer to the man page for further information.
## Use cases
* verify that your OpenCL environment is set up correctly;
if `clinfo` cannot find any platform or devices (or fails to load
the OpenCL dispatcher library), chances are high no other OpenCL
application will run;
* verify that your OpenCL _development_ environment is set up
correctly: if `clinfo` fails to build, chances are high no
other OpenCL application will build;
* explore/report the actual properties of the available device(s).
# Building
Building requires an OpenCL SDK (or at least OpenCL headers and
development files), and the standard build environment for the platform.
No special build system is used (autotools, CMake, meson, ninja, etc),
as I feel adding more dependencies for such a simple program would be
excessive. Simply running `make` at the project root should work.
## Windows support
The application can usually be built in Windows too (support for which
required way more time than I should have spent, really, but I digress),
by running `make` in a Developer Command Prompt for Visual Studio,
provided an OpenCL SDK (such as the Intel or AMD one) is installed.
Precompiled Windows executables are available as artefacts of the
AppVeyor CI.
clinfo-2.2.18.03.26/fetch-opencl-dev-win.cmd 0000664 0000000 0000000 00000001040 13256213726 0020132 0 ustar 00root root 0000000 0000000 REM call as fetch-opencl-dev-win x86|x86_64|x64
REM Get the Khronos OpenCL headers and use the opencl22 subdirectory as .\include
git clone https://github.com/KhronosGroup/OpenCL-Headers
move OpenCL-Headers/opencl22 include
REM The first argument selects the architecture subdirectory; "x64" (any case) is mapped to "x86_64"
set sub=%1
if /i "%sub%" == "x64" set sub=x86_64
mkdir lib\%sub%
REM Download libOpenCL.a and OpenCL.lib into lib\%sub%, and OpenCL.dll into the current directory
curl -L -o lib/%sub%/libOpenCL.a https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/lib/%sub%/libOpenCL.a -o lib/%sub%/OpenCL.lib https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/lib/%sub%/OpenCL.lib -o OpenCL.dll https://github.com/AMD-FirePro/SDK/raw/master/external/opencl-1.2/bin/%sub%/OpenCL.dll
clinfo-2.2.18.03.26/legalcode.txt 0000664 0000000 0000000 00000015610 13256213726 0016217 0 ustar 00root root 0000000 0000000 Creative Commons Legal Code
CC0 1.0 Universal
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
clinfo-2.2.18.03.26/make.cmd 0000664 0000000 0000000 00000000042 13256213726 0015132 0 ustar 00root root 0000000 0000000 nmake /F Makefile.win /nologo %*
clinfo-2.2.18.03.26/man1/ 0000775 0000000 0000000 00000000000 13256213726 0014370 5 ustar 00root root 0000000 0000000 clinfo-2.2.18.03.26/man1/clinfo.1 0000664 0000000 0000000 00000017010 13256213726 0015723 0 ustar 00root root 0000000 0000000 .TH CLINFO 1 "2018-03-26" "clinfo 2.2.18.03.26"
.SH NAME
clinfo \- show OpenCL platforms and devices
.SH SYNOPSIS
.B clinfo
.RI [ "options ..." ]
.SH DESCRIPTION
.B clinfo
prints all available information about all OpenCL platforms
available on the system and the devices they expose.
.SH OPTIONS
.B clinfo
accepts the following options:
.TP 2
.BR -a ", " --all-props
try to retrieve all properties, even those not officially supported
(e.g. because they require specific extensions), but only show them
if the property could be retrieved successfully; see also the
.B LIMITATIONS
section below; note that even though this may reveal hidden properties,
there is no guarantee that the returned values are meaningful, nor that
the corresponding feature is actually available at all;
.TP
.BR -A ", " --always-all-props
like
.BR -a ,
but also show errors;
.TP
.B --human
produce human-friendly output; this is the default (except
as noted below);
.TP
.B --raw
produce machine-friendly output; this is the default if
.B clinfo
is invoked with a name that contains the string
.RI \*(lq raw \*(rq;
.TP
.B --offline
shows also offline devices for platforms that expose this feature;
.TP
.BR -l ", " --list
list platforms and devices by name, with no (other) properties;
.TP
.BR -h ", " -?
show usage;
.TP
.BR --version ", " -v
show program version.
.SH CONFORMING TO
OpenCL 1.1, OpenCL 1.2, OpenCL 2.0, OpenCL 2.1, OpenCL 2.2.
.SH EXTENSIONS
Supported OpenCL extensions:
.TP 2
.BR cl_khr_fp16 ", " cl_khr_fp64 ", " cl_amd_fp64 ", " cl_APPLE_fp64_basic_ops
for information about support for half-precision and double-precision
floating-point data types;
.TP
.B cl_khr_image2d_from_buffer
for information about the base address and pitch alignment requirements
of buffers to be used as base for 2D images;
.TP
.B cl_khr_il_program
for information about the supported IL (Intermediate Language) representations;
.TP
.B cl_khr_spir
for information about the supported SPIR (Standard Portable Intermediate
Representation) versions;
.TP
.B cl_khr_icd
for the suffix of vendor extensions functions;
.TP
.B cl_khr_subgroup_named_barrier
for the maximum number of named sub-group barriers;
.TP
.B cl_khr_terminate_context
for the terminate capabilities for the device;
.TP
.B cl_ext_device_fission
for device fission support in OpenCL 1.1 devices;
.TP
.B cl_ext_atomic_counters_32
.TQ
.B cl_ext_atomic_counters_64
for the atomic counter extension;
.TP
.B cl_amd_device_attribute_query
for AMD-specific device attributes;
.TP
.B cl_amd_object_metadata
to show the maximum number of keys supported by the platform;
.TP
.B cl_amd_offline_devices
to show offline devices exposed by the platform, if requested (see
.B --offline
option);
.TP
.B cl_amd_copy_buffer_p2p
to show the number and IDs of available P2P devices;
.TP
.B cl_amd_svm
.TQ
.B cl_arm_shared_virtual_memory
for Shared Virtual Memory (SVM) capabilities in OpenCL 1.2 devices;
.TP
.B cl_nv_device_attribute_query
for NVIDIA-specific device attributes;
.TP
.B cl_intel_exec_by_local_thread
for the Intel extension allowing CPU devices to run kernels as part of
the current host thread;
.TP
.B cl_intel_advanced_motion_estimation
for the version of the Intel Motion Estimation accelerator;
.TP
.B cl_intel_device_side_avc_motion_estimation
for the version and supported features of Intel's device-side AVC Motion Estimation;
.TP
.B cl_intel_planar_yuv
for the maximum dimensions of planar YUV images;
.TP
.B cl_intel_simultaneous_sharing
for simultaneous CL/GL/DirectX context sharing (only partial support);
.TP
.B cl_intel_required_subgroup_size
to enumerate allowed sub-group sizes;
.TP
.B cl_altera_device_temperature
for the Altera extension to query the core temperature of the device;
.TP
.B cl_qcom_ext_host_ptr
for the QUALCOMM extension to query page size and required padding in external
memory allocation.
.SH NOTES
Some information is duplicated when available from multiple sources.
Examples:
.IP \(bu 2
supported device partition types and domains as obtained using the
.B cl_ext_device_fission
extension typically match the ones obtained using
the core OpenCL 1.2 device partition feature;
.IP \(bu
the preferred work-group size multiple matches the NVIDIA warp size (on
NVIDIA devices) or the AMD wavefront width (on AMD devices).
.P
Some floating-point configuration flags may only be meaningful for
specific precisions and/or specific OpenCL versions. For example,
.B CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
is only relevant for single precision in OpenCL 1.2 devices.
.P
The implementation-defined behavior for NULL platform or context
properties is tested for the following API calls:
.TP 2
.B clGetPlatformInfo()
by trying to show the platform name;
.TP
.B clGetDeviceIDs()
by trying to enumerate devices; the corresponding platform (if any)
is then detected by querying the device platform of the first device;
.TP
.B clCreateContext()
by trying to create a context from a device from the previous
list (if any), and a context from a device from a different platform;
.TP
.B clCreateContextFromType()
by trying to create contexts for each device type (except DEFAULT).
.SH EXPERIMENTAL FEATURES
.P
Support for OpenCL 2.x properties is not fully tested.
.P
Support for
.B cl_khr_subgroup_named_barrier
is experimental due to missing definitions in the official OpenCL headers.
.P
Raw (machine-parsable) output is considered experimental, the output format
might still undergo changes.
.P
The properties of the ICD loader will also be queried if the
.B clGetICDLoaderInfoOCLICD
extension function is found.
.P
Support for the properties exposed by
.B cl_amd_copy_buffer_p2p
is experimental.
.P
Support for some (documented and undocumented) properties exposed by
.B cl_amd_device_attribute_query
is experimental (see also
.BR LIMITATIONS ).
.P
Support for the interop lists exposed by
.B cl_intel_simultaneous_sharing
is experimental.
.P
The highest OpenCL version supported by the ICD loader is detected
with some trivial heuristics (symbols found); a notice is output
if this is lower than the highest platform OpenCL version, or
if the detected version doesn't match the one declared by the ICD
loader itself.
.SH LIMITATIONS
.P
OpenCL provides no explicit means to detect the supported version
of any extension exposed by a device, which makes it impossible to
determine a priori if it will be possible to successfully query
a device about a specific property.
Additionally, the actual size and meaning of some properties are not
officially declared anywhere.
.P
Most notably, this affects extensions such as
.B cl_amd_device_attribute_query
and
.BR cl_nv_device_attribute_query .
Heuristics based on standard version support are partially used in the code to
determine which version may be supported.
.P
Properties which are known to be affected by these limitations include:
.TP 2
.B CL_DEVICE_GLOBAL_FREE_MEMORY_AMD
documented in v3 of the
.B cl_amd_device_attribute_query
extension specification as being the global free memory in KBytes, without
any explanation given on why there are two values;
.TP
.B CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD
documented in v3 of the
.B cl_amd_device_attribute_query
extension specification, but not reported by drivers supporting other v3
properties. This has now been enabled for drivers
.I assumed
to support v4 of the same extension;
.TP
.B CL_DEVICE_TERMINATE_CAPABILITY_KHR
exposed by the
.B cl_khr_terminate_context
extension, has changed value between OpenCL 1.x and 2.x, and it's
.I allegedly
a bitfield, whose values are however not defined anywhere.
.SH BUGS
.P
Please report any issues on
.UR http://github.com/Oblomov/clinfo
the project tracker on GitHub
.UE .
clinfo-2.2.18.03.26/new-version.sh 0000775 0000000 0000000 00000001314 13256213726 0016346 0 ustar 00root root 0000000 0000000 #!/bin/sh
# Change the version recorded in src/clinfo.c, man1/clinfo.1 and
# .appveyor.yml to the current highest OpenCL supported standard followed
# by current yy.mm.dd, then commit the change and tag it.

# Print the message given as first argument on stderr and exit with status 1
abort() {
	echo "$1" >&2
	exit 1
}

# The commit below uses -a, so refuse to run if anything other than
# untracked files ('??' entries) shows up in the status
test -n "$(git status --porcelain | grep -v '??')" && abort "Uncommited changes, aborting"

# Date for the man page header
DATE=$(date +%Y-%m-%d)
# Last field of the first man page line starting with "OpenCL" (the
# CONFORMING TO list); its trailing period doubles as the separator
# between the OpenCL version and the date
MAJOR=$(awk '/^OpenCL/ { print $NF ; exit }' man1/clinfo.1)
SUBV=$(date +%y.%m.%d)
VERSION="$MAJOR$SUBV"

# Note on the third substitution: the first line of .appveyor.yml is
# "version: X-{build}" with no double quote in it, so it is matched on its
# key; matching from the first double quote (as done for the man page
# header) would never match and would leave the CI version stale.
sed -i -e "/clinfo version/ s/version \S\+\"/version $VERSION\"/" src/clinfo.c &&
sed -i -e "1 s/\".\+$/\"$DATE\" \"clinfo $VERSION\"/" man1/clinfo.1 &&
sed -i -e "1 s/^version: .*$/version: $VERSION-{build}/" .appveyor.yml &&
git commit -m "Version $VERSION" -e -a &&
git tag -m "Version $VERSION" "$VERSION"
clinfo-2.2.18.03.26/src/ 0000775 0000000 0000000 00000000000 13256213726 0014323 5 ustar 00root root 0000000 0000000 clinfo-2.2.18.03.26/src/clinfo.c 0000664 0000000 0000000 00000305352 13256213726 0015751 0 ustar 00root root 0000000 0000000 /* Collect all available information on all available devices
* on all available OpenCL platforms present in the system
*/
#include
#include
/* We will want to check for symbols in the OpenCL library.
* On Windows, we must get the module handle for it, on Unix-like
* systems we can just use RTLD_DEFAULT
*/
#ifdef _MSC_VER
# include
# define dlsym GetProcAddress
# define DL_MODULE GetModuleHandle("OpenCL")
#else
# include
# define DL_MODULE ((void*)0) /* This would be RTLD_DEFAULT */
#endif
/* Load STDC format macros (PRI*), or define them
* for those crappy, non-standard compilers
*/
#include "fmtmacros.h"
// More support for the horrible MS C compiler
#ifdef _MSC_VER
#include "ms_support.h"
#endif
#include "error.h"
#include "memory.h"
#include "strbuf.h"
#include "ext.h"
#include "ctx_prop.h"
/* Number of elements of an actual array (not of a pointer) */
#define ARRAY_SIZE(ar) (sizeof(ar)/sizeof((ar)[0]))

/* Mark a declaration as deliberately unused; only defined here when
 * nothing included earlier has provided its own UNUSED */
#if !defined(UNUSED)
#define UNUSED(x) x __attribute__((unused))
#endif
/* Per-platform data kept for later use */
struct platform_data {
char *pname; /* CL_PLATFORM_NAME */
char *sname; /* CL_PLATFORM_ICD_SUFFIX_KHR or surrogate */
cl_uint ndevs; /* number of devices */
cl_bool has_amd_offline; /* has cl_amd_offline_devices extension */
};
/* Per-platform flags and version; going by the field names these record
 * which optional platform properties can be queried - TODO confirm */
struct platform_info_checks {
int has_khr_icd; /* presumably: cl_khr_icd is supported */
int has_amd_object_metadata; /* presumably: cl_amd_object_metadata is supported */
cl_uint plat_version; /* platform OpenCL version; encoding not visible here */
};
/* Platform list; presumably num_platforms is the length of both
 * platform[] and platform_checks[] - TODO confirm */
cl_uint num_platforms;
cl_platform_id *platform;
struct platform_info_checks *platform_checks;
/* highest version exposed by any platform: if the OpenCL library (the ICD loader)
 * has a lower version, problems may arise (such as API calls causing segfaults
 * or any other unexpected behavior)
 */
cl_uint max_plat_version;
/* auto-detected OpenCL version support for the ICD loader */
cl_uint icdl_ocl_version_found = 10;
/* OpenCL version support declared by the ICD loader */
cl_uint icdl_ocl_version;
/* per-platform data (see struct platform_data) */
struct platform_data *pdata;
/* maximum length of a platform's sname */
size_t platform_sname_maxlen;
/* maximum number of devices */
cl_uint maxdevs;
/* line prefix, used to identify the platform/device for each
 * device property in RAW output mode */
char *line_pfx;
int line_pfx_len;
/* presumably all devices across all platforms, and their count - TODO confirm */
cl_uint num_devs_all;
cl_device_id *all_devices;
/* Output styles; the values are distinct bits so that CLINFO_BOTH can
 * combine them */
enum output_modes {
CLINFO_HUMAN = 1, /* more human readable */
CLINFO_RAW = 2, /* property-by-property */
CLINFO_BOTH = CLINFO_HUMAN | CLINFO_RAW
};
/* Currently selected output style; human-readable unless changed */
enum output_modes output_mode = CLINFO_HUMAN;
/* Specify if we should only be listing the platform and devices;
 * can be done in both human and raw mode, and only the platform
 * and device names (and number) will be shown
 * TODO check if terminal supports UTF-8 and use Unicode line-drawing
 * for the tree in list mode
 */
/* Off by default */
cl_bool list_only = CL_FALSE;
/* Specify how we should handle conditional properties. */
enum cond_prop_modes {
COND_PROP_CHECK = 0, /* default: check, skip if invalid */
COND_PROP_TRY = 1, /* try, don't print an error if invalid */
COND_PROP_SHOW = 2 /* try, print an error if invalid */
};
/* Currently selected handling; CHECK_SKIP tests this against COND_PROP_SHOW */
enum cond_prop_modes cond_prop_mode = COND_PROP_CHECK;
/* Leave the calling function, returning had_error, when all of these hold:
 * the property was conditional and its condition was not satisfied
 * (checked is false), there was an error retrieving it (had_error is set),
 * and cond_prop_mode is not COND_PROP_SHOW.
 * A variable named had_error must be in scope where the macro is used.
 * The argument is parenthesized so that a compound expression such as
 * `a || b` is negated as a whole rather than just its first operand.
 */
#define CHECK_SKIP(checked) if (!(checked) && had_error && cond_prop_mode != COND_PROP_SHOW) return had_error
/* clGetDeviceInfo returns CL_INVALID_VALUE both for unknown properties
 * and when the destination variable is too small. Set the following to CL_TRUE
 * to check which one is the case
 */
static const cl_bool check_size = CL_FALSE;
/* When check_size is enabled and the enclosing scope's `error` is
 * CL_INVALID_VALUE, repeat the query cmd(...) asking only for the size of
 * the value and, if that succeeds, report it against sizeof(val) through
 * REPORT_SIZE_MISMATCH (defined outside this view).
 * Expects variables named `error` and `val` at the point of use; the
 * variadic arguments are the leading arguments of cmd, before the
 * (size, pointer, returned-size) triple.
 */
#define CHECK_SIZE(cmd, ...) do { \
/* check if the issue is with param size */ \
if (check_size && error == CL_INVALID_VALUE) { \
size_t _actual_sz; \
if (cmd(__VA_ARGS__, 0, NULL, &_actual_sz) == CL_SUCCESS) { REPORT_SIZE_MISMATCH(_actual_sz, sizeof(val)); } \
} \
} while (0)
/* Strings shared by the lookup tables and the output code */
static const char unk[] = "Unknown";
static const char none[] = "None";
static const char none_raw[] = "CL_NONE";
static const char na[] = "n/a"; // not available
static const char core[] = "core"; // literal "core"
/* Unit suffixes, each with a leading space */
static const char bytes_str[] = " bytes";
static const char pixels_str[] = " pixels";
static const char images_str[] = " images";
/* Two-entry tables: index 0 is the false/first case, index 1 the other */
static const char* bool_str[] = { "No", "Yes" };
static const char* bool_raw_str[] = { "CL_FALSE", "CL_TRUE" };
static const char* endian_str[] = { "Big-Endian", "Little-Endian" };
/* Device types, with a leading 0 entry; device_type_str and
 * device_type_raw_str below use the same order, with `unk` in slot 0 */
static const cl_device_type devtype[] = { 0,
CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU,
CL_DEVICE_TYPE_ACCELERATOR, CL_DEVICE_TYPE_CUSTOM, CL_DEVICE_TYPE_ALL };
/* number of entries in devtype, including the leading 0 and ALL */
const size_t devtype_count = ARRAY_SIZE(devtype);
/* number of actual device types, without ALL */
const size_t actual_devtype_count = ARRAY_SIZE(devtype) - 1;
/* Human-readable names, same order as devtype */
static const char* device_type_str[] = { unk, "Default", "CPU", "GPU", "Accelerator", "Custom", "All" };
/* OpenCL constant names, same order as devtype */
static const char* device_type_raw_str[] = { unk,
"CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU",
"CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_CUSTOM", "CL_DEVICE_TYPE_ALL"
};
/* Device partition type names for human output; the raw table below
 * lists the matching names in the same order */
static const char* partition_type_str[] = {
"none specified", none, "equally", "by counts", "by affinity domain", "by names (Intel)"
};
/* Device partition type names for raw output */
static const char* partition_type_raw_str[] = {
"NONE SPECIFIED",
none_raw,
"CL_DEVICE_PARTITION_EQUALLY_EXT",
"CL_DEVICE_PARTITION_BY_COUNTS_EXT",
"CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT",
"CL_DEVICE_PARTITION_BY_NAMES_INTEL_EXT"
};
/* Building blocks shared by the affinity domain tables */
static const char numa[] = "NUMA";
static const char l1cache[] = "L1 cache";
static const char l2cache[] = "L2 cache";
static const char l3cache[] = "L3 cache";
static const char l4cache[] = "L4 cache";
/* Affinity domain names, human output. All four tables below list the
 * domains in the same order (NUMA, L4, L3, L2, L1, next); the *_ext ones
 * use the extension's "fissionable" wording and _EXT constant names */
static const char* affinity_domain_str[] = {
numa, l4cache, l3cache, l2cache, l1cache, "next partitionable"
};
static const char* affinity_domain_ext_str[] = {
numa, l4cache, l3cache, l2cache, l1cache, "next fissionable"
};
/* Affinity domain names, raw output */
static const char* affinity_domain_raw_str[] = {
"CL_DEVICE_AFFINITY_DOMAIN_NUMA",
"CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE",
"CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE",
"CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE",
"CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE",
"CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE"
};
static const char* affinity_domain_raw_ext_str[] = {
"CL_AFFINITY_DOMAIN_NUMA_EXT",
"CL_AFFINITY_DOMAIN_L4_CACHE_EXT",
"CL_AFFINITY_DOMAIN_L3_CACHE_EXT",
"CL_AFFINITY_DOMAIN_L2_CACHE_EXT",
"CL_AFFINITY_DOMAIN_L1_CACHE_EXT",
"CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT"
};
/* Entry count of affinity_domain_str (the other three tables have as many) */
const size_t affinity_domain_count = ARRAY_SIZE(affinity_domain_str);
/* Terminate capability names (human, then raw); a single entry is known */
static const char *terminate_capability_str[] = {
"Context"
};
static const char *terminate_capability_raw_str[] = {
"CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR"
};
/* Entry count of the two tables above */
const size_t terminate_capability_count = ARRAY_SIZE(terminate_capability_str);
/* Floating-point configuration flag descriptions, human output; the raw
 * table below names the corresponding CL_FP_* constants in the same order */
static const char* fp_conf_str[] = {
"Denormals", "Infinity and NANs", "Round to nearest", "Round to zero",
"Round to infinity", "IEEE754-2008 fused multiply-add",
"Support is emulated in software",
"Correctly-rounded divide and sqrt operations"
};
/* Floating-point configuration flag names, raw output */
static const char* fp_conf_raw_str[] = {
"CL_FP_DENORM",
"CL_FP_INF_NAN",
"CL_FP_ROUND_TO_NEAREST",
"CL_FP_ROUND_TO_ZERO",
"CL_FP_ROUND_TO_INF",
"CL_FP_FMA",
"CL_FP_SOFT_FLOAT",
"CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT"
};
/* Entry count of the two tables above */
const size_t fp_conf_count = ARRAY_SIZE(fp_conf_str);
/* Shared Virtual Memory capability descriptions, human output; the raw
 * table below names the corresponding constants in the same order */
static const char* svm_cap_str[] = {
"Coarse-grained buffer sharing",
"Fine-grained buffer sharing",
"Fine-grained system sharing",
"Atomics"
};
/* Shared Virtual Memory capability names, raw output */
static const char* svm_cap_raw_str[] = {
"CL_DEVICE_SVM_COARSE_GRAIN_BUFFER",
"CL_DEVICE_SVM_FINE_GRAIN_BUFFER",
"CL_DEVICE_SVM_FINE_GRAIN_SYSTEM",
"CL_DEVICE_SVM_ATOMICS",
};
/* Entry count of the two tables above */
const size_t svm_cap_count = ARRAY_SIZE(svm_cap_str);
/* Binary-prefix suffixes for memory sizes, from bytes up to YiB. Note that
 * in OpenCL most sizes are passed via a cl_ulong, which can represent at
 * most 16 EiB, but hey, let's be forward-thinking ;-)
 */
static const char* memsfx[] = {
"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
};
/* NOTE(review): this is ARRAY_SIZE(memsfx) + 1, i.e. two past the last
 * valid index of memsfx; if it serves as an exclusive upper bound for an
 * index into memsfx it would permit out-of-range reads - confirm against
 * its users, which are not visible here */
const size_t memsfx_end = ARRAY_SIZE(memsfx) + 1;
/* Local memory type names (human, then raw) */
static const char* lmem_type_str[] = { none, "Local", "Global" };
static const char* lmem_type_raw_str[] = { none_raw, "CL_LOCAL", "CL_GLOBAL" };
/* Cache type names (human, then raw) */
static const char* cache_type_str[] = { none, "Read-Only", "Read/Write" };
static const char* cache_type_raw_str[] = { none_raw, "CL_READ_ONLY_CACHE", "CL_READ_WRITE_CACHE" };
/* Command queue property names (human, then raw) and their count */
static const char* queue_prop_str[] = { "Out-of-order execution", "Profiling" };
static const char* queue_prop_raw_str[] = {
"CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE",
"CL_QUEUE_PROFILING_ENABLE"
};
const size_t queue_prop_count = ARRAY_SIZE(queue_prop_str);
/* Execution capability names (human, then raw) and their count */
static const char* execap_str[] = { "Run OpenCL kernels", "Run native kernels" };
static const char* execap_raw_str[] = {
"CL_EXEC_KERNEL",
"CL_EXEC_NATIVE_KERNEL"
};
const size_t execap_count = ARRAY_SIZE(execap_str);
static const char* sources[] = {
"#define GWO(type) global type* restrict\n",
"#define GRO(type) global const type* restrict\n",
"#define BODY int i = get_global_id(0); out[i] = in1[i] + in2[i]\n",
"#define _KRN(T, N) void kernel sum##N(GWO(T##N) out, GRO(T##N) in1, GRO(T##N) in2) { BODY; }\n",
"#define KRN(N) _KRN(float, N)\n",
"KRN()\n/* KRN(2)\nKRN(4)\nKRN(8)\nKRN(16) */\n",
};
/* The helpers below return the text describing a specific failure:
 * a descriptive sentence in CLINFO_HUMAN output mode, the name of
 * the corresponding OpenCL error code otherwise. */

/* no platform */
const char *no_plat(void)
{
	if (output_mode == CLINFO_HUMAN)
		return "No platform";
	return "CL_INVALID_PLATFORM";
}

/* invalid device type */
const char *invalid_dev_type(void)
{
	if (output_mode == CLINFO_HUMAN)
		return "Invalid device type for platform";
	return "CL_INVALID_DEVICE_TYPE";
}

/* invalid device type value */
const char *invalid_dev_value(void)
{
	if (output_mode == CLINFO_HUMAN)
		return "Invalid device type value for platform";
	return "CL_INVALID_VALUE";
}

/* no devices found */
const char *no_dev_found(void)
{
	if (output_mode == CLINFO_HUMAN)
		return "No devices found in platform";
	return "CL_DEVICE_NOT_FOUND";
}

/* no devices available */
const char *no_dev_avail(void)
{
	if (output_mode == CLINFO_HUMAN)
		return "No devices available in platform";
	return "CL_DEVICE_NOT_AVAILABLE";
}
/* OpenCL context interop names.
 *
 * Each entry covers the properties of one interop extension: `from` and
 * `to` are the first and last property identifiers of the entry, and
 * value[] holds the { HUMAN, RAW } name pairs for them (presumably
 * value[i] names property `from + i` -- confirm against the consumer).
 */
typedef struct cl_interop_name {
	cl_uint from;
	cl_uint to;
	/* 5 because that's the largest we know of,
	 * 2 because it's HUMAN, RAW */
	const char *value[5][2];
} cl_interop_name;

static const cl_interop_name cl_interop_names[] = {
	{ /* cl_khr_gl_sharing */
		CL_GL_CONTEXT_KHR,
		CL_CGL_SHAREGROUP_KHR,
		{
			{ "GL", "CL_GL_CONTEXT_KHR" },
			/* raw name fixed: it used to be misspelled as ..._DISPALY_... */
			{ "EGL", "CL_EGL_DISPLAY_KHR" },
			{ "GLX", "CL_GLX_DISPLAY_KHR" },
			{ "WGL", "CL_WGL_HDC_KHR" },
			{ "CGL", "CL_CGL_SHAREGROUP_KHR" }
		}
	},
	{ /* cl_khr_dx9_media_sharing */
		CL_CONTEXT_ADAPTER_D3D9_KHR,
		CL_CONTEXT_ADAPTER_DXVA_KHR,
		{
			{ "D3D9 (KHR)", "CL_CONTEXT_ADAPTER_D3D9_KHR" },
			{ "D3D9Ex (KHR)", "CL_CONTEXT_ADAPTER_D3D9EX_KHR" },
			{ "DXVA (KHR)", "CL_CONTEXT_ADAPTER_DXVA_KHR" }
		}
	},
	{ /* cl_khr_d3d10_sharing */
		CL_CONTEXT_D3D10_DEVICE_KHR,
		CL_CONTEXT_D3D10_DEVICE_KHR,
		{
			{ "D3D10", "CL_CONTEXT_D3D10_DEVICE_KHR" }
		}
	},
	{ /* cl_khr_d3d11_sharing */
		CL_CONTEXT_D3D11_DEVICE_KHR,
		CL_CONTEXT_D3D11_DEVICE_KHR,
		{
			{ "D3D11", "CL_CONTEXT_D3D11_DEVICE_KHR" }
		}
	},
	{ /* cl_intel_dx9_media_sharing */
		CL_CONTEXT_D3D9_DEVICE_INTEL,
		CL_CONTEXT_DXVA_DEVICE_INTEL,
		{
			{ "D3D9 (INTEL)", "CL_CONTEXT_D3D9_DEVICE_INTEL" },
			{ "D3D9Ex (INTEL)", "CL_CONTEXT_D3D9EX_DEVICE_INTEL" },
			{ "DXVA (INTEL)", "CL_CONTEXT_DXVA_DEVICE_INTEL" }
		}
	},
	{ /* cl_intel_va_api_media_sharing */
		CL_CONTEXT_VA_API_DISPLAY_INTEL,
		CL_CONTEXT_VA_API_DISPLAY_INTEL,
		{
			{ "VA-API", "CL_CONTEXT_VA_API_DISPLAY_INTEL" }
		}
	}
};

/* number of interop extensions described by cl_interop_names */
const size_t num_known_interops = ARRAY_SIZE(cl_interop_names);
/* Preferred workgroup size multiple for each kernel.
 * We have not found a platform where the WG multiple changes,
 * but keep this flexible (this can grow up to 5).
 */
#define NUM_KERNELS 1
/* one slot per kernel; device_info_wg() prints slot 0 */
size_t wgm[NUM_KERNELS];
/* printf building blocks: each I<n>_STR left-justifies a property name
 * in a fixed-width column; I1_STR is the one used by show_strbuf() and
 * by the device/platform info printers in this file */
#define INDENT " "
#define I0_STR "%-48s "
#define I1_STR " %-46s "
#define I2_STR " %-44s "
/* small constant strings, mostly used as separators and as "nothing" */
static const char empty_str[] = "";
static const char spc_str[] = " ";
static const char times_str[] = "x";
static const char comma_str[] = ", ";
static const char vbar_str[] = " | ";
/* result of the most recent info query; nonzero means it failed */
int had_error = 0;
/* suffix that show_strbuf() appends to the value when there was no error */
const char *cur_sfx = empty_str;
/* Parse a CL_DEVICE_VERSION or CL_PLATFORM_VERSION info to determine the
 * OpenCL version.
 *
 * `version` must point at the numeric part of the version string (i.e.
 * past the "OpenCL " prefix).
 *
 * Returns an unsigned integer in the form major*10 + minor. If no major
 * number can be parsed the result is 10 (OpenCL 1.0); if the major number
 * is not followed by ".<minor>" the minor is taken to be 0.
 */
cl_uint
getOpenCLVersion(const char *version)
{
	cl_uint ret = 10;
	long parse = 0;
	const char *from = version;
	char *next = NULL;

	parse = strtol(from, &next, 10);
	if (next != from) {
		ret = parse*10;
		/* Only step over the separator when it really is a dot: blindly
		 * skipping one character would walk past the terminating NUL
		 * when the string ends right after the major number. */
		if (*next == '.') {
			from = next + 1;
			parse = strtol(from, &next, 10);
			if (next != from)
				ret += parse;
		}
	}
	return ret;
}
/* print strbuf, prefixed by pname, skipping leading whitespace if skip is nonzero,
* affixing cur_sfx */
static inline
void show_strbuf(const char *pname, int skip)
{
printf("%s" I1_STR "%s%s\n",
line_pfx, pname,
(skip ? skip_leading_ws(strbuf) : strbuf),
had_error ? empty_str : cur_sfx);
}
/* Fetch a string-valued platform property into strbuf and, unless we
 * are only listing, print it. Returns had_error. */
int
platform_info_str(cl_platform_id pid, cl_platform_info param, const char* pname, const struct platform_info_checks* UNUSED(chk), int checked)
{
	GET_STRING2(clGetPlatformInfo, pid, param);
	CHECK_SKIP(checked);

	/* in list-only mode we are just gathering information:
	 * nothing gets printed */
	if (list_only)
		return had_error;

	show_strbuf(pname, 1);
	return had_error;
}
/* Fetch a cl_ulong platform property and, unless we are only listing,
 * print it (or the content of strbuf if the query failed).
 * Returns had_error. */
int
platform_info_ulong(cl_platform_id pid, cl_platform_info param, const char* pname, const struct platform_info_checks* UNUSED(chk), int checked)
{
	cl_ulong val = 0;

	error = clGetPlatformInfo(pid, param, sizeof(val), &val, NULL);
	had_error = REPORT_ERROR2("get %s");
	CHECK_SIZE(clGetPlatformInfo, pid, param);
	CHECK_SKIP(checked);

	/* in list-only mode we are just gathering information:
	 * nothing gets printed */
	if (list_only)
		return had_error;

	if (!had_error)
		printf("%s" I1_STR "%" PRIu64 "%s\n", line_pfx, pname, val, cur_sfx);
	else
		show_strbuf(pname, 0);

	return had_error;
}
/* Fetch a size_t platform property and, unless we are only listing,
 * print it (or the content of strbuf if the query failed).
 * Returns had_error. */
int
platform_info_sz(cl_platform_id pid, cl_platform_info param, const char* pname, const struct platform_info_checks* UNUSED(chk), int checked)
{
	size_t val = 0;

	error = clGetPlatformInfo(pid, param, sizeof(val), &val, NULL);
	had_error = REPORT_ERROR2("get %s");
	CHECK_SIZE(clGetPlatformInfo, pid, param);
	CHECK_SKIP(checked);

	/* in list-only mode we are just gathering information:
	 * nothing gets printed */
	if (list_only)
		return had_error;

	if (!had_error)
		printf("%s" I1_STR "%" PRIuS "%s\n", line_pfx, pname, val, cur_sfx);
	else
		show_strbuf(pname, 0);

	return had_error;
}
/* Description of one platform property: what to query, how to label it
 * in raw and human output, how to show it and (optionally) when it
 * applies. printPlatformInfo() walks an array of these. */
struct platform_info_traits {
cl_platform_info param; // CL_PLATFORM_*
const char *sname; // "CL_PLATFORM_*"
const char *pname; // "Platform *"
const char *sfx; // suffix for the output in non-raw mode
/* pointer to function that shows the parameter */
int (*show_func)(cl_platform_id pid, cl_platform_info param, const char *pname, const struct platform_info_checks *, int checked);
/* pointer to function that checks if the parameter should be checked;
 * NULL means the parameter is unconditional */
int (*check_func)(const struct platform_info_checks *);
};
/* Platform conditions, used as check_func in pinfo_traits */

/* platform exposes cl_khr_icd */
int khr_icd_p(const struct platform_info_checks *chk)
{
	return chk->has_khr_icd;
}

/* platform supports OpenCL 2.0 or later */
int plat_is_20(const struct platform_info_checks *chk)
{
	const int older = chk->plat_version < 20;
	return !older;
}

/* platform supports OpenCL 2.1 or later */
int plat_is_21(const struct platform_info_checks *chk)
{
	const int older = chk->plat_version < 21;
	return !older;
}

/* platform exposes cl_amd_object_metadata */
int plat_has_amd_object_metadata(const struct platform_info_checks *chk)
{
	return chk->has_amd_object_metadata;
}
/* Initializers for struct platform_info_traits entries:
 * symbol is the CL_PLATFORM_* constant (also stringified as the raw name),
 * name is appended to "Platform " to form the human name, sfx is the
 * human-mode suffix (or NULL), typ selects the platform_info_<typ>
 * show function. PINFO_COND additionally takes the check function that
 * decides if the property applies; PINFO leaves it NULL (unconditional). */
#define PINFO_COND(symbol, name, sfx, typ, funcptr) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, &funcptr }
#define PINFO(symbol, name, sfx, typ) { symbol, #symbol, "Platform " name, sfx, &platform_info_##typ, NULL }
/* Platform properties, in the order in which printPlatformInfo() queries
 * them. The conditional entries come after CL_PLATFORM_VERSION and
 * CL_PLATFORM_EXTENSIONS, whose post-processing fills the fields their
 * check functions read. */
struct platform_info_traits pinfo_traits[] = {
PINFO(CL_PLATFORM_NAME, "Name", NULL, str),
PINFO(CL_PLATFORM_VENDOR, "Vendor", NULL, str),
PINFO(CL_PLATFORM_VERSION, "Version", NULL, str),
PINFO(CL_PLATFORM_PROFILE, "Profile", NULL, str),
PINFO(CL_PLATFORM_EXTENSIONS, "Extensions", NULL, str),
PINFO_COND(CL_PLATFORM_MAX_KEYS_AMD, "Max metadata object keys (AMD)", NULL, sz, plat_has_amd_object_metadata),
PINFO_COND(CL_PLATFORM_HOST_TIMER_RESOLUTION, "Host timer resolution", "ns", ulong, plat_is_21),
PINFO_COND(CL_PLATFORM_ICD_SUFFIX_KHR, "Extensions function suffix", NULL, str, khr_icd_p)
};
/* Print platform info and prepare arrays for device info.
 *
 * For platform number p this walks pinfo_traits, showing each property
 * through its show function, and post-processes some of the results:
 * the name and ICD suffix are copied into pdata[p], the version and the
 * extension list fill platform_checks[p] (and pdata[p].has_amd_offline).
 * Afterwards it makes sure pdata[p].sname is set, updates
 * max_plat_version and platform_sname_maxlen, and queries the number of
 * devices of the platform, accumulating it in num_devs_all and maxdevs.
 */
void
printPlatformInfo(cl_uint p)
{
	cl_platform_id pid = platform[p];
	size_t len = 0;
	struct platform_info_checks *pinfo_checks = platform_checks + p;

	/* assume OpenCL 1.0 until CL_PLATFORM_VERSION says otherwise */
	pinfo_checks->plat_version = 10;

	current_function = __func__;

	for (current_line = 0; current_line < ARRAY_SIZE(pinfo_traits); ++current_line) {
		const struct platform_info_traits *traits = pinfo_traits + current_line;
		const char *pname = (output_mode == CLINFO_HUMAN ?
			traits->pname : traits->sname);

		/* checked is true if there was no condition to check for, or if the
		 * condition was satisfied
		 */
		int checked = !(traits->check_func && !traits->check_func(pinfo_checks));

		current_param = traits->sname;

		if (cond_prop_mode == COND_PROP_CHECK && !checked)
			continue;

		cur_sfx = (output_mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;

		strbuf[0] = '\0';
		had_error = traits->show_func(pid, traits->param,
			pname, pinfo_checks, checked);

		if (had_error)
			continue;

		/* post-processing */

		switch (traits->param) {
		case CL_PLATFORM_NAME:
			/* Store name for future reference */
			len = strlen(strbuf);
			ALLOC(pdata[p].pname, len+1, "platform name copy");
			/* memcpy instead of strncpy since we already have the len
			 * and memcpy is possibly more optimized */
			memcpy(pdata[p].pname, strbuf, len);
			pdata[p].pname[len] = '\0';
			break;
		case CL_PLATFORM_VERSION:
			/* compute numeric value for OpenCL version: the number
			 * follows the 7-character "OpenCL " prefix. Only look there
			 * if the string is long enough, otherwise we would parse
			 * whatever lies beyond the terminator; in that case the
			 * default version set above is kept. */
			if (strlen(strbuf) > 7)
				pinfo_checks->plat_version = getOpenCLVersion(strbuf + 7);
			break;
		case CL_PLATFORM_EXTENSIONS:
			pinfo_checks->has_khr_icd = !!strstr(strbuf, "cl_khr_icd");
			pinfo_checks->has_amd_object_metadata = !!strstr(strbuf, "cl_amd_object_metadata");
			pdata[p].has_amd_offline = !!strstr(strbuf, "cl_amd_offline_devices");
			break;
		case CL_PLATFORM_ICD_SUFFIX_KHR:
			/* Store ICD suffix for future reference */
			len = strlen(strbuf);
			ALLOC(pdata[p].sname, len+1, "platform ICD suffix copy");
			/* memcpy instead of strncpy since we already have the len
			 * and memcpy is possibly more optimized */
			memcpy(pdata[p].sname, strbuf, len);
			pdata[p].sname[len] = '\0';
			/* explicit break: do not fall through into default */
			break;
		default:
			/* do nothing */
			break;
		}
	}

	if (pinfo_checks->plat_version > max_plat_version)
		max_plat_version = pinfo_checks->plat_version;

	/* if no CL_PLATFORM_ICD_SUFFIX_KHR, use P### as short/symbolic name */
	if (!pdata[p].sname) {
#define SNAME_MAX 32
		ALLOC(pdata[p].sname, SNAME_MAX, "platform symbolic name");
		snprintf(pdata[p].sname, SNAME_MAX, "P%u", p);
	}

	len = strlen(pdata[p].sname);
	if (len > platform_sname_maxlen)
		platform_sname_maxlen = len;

	/* a platform without devices is not an error for us */
	error = clGetDeviceIDs(pid, CL_DEVICE_TYPE_ALL, 0, NULL, &(pdata[p].ndevs));
	if (error == CL_DEVICE_NOT_FOUND)
		pdata[p].ndevs = 0;
	else
		CHECK_ERROR("number of devices");

	num_devs_all += pdata[p].ndevs;

	if (pdata[p].ndevs > maxdevs)
		maxdevs = pdata[p].ndevs;
}
int
getWGsizes(cl_platform_id pid, cl_device_id dev)
{
int ret = 0;
#define RR_ERROR(what) do { \
had_error = REPORT_ERROR(what); \
if (had_error) { \
ret = error; \
goto out; \
} \
} while(0)
cl_context_properties ctxpft[] = {
CL_CONTEXT_PLATFORM, (cl_context_properties)pid,
0, 0 };
cl_uint cursor = 0;
cl_context ctx = NULL;
cl_program prg = NULL;
cl_kernel krn = NULL;
ctx = clCreateContext(ctxpft, 1, &dev, NULL, NULL, &error);
RR_ERROR("create context");
prg = clCreateProgramWithSource(ctx, ARRAY_SIZE(sources), sources, NULL, &error);
RR_ERROR("create program");
error = clBuildProgram(prg, 1, &dev, NULL, NULL, NULL);
had_error = REPORT_ERROR("build program");
if (had_error)
ret = error;
/* for a program build failure, dump the log to stderr before bailing */
if (error == CL_BUILD_PROGRAM_FAILURE) {
/* Do not clobber strbuf, shadow it */
char *strbuf = NULL;
size_t bufsz = 0, nusz = 0;
GET_STRING(clGetProgramBuildInfo, CL_PROGRAM_BUILD_LOG, "CL_PROGRAM_BUILD_LOG", prg, dev);
if (error == CL_SUCCESS) {
fflush(stdout);
fflush(stderr);
fputs("=== CL_PROGRAM_BUILD_LOG ===\n", stderr);
fputs(strbuf, stderr);
fflush(stderr);
}
free(strbuf);
}
if (had_error)
goto out;
for (cursor = 0; cursor < NUM_KERNELS; ++cursor) {
snprintf(strbuf, bufsz, "sum%u", 1<has_##ext[0]); \
}
/* One DEFINE_EXT_CHECK() per extension flag tracked in
 * struct device_info_checks. NOTE(review): the macro definition is not
 * visible at this point; presumably each invocation defines the matching
 * dev_has_<name>() predicate (dev_has_double(), dev_has_amd(),
 * dev_has_fission(), ... are called further down in this file) --
 * confirm against the macro. */
DEFINE_EXT_CHECK(half)
DEFINE_EXT_CHECK(double)
DEFINE_EXT_CHECK(nv)
DEFINE_EXT_CHECK(amd)
DEFINE_EXT_CHECK(amd_svm)
DEFINE_EXT_CHECK(arm_svm)
DEFINE_EXT_CHECK(fission)
DEFINE_EXT_CHECK(atomic_counters)
DEFINE_EXT_CHECK(image2d_buffer)
DEFINE_EXT_CHECK(il_program)
DEFINE_EXT_CHECK(intel_local_thread)
DEFINE_EXT_CHECK(intel_AME)
DEFINE_EXT_CHECK(intel_AVC_ME)
DEFINE_EXT_CHECK(intel_planar_yuv)
DEFINE_EXT_CHECK(intel_required_subgroup_size)
DEFINE_EXT_CHECK(altera_dev_temp)
DEFINE_EXT_CHECK(p2p)
DEFINE_EXT_CHECK(spir)
DEFINE_EXT_CHECK(qcom_ext_host_ptr)
DEFINE_EXT_CHECK(simultaneous_sharing)
DEFINE_EXT_CHECK(subgroup_named_barrier)
DEFINE_EXT_CHECK(terminate_context)
/* In the version checks we negate the opposite conditions
 * instead of double-negating the actual condition
 */

/* device supports 1.2 */
int dev_is_12(const struct device_info_checks *chk)
{
	const int older = chk->dev_version < 12;
	return !older;
}

/* device supports 2.0 */
int dev_is_20(const struct device_info_checks *chk)
{
	const int older = chk->dev_version < 20;
	return !older;
}

/* device supports 2.1 */
int dev_is_21(const struct device_info_checks *chk)
{
	const int older = chk->dev_version < 21;
	return !older;
}

/* device does not support 2.0 */
int dev_not_20(const struct device_info_checks *chk)
{
	const int is_20 = chk->dev_version >= 20;
	return !is_20;
}
/* device type includes CL_DEVICE_TYPE_GPU */
int dev_is_gpu(const struct device_info_checks *chk)
{
	return (chk->devtype & CL_DEVICE_TYPE_GPU) != 0;
}

/* GPU device for which dev_has_amd() holds */
int dev_is_gpu_amd(const struct device_info_checks *chk)
{
	if (!dev_is_gpu(chk))
		return 0;
	return dev_has_amd(chk) != 0;
}
/* Device supports cl_amd_device_attribute_query v4.
 *
 * We don't actually have a criterion to check if the device supports a
 * specific version of an extension, so for the time being rely on them
 * being GPU devices with cl_amd_device_attribute_query and the platform
 * supporting OpenCL 2.0 or later.
 * TODO FIXME tune criteria
 */
int dev_has_amd_v4(const struct device_info_checks *chk)
{
	if (!dev_is_gpu(chk))
		return 0;
	if (!dev_has_amd(chk))
		return 0;
	return plat_is_20(chk->pinfo_checks) != 0;
}
/* SVM: OpenCL 2.0 device, or dev_has_amd_svm() */
int dev_has_svm(const struct device_info_checks *chk)
{
	if (dev_is_20(chk))
		return 1;
	return dev_has_amd_svm(chk) != 0;
}

/* partitioning: OpenCL 1.2 device, or dev_has_fission() */
int dev_has_partition(const struct device_info_checks *chk)
{
	if (dev_is_12(chk))
		return 1;
	return dev_has_fission(chk) != 0;
}

/* device has a global memory cache */
int dev_has_cache(const struct device_info_checks *chk)
{
	return !(chk->cachetype == CL_NONE);
}

/* device has local memory */
int dev_has_lmem(const struct device_info_checks *chk)
{
	return !(chk->lmemtype == CL_NONE);
}

/* IL programs: OpenCL 2.1 device, or dev_has_il_program() */
int dev_has_il(const struct device_info_checks *chk)
{
	if (dev_is_21(chk))
		return 1;
	return dev_has_il_program(chk) != 0;
}

/* device supports images */
int dev_has_images(const struct device_info_checks *chk)
{
	const int supported = chk->image_support;
	return supported;
}

/* device supports images and OpenCL 1.2 */
int dev_has_images_12(const struct device_info_checks *chk)
{
	if (!dev_has_images(chk))
		return 0;
	return dev_is_12(chk) != 0;
}

/* device supports images and OpenCL 2.0 */
int dev_has_images_20(const struct device_info_checks *chk)
{
	if (!dev_has_images(chk))
		return 0;
	return dev_is_20(chk) != 0;
}

/* device has a compiler available */
int dev_has_compiler(const struct device_info_checks *chk)
{
	const int available = chk->compiler_available;
	return available;
}
/* Scan the extension list of a device and record in chk which of the
 * extensions we care about are present.
 *
 * For each match, the name of the extension that was found is copied
 * into the corresponding chk->has_<what> buffer; buffers of extensions
 * that are not found are left untouched. When several extensions provide
 * the same feature, the alternatives are only looked up if the preferred
 * one was not found.
 *
 * extensions: the extension string of the device
 * chk:        the device checks structure to fill
 */
void identify_device_extensions(const char *extensions, struct device_info_checks *chk)
{
#define _HAS_EXT(ext) (strstr(extensions, ext))
#define HAS_EXT(ext) _HAS_EXT(#ext)
/* copy the extension name found at `has` (sizeof(ext)-1 characters)
 * into chk->has_<what>, making sure it is NUL-terminated */
#define CPY_EXT(what, ext) do { \
	strncpy(chk->has_##what, has, sizeof(ext)); \
	chk->has_##what[sizeof(ext)-1] = '\0'; \
} while (0)
/* look for extension `ext`, and record it as feature `what` if found */
#define CHECK_EXT(what, ext) do { \
	has = _HAS_EXT(#ext); \
	if (has) CPY_EXT(what, #ext); \
} while(0)

	char *has;
	CHECK_EXT(half, cl_khr_fp16);
	CHECK_EXT(spir, cl_khr_spir);
	/* double precision: KHR first, then the vendor variants */
	CHECK_EXT(double, cl_khr_fp64);
	if (!dev_has_double(chk))
		CHECK_EXT(double, cl_amd_fp64);
	if (!dev_has_double(chk))
		CHECK_EXT(double, cl_APPLE_fp64_basic_ops);
	CHECK_EXT(nv, cl_nv_device_attribute_query);
	CHECK_EXT(amd, cl_amd_device_attribute_query);
	CHECK_EXT(amd_svm, cl_amd_svm);
	CHECK_EXT(arm_svm, cl_arm_shared_virtual_memory);
	CHECK_EXT(fission, cl_ext_device_fission);
	/* atomic counters: 64-bit first, 32-bit as fallback. The fallback
	 * must be tried when the 64-bit extension is NOT there, same as for
	 * the double precision alternatives above; the condition used to be
	 * inverted, so devices with 32-bit counters only were missed. */
	CHECK_EXT(atomic_counters, cl_ext_atomic_counters_64);
	if (!dev_has_atomic_counters(chk))
		CHECK_EXT(atomic_counters, cl_ext_atomic_counters_32);
	CHECK_EXT(image2d_buffer, cl_khr_image2d_from_buffer);
	CHECK_EXT(il_program, cl_khr_il_program);
	CHECK_EXT(intel_local_thread, cl_intel_exec_by_local_thread);
	CHECK_EXT(intel_AME, cl_intel_advanced_motion_estimation);
	CHECK_EXT(intel_AVC_ME, cl_intel_device_side_avc_motion_estimation);
	CHECK_EXT(intel_planar_yuv, cl_intel_planar_yuv);
	CHECK_EXT(intel_required_subgroup_size, cl_intel_required_subgroup_size);
	CHECK_EXT(altera_dev_temp, cl_altera_device_temperature);
	CHECK_EXT(p2p, cl_amd_copy_buffer_p2p);
	CHECK_EXT(qcom_ext_host_ptr, cl_qcom_ext_host_ptr);
	CHECK_EXT(simultaneous_sharing, cl_intel_simultaneous_sharing);
	CHECK_EXT(subgroup_named_barrier, cl_khr_subgroup_named_barrier);
	CHECK_EXT(terminate_context, cl_khr_terminate_context);
}
/*
 * Device info print functions
 *
 * The macros below expect the enclosing function to have `dev`, `param`,
 * `pname`, `checked` and a local `val` in scope, and they work through
 * the globals error, had_error and strbuf.
 */
/* query `param` into the scalar `val`, recording the outcome in had_error */
#define _GET_VAL \
error = clGetDeviceInfo(dev, param, sizeof(val), &val, NULL); \
had_error = REPORT_ERROR2("get %s"); \
CHECK_SIZE(clGetDeviceInfo, dev, param);
/* query `param` into the heap array `val`: first ask for the size in
 * bytes (szval), derive the element count (numval), then (re)allocate
 * `val` and fetch the data; on failure of the second query `val` is
 * freed and reset to NULL. Requires locals szval and numval. */
#define _GET_VAL_ARRAY \
error = clGetDeviceInfo(dev, param, 0, NULL, &szval); \
had_error = REPORT_ERROR2("get number of %s"); \
numval = szval/sizeof(*val); \
if (!had_error) { \
REALLOC(val, numval, current_param); \
error = clGetDeviceInfo(dev, param, szval, val, NULL); \
had_error = REPORT_ERROR("get %s"); \
if (had_error) { free(val); val = NULL; } \
}
/* statement forms of the above, followed by CHECK_SKIP(checked) */
#define GET_VAL do { \
_GET_VAL \
CHECK_SKIP(checked); \
} while (0)
#define GET_VAL_ARRAY do { \
_GET_VAL_ARRAY \
CHECK_SKIP(checked); \
} while (0)
/* print `val` with printf format `fmt` followed by cur_sfx, or the
 * content of strbuf if the query failed */
#define _FMT_VAL(fmt) \
if (had_error) \
show_strbuf(pname, 0); \
else \
printf("%s" I1_STR fmt "%s\n", line_pfx, pname, val, cur_sfx);
#define FMT_VAL(fmt) do { \
_FMT_VAL(fmt) \
} while (0)
/* query and print in one go */
#define SHOW_VAL(fmt) do { \
_GET_VAL \
CHECK_SKIP(checked); \
_FMT_VAL(fmt) \
} while (0)
/* define device_info_<how>(), which queries a value of the given type
 * and prints it with the given format; returns had_error */
#define DEFINE_DEVINFO_SHOW(how, type, fmt) \
int device_info_##how(cl_device_id dev, cl_device_info param, const char *pname, \
const struct device_info_checks* UNUSED(chk), int checked) \
{ \
type val = 0; \
SHOW_VAL(fmt); \
return had_error; \
}
/* Get string-type info without showing it.
 * Sets current_param to sname, fetches the property into strbuf via
 * GET_STRING2 and returns had_error. */
int device_info_str_get(cl_device_id dev, cl_device_info param, const char *sname,
const struct device_info_checks* UNUSED(chk), int UNUSED(checked))
{
current_param = sname;
GET_STRING2(clGetDeviceInfo, dev, param);
return had_error;
}
/* Get string-type info and show it, skipping leading whitespace.
 * Returns had_error. */
int device_info_str(cl_device_id dev, cl_device_info param, const char *pname,
const struct device_info_checks *chk, int checked)
{
/* current_param is passed through unchanged as the symbolic name */
had_error = device_info_str_get(dev, param, current_param, chk, checked);
CHECK_SKIP(checked);
show_strbuf(pname, 1);
return had_error;
}
/* Plain scalar printers: device_info_int (decimal cl_uint),
 * device_info_hex (hexadecimal cl_uint), device_info_long (cl_ulong)
 * and device_info_sz (size_t) */
DEFINE_DEVINFO_SHOW(int, cl_uint, "%u")
DEFINE_DEVINFO_SHOW(hex, cl_uint, "0x%x")
DEFINE_DEVINFO_SHOW(long, cl_ulong, "%" PRIu64)
DEFINE_DEVINFO_SHOW(sz, size_t, "%" PRIuS)
/* Show a boolean device property, using the human or raw names for
 * the two values depending on the output mode. On success the value is
 * also stored at the beginning of strbuf. Returns had_error. */
int device_info_bool(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_bool val = 0;
	const char * const *names = bool_raw_str;

	if (output_mode == CLINFO_HUMAN)
		names = bool_str;

	GET_VAL;

	if (had_error) {
		show_strbuf(pname, 0);
		return had_error;
	}

	printf("%s" I1_STR "%s%s\n", line_pfx, pname, names[val], cur_sfx);
	/* abuse strbuf to pass the bool value up to the caller,
	 * this is used e.g. by CL_DEVICE_IMAGE_SUPPORT
	 */
	memcpy(strbuf, &val, sizeof(val));
	return had_error;
}
/* Show a device property expressed in bits, together with its
 * equivalent in bytes. Returns had_error. */
int device_info_bits(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_uint val;

	GET_VAL;

	if (!had_error) {
		const cl_uint bytes = val/8;
		sprintf(strbuf, "%u bits (%u bytes)", val, bytes);
	}

	show_strbuf(pname, 0);
	return had_error;
}
/* Append to strbuf, starting at offset szval, the amount of memory val
 * (in bytes) scaled to the largest fitting unit, in the form
 * " (<value><suffix>)".
 *
 * Returns the number of characters written, as reported by sprintf.
 */
size_t strbuf_mem(cl_ulong val, size_t szval)
{
	/* index of the largest suffix we know about: scaling must stop there,
	 * or memsfx would be indexed past its end (memsfx_end is larger than
	 * the table, so it cannot be used as bound here) */
	const size_t last_sfx = ARRAY_SIZE(memsfx) - 1;
	double dbl = (double)val;
	size_t sfx = 0;
	while (dbl > 1024 && sfx < last_sfx) {
		dbl /= 1024;
		++sfx;
	}
	return sprintf(strbuf + szval, " (%.4lg%s)",
		dbl, memsfx[sfx]);
}
/* Show a memory size stored in a cl_ulong: the number of bytes, followed
 * in human mode by the scaled value if larger than 1024.
 * Returns had_error. */
int device_info_mem(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_ulong val = 0;
	size_t szval = 0;

	GET_VAL;

	if (had_error) {
		show_strbuf(pname, 0);
		return had_error;
	}

	szval = sprintf(strbuf, "%" PRIu64, val);
	if (val > 1024 && output_mode == CLINFO_HUMAN)
		strbuf_mem(val, szval);

	show_strbuf(pname, 0);
	return had_error;
}
/* Show a memory size stored in a cl_uint: the number of bytes, followed
 * in human mode by the scaled value if larger than 1024.
 * Returns had_error. */
int device_info_mem_int(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_uint val = 0;
	size_t szval = 0;

	GET_VAL;

	if (had_error) {
		show_strbuf(pname, 0);
		return had_error;
	}

	szval = sprintf(strbuf, "%u", val);
	if (val > 1024 && output_mode == CLINFO_HUMAN)
		strbuf_mem(val, szval);

	show_strbuf(pname, 0);
	return had_error;
}
/* Show a memory size stored in a size_t: the number of bytes, followed
 * in human mode by the scaled value if larger than 1024.
 * Returns had_error. */
int device_info_mem_sz(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t val = 0;
	size_t szval = 0;

	GET_VAL;

	if (had_error) {
		show_strbuf(pname, 0);
		return had_error;
	}

	szval = sprintf(strbuf, "%" PRIuS, val);
	if (val > 1024 && output_mode == CLINFO_HUMAN)
		strbuf_mem(val, szval);

	show_strbuf(pname, 0);
	return had_error;
}
/* Show the AMD free memory info: an array of size_t values, printed
 * separated by spaces; in human mode each value is followed by the
 * scaled form of the value times 1024. Returns had_error. */
int device_info_free_mem_amd(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t *val = NULL;
	size_t szval = 0, numval = 0;

	GET_VAL_ARRAY;

	if (!had_error) {
		const int human = (output_mode == CLINFO_HUMAN);
		size_t idx;

		/* from here on szval is the write offset into strbuf */
		szval = 0;
		for (idx = 0; idx < numval; ++idx) {
			if (szval > 0)
				strbuf[szval++] = ' ';
			szval += sprintf(strbuf + szval, "%" PRIuS, val[idx]);
			if (human)
				szval += strbuf_mem(val[idx]*UINT64_C(1024), szval);
		}
	}

	show_strbuf(pname, 0);
	free(val);
	return had_error;
}
/* Show a time offset given in nanoseconds: the raw value, followed in
 * parentheses by the ctime() rendering of the value in seconds.
 * Returns had_error. */
int device_info_time_offset(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_ulong val = 0;

	GET_VAL;

	if (had_error) {
		show_strbuf(pname, 0);
		return had_error;
	}

	{
		time_t secs = val/UINT64_C(1000000000);
		size_t used = 0;

		used += snprintf(strbuf, bufsz, "%" PRIu64 "ns (", val);
		used += bufcpy(used, ctime(&secs));
		/* overwrite ctime's newline with the closing parenthesis */
		if (used < bufsz)
			strbuf[used - 1] = ')';
	}

	show_strbuf(pname, 0);
	return had_error;
}
/* Show an array of size_t values, separated by human_sep in human mode
 * and by a space otherwise. The output is truncated if it does not fit
 * in strbuf. Returns had_error. */
int device_info_szptr_sep(const char *human_sep, cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t *val = NULL;
	size_t szval = 0, numval = 0;

	GET_VAL_ARRAY;

	if (!had_error) {
		const char *sep = spc_str;
		size_t idx = 0;

		if (output_mode == CLINFO_HUMAN)
			sep = human_sep;
		set_separator(sep);

		/* from here on szval is the write offset into strbuf */
		szval = 0;
		while (idx < numval) {
			add_separator(&szval);
			szval += snprintf(strbuf + szval, bufsz - szval - 1, "%" PRIuS, val[idx]);
			if (szval >= bufsz) {
				trunc_strbuf();
				break;
			}
			++idx;
		}
	}

	show_strbuf(pname, 0);
	free(val);
	return had_error;
}
/* Show an array of size_t values, separated by "x" in human mode */
int device_info_szptr_times(cl_device_id dev, cl_device_info param, const char *pname,
const struct device_info_checks* chk, int checked)
{
return device_info_szptr_sep(times_str, dev, param, pname, chk, checked);
}
/* Show an array of size_t values, separated by ", " in human mode */
int device_info_szptr_comma(cl_device_id dev, cl_device_info param, const char *pname,
const struct device_info_checks* chk, int checked)
{
return device_info_szptr_sep(comma_str, dev, param, pname, chk, checked);
}
/* Show the preferred work-group size multiple of the device.
 * The incoming param is not used: the platform of the device is queried
 * instead, and passed with the device to getWGsizes(); on success
 * wgm[0] is printed. Returns had_error. */
int device_info_wg(cl_device_id dev, cl_device_info UNUSED(param), const char *pname,
const struct device_info_checks* UNUSED(chk), int checked)
{
cl_platform_id val = NULL;
{
/* shadow param, so that GET_VAL queries the platform of the device */
cl_device_info param = CL_DEVICE_PLATFORM;
current_param = "CL_DEVICE_PLATFORM";
GET_VAL;
}
/* from here on, errors are reported against the name being shown */
current_param = pname;
if (!had_error)
had_error = getWGsizes(val, dev);
if (!had_error) {
sprintf(strbuf, "%" PRIuS, wgm[0]);
}
show_strbuf(pname, 0);
return had_error;
}
/* Show the maximum 2D image size as <width>x<height>.
 * The incoming param is the height; the width is queried next.
 * Returns had_error. */
int device_info_img_sz_2d(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t height = 0, val = 0;

	GET_VAL; /* HEIGHT */
	if (had_error)
		goto show;
	height = val;

	param = CL_DEVICE_IMAGE2D_MAX_WIDTH;
	current_param = "CL_DEVICE_IMAGE2D_MAX_WIDTH";
	GET_VAL;
	if (had_error)
		goto show;

	/* val now holds the width */
	sprintf(strbuf, "%" PRIuS "x%" PRIuS, val, height);

show:
	show_strbuf(pname, 0);
	return had_error;
}
/* Show the maximum Intel planar YUV image size as <width>x<height>.
 * The incoming param is the height; the width is queried next.
 * Returns had_error. */
int device_info_img_sz_intel_planar_yuv(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t height = 0, val = 0;

	GET_VAL; /* HEIGHT */
	if (had_error)
		goto show;
	height = val;

	param = CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL;
	current_param = "CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL";
	GET_VAL;
	if (had_error)
		goto show;

	/* val now holds the width */
	sprintf(strbuf, "%" PRIuS "x%" PRIuS, val, height);

show:
	show_strbuf(pname, 0);
	return had_error;
}
/* Show the maximum 3D image size as <width>x<height>x<depth>.
 * The incoming param is the height; width and depth are queried next.
 * Returns had_error. */
int device_info_img_sz_3d(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t width = 0, height = 0, val = 0;

	GET_VAL; /* HEIGHT */
	if (had_error)
		goto show;
	height = val;

	param = CL_DEVICE_IMAGE3D_MAX_WIDTH;
	current_param = "CL_DEVICE_IMAGE3D_MAX_WIDTH";
	GET_VAL;
	if (had_error)
		goto show;
	width = val;

	param = CL_DEVICE_IMAGE3D_MAX_DEPTH;
	current_param = "CL_DEVICE_IMAGE3D_MAX_DEPTH";
	GET_VAL;
	if (had_error)
		goto show;

	/* val now holds the depth */
	sprintf(strbuf, "%" PRIuS "x%" PRIuS "x%" PRIuS,
		width, height, val);

show:
	show_strbuf(pname, 0);
	return had_error;
}
/* Show the type of a device: the names of all the known type bits that
 * are set (from the highest to the lowest), followed by any unknown
 * bits as a single hexadecimal value. On success the raw value is then
 * stored at the beginning of strbuf. Returns had_error. */
int device_info_devtype(cl_device_id dev, cl_device_info param, const char *pname,
const struct device_info_checks* UNUSED(chk), int checked)
{
cl_device_type val = 0;
GET_VAL;
if (!had_error) {
/* iterate over device type strings, appending their textual form
 * to strbuf */
cl_uint i = (cl_uint)actual_devtype_count;
const char * const *devstr = (output_mode == CLINFO_HUMAN ?
device_type_str : device_type_raw_str);
size_t szval = 0;
strbuf[szval] = '\0';
set_separator(output_mode == CLINFO_HUMAN ? comma_str : vbar_str);
for (; i > 0; --i) {
/* assemble CL_DEVICE_TYPE_* from index i:
 * name number i corresponds to bit number i-1 */
cl_device_type cur = (cl_device_type)(1) << (i-1);
if (val & cur) {
/* match: add separator if not first match */
add_separator(&szval);
szval += bufcpy(szval, devstr[i]);
}
}
/* check for extra bits */
if (szval < bufsz) {
cl_device_type known_mask = ((cl_device_type)(1) << actual_devtype_count) - 1;
cl_device_type extra = val & ~known_mask;
if (extra) {
add_separator(&szval);
szval += snprintf(strbuf + szval, bufsz - szval - 1, "0x%" PRIX64, extra);
}
}
}
show_strbuf(pname, 0);
/* we abuse global strbuf to pass the device type over to the caller:
 * this must come after the text has been shown, since it overwrites it */
if (!had_error)
memcpy(strbuf, &val, sizeof(val));
return had_error;
}
/* Show the global memory cache type of a device.
 *
 * Known values are shown by name (human or raw, depending on the output
 * mode), anything else as a hexadecimal number. On success the raw value
 * is then stored at the beginning of strbuf, for the caller to pick up.
 * Returns had_error.
 */
int device_info_cachetype(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_device_mem_cache_type val = 0;
	GET_VAL;
	if (!had_error) {
		const char * const *ar = (output_mode == CLINFO_HUMAN ?
			cache_type_str : cache_type_raw_str);
		/* the value comes from the driver: do not trust it to be a valid
		 * index (the human and raw tables have the same size) */
		if (val < ARRAY_SIZE(cache_type_str))
			bufcpy(0, ar[val]);
		else
			snprintf(strbuf, bufsz, "0x%x", (unsigned int)val);
	}
	show_strbuf(pname, 0);
	/* we abuse global strbuf to pass the cache type over to the caller */
	if (!had_error)
		memcpy(strbuf, &val, sizeof(val));
	return had_error;
}
/* Show the local memory type of a device.
 *
 * Known values are shown by name (human or raw, depending on the output
 * mode), anything else as a hexadecimal number. On success the raw value
 * is then stored at the beginning of strbuf, for the caller to pick up.
 * Returns had_error.
 */
int device_info_lmemtype(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_device_local_mem_type val = 0;
	GET_VAL;
	if (!had_error) {
		const char * const *ar = (output_mode == CLINFO_HUMAN ?
			lmem_type_str : lmem_type_raw_str);
		/* the value comes from the driver: do not trust it to be a valid
		 * index (the human and raw tables have the same size) */
		if (val < ARRAY_SIZE(lmem_type_str))
			bufcpy(0, ar[val]);
		else
			snprintf(strbuf, bufsz, "0x%x", (unsigned int)val);
	}
	show_strbuf(pname, 0);
	/* we abuse global strbuf to pass the lmem type over to the caller */
	if (!had_error)
		memcpy(strbuf, &val, sizeof(val));
	return had_error;
}
/* stringify a cl_device_topology_amd */
void devtopo_str(const cl_device_topology_amd *devtopo)
{
switch (devtopo->raw.type) {
case 0:
if (output_mode == CLINFO_HUMAN)
sprintf(strbuf, "(%s)", na);
else
sprintf(strbuf, none_raw);
break;
case CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD:
sprintf(strbuf, "PCI-E, %02x:%02x.%u",
(cl_uchar)(devtopo->pcie.bus),
devtopo->pcie.device, devtopo->pcie.function);
break;
default:
sprintf(strbuf, "",
devtopo->raw.type,
devtopo->raw.data[0], devtopo->raw.data[1],
devtopo->raw.data[2],
devtopo->raw.data[3], devtopo->raw.data[4]);
}
}
/* Show the device topology, as reported through the AMD extension:
 * the structure is queried as a whole and rendered by devtopo_str().
 * Returns had_error. */
int device_info_devtopo_amd(cl_device_id dev, cl_device_info param, const char *pname,
const struct device_info_checks* UNUSED(chk), int checked)
{
/* only read if the query succeeded */
cl_device_topology_amd val;
GET_VAL;
/* TODO how to do this in CLINFO_RAW mode */
if (!had_error) {
devtopo_str(&val);
}
show_strbuf(pname, 0);
return had_error;
}
/* Show the device topology, as reported through the NVIDIA extension.
 * We assemble a cl_device_topology_amd struct from the NVIDIA info
 * (the incoming param is the PCI bus ID, the slot ID is queried next),
 * and render that with devtopo_str(). Returns had_error. */
int device_info_devtopo_nv(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_device_topology_amd devtopo;
	cl_uint val = 0;

	devtopo.raw.type = CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD;

	GET_VAL; /* CL_DEVICE_PCI_BUS_ID_NV */
	if (had_error)
		goto show;
	devtopo.pcie.bus = val & 0xff;

	param = CL_DEVICE_PCI_SLOT_ID_NV;
	current_param = "CL_DEVICE_PCI_SLOT_ID_NV";
	GET_VAL;
	if (had_error)
		goto show;

	/* the slot ID packs the function in its lowest 3 bits,
	 * and the device above them */
	devtopo.pcie.device = (val >> 3) & 0xff;
	devtopo.pcie.function = val & 7;
	devtopo_str(&devtopo);

show:
	show_strbuf(pname, 0);
	return had_error;
}
/* NVIDIA Compute Capability, shown as <major>.<minor>.
 * The incoming param is the major number, the minor is queried next.
 * Returns had_error. */
int device_info_cc_nv(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_uint major = 0, val = 0;

	GET_VAL; /* MAJOR */
	if (had_error)
		goto show;
	major = val;

	param = CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV;
	current_param = "CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV";
	GET_VAL;
	if (had_error)
		goto show;

	/* val now holds the minor number */
	snprintf(strbuf, bufsz, "%u.%u", major, val);

show:
	show_strbuf(pname, 0);
	return had_error;
}
/* AMD GFXIP, shown as <major>.<minor>.
 * The incoming param is the major number, the minor is queried next.
 * Returns had_error. */
int device_info_gfxip_amd(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_uint major = 0, val = 0;

	GET_VAL; /* MAJOR */
	if (had_error)
		goto show;
	major = val;

	param = CL_DEVICE_GFXIP_MINOR_AMD;
	current_param = "CL_DEVICE_GFXIP_MINOR_AMD";
	GET_VAL;
	if (had_error)
		goto show;

	/* val now holds the minor number */
	snprintf(strbuf, bufsz, "%u.%u", major, val);

show:
	show_strbuf(pname, 0);
	return had_error;
}
/* Device Partition, CLINFO_HUMAN header.
 * Shows, in parentheses, where partitioning support comes from: core
 * (for OpenCL 1.2 devices) and/or the recorded fission extension name.
 * Nothing is queried from the device; had_error is reset to CL_SUCCESS
 * and returned. */
int device_info_partition_header(cl_device_id UNUSED(dev), cl_device_info UNUSED(param),
	const char *pname, const struct device_info_checks *chk, int UNUSED(checked))
{
	const char *core_part = empty_str;
	const char *sep_part = empty_str;
	size_t written = 0;

	if (dev_is_12(chk)) {
		core_part = core;
		/* the separator is only needed if both parts are there */
		if (dev_has_fission(chk))
			sep_part = comma_str;
	}

	written = snprintf(strbuf, bufsz, "(%s%s%s)",
		core_part, sep_part, chk->has_fission);
	if (written >= bufsz)
		trunc_strbuf();

	show_strbuf(pname, 0);
	had_error = CL_SUCCESS;
	return had_error;
}
/* Device partition properties.
 * Queries the array of partition types supported by the device and
 * shows their names, separated by ", " in human mode and " | " in raw
 * mode; unknown types are shown by value. If the array is empty, the
 * first entry of the name table is shown instead. Returns had_error. */
int device_info_partition_types(cl_device_id dev, cl_device_info param, const char *pname,
const struct device_info_checks* UNUSED(chk), int checked)
{
size_t numval = 0, szval = 0, cursor = 0, slen = 0;
cl_device_partition_property *val = NULL;
const char * const *ptstr = (output_mode == CLINFO_HUMAN ?
partition_type_str : partition_type_raw_str);
set_separator(output_mode == CLINFO_HUMAN ? comma_str : vbar_str);
GET_VAL_ARRAY;
/* from here on szval is the write offset into strbuf */
szval = 0;
if (!had_error) {
for (cursor = 0; cursor < numval; ++cursor) {
/* index of the name in ptstr, if the type is a known one */
int str_idx = -1;
/* add separator for values past the first */
add_separator(&szval);
switch (val[cursor]) {
case 0: str_idx = 1; break;
case CL_DEVICE_PARTITION_EQUALLY: str_idx = 2; break;
case CL_DEVICE_PARTITION_BY_COUNTS: str_idx = 3; break;
case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: str_idx = 4; break;
case CL_DEVICE_PARTITION_BY_NAMES_INTEL: str_idx = 5; break;
default:
szval += snprintf(strbuf + szval, bufsz - szval - 1, "by (0x%" PRIXPTR ")", val[cursor]);
break;
}
if (str_idx > 0) {
/* string length, minus _EXT: in raw mode the last 4 characters
 * of all names but the one at index 1 are dropped */
slen = strlen(ptstr[str_idx]);
if (output_mode == CLINFO_RAW && str_idx > 1)
slen -= 4;
szval += bufcpy_len(szval, ptstr[str_idx], slen);
}
if (szval >= bufsz) {
trunc_strbuf();
break;
}
}
if (szval == 0) {
/* nothing was written: show the name at index 0 */
bufcpy(szval, ptstr[0]);
} else if (szval < bufsz)
strbuf[szval] = '\0';
}
show_strbuf(pname, 0);
free(val);
return had_error;
}
/* Device partition properties, cl_ext_device_fission variant.
 *
 * Queries the array of partition types supported by the device and
 * shows their names, separated by ", " in human mode and " | " in raw
 * mode; unknown types are shown by value. If the array is empty, the
 * first entry of the name table is shown instead. Returns had_error.
 *
 * All the names are copied with bufcpy(), as it is done for the non-EXT
 * properties, rather than with unchecked strncpy()/memcpy() calls that
 * could write past the end of strbuf. NOTE(review): this relies on
 * bufcpy() bounding and NUL-terminating the copy, and returning the
 * number of characters copied, as its other callers in this file
 * assume -- confirm.
 */
int device_info_partition_types_ext(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t numval = 0, szval = 0, cursor = 0;
	cl_device_partition_property_ext *val = NULL;
	const char * const *ptstr = (output_mode == CLINFO_HUMAN ?
		partition_type_str : partition_type_raw_str);

	set_separator(output_mode == CLINFO_HUMAN ? comma_str : vbar_str);

	GET_VAL_ARRAY;

	/* from here on szval is the write offset into strbuf */
	szval = 0;
	if (!had_error) {
		for (cursor = 0; cursor < numval; ++cursor) {
			/* index of the name in ptstr, if the type is a known one */
			int str_idx = -1;

			/* add separator for values past the first */
			add_separator(&szval);

			switch (val[cursor]) {
			case 0: str_idx = 1; break;
			case CL_DEVICE_PARTITION_EQUALLY_EXT: str_idx = 2; break;
			case CL_DEVICE_PARTITION_BY_COUNTS_EXT: str_idx = 3; break;
			case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: str_idx = 4; break;
			case CL_DEVICE_PARTITION_BY_NAMES_EXT: str_idx = 5; break;
			default:
				szval += snprintf(strbuf + szval, bufsz - szval - 1, "by (0x%" PRIX64 ")", val[cursor]);
				break;
			}
			if (str_idx > 0) {
				/* the whole name: here the _EXT suffix is wanted */
				szval += bufcpy(szval, ptstr[str_idx]);
			}
			if (szval >= bufsz) {
				trunc_strbuf();
				break;
			}
		}
		if (szval == 0) {
			/* nothing was written: show the name at index 0 */
			bufcpy(szval, ptstr[0]);
		} else if (szval < bufsz)
			strbuf[szval] = '\0';
	}
	show_strbuf(pname, 0);

	free(val);
	return had_error;
}
/* Device partition affinity domains.
 *
 * Shows the names of the affinity domain bits that are set, separated by
 * ", " in human mode and " | " in raw mode, followed by any unknown bits
 * as a single hexadecimal value. Nothing is shown if the query succeeds
 * and no bit is set. Returns had_error.
 */
int device_info_partition_affinities(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	/* must be initialized: it is read by the final check below even
	 * when the query failed and left it untouched */
	cl_device_affinity_domain val = 0;
	GET_VAL;
	if (!had_error && val) {
		/* iterate over affinity domain strings appending their textual form
		 * to strbuf */
		size_t szval = 0;
		cl_uint i = 0;
		const char * const *affstr = (output_mode == CLINFO_HUMAN ?
			affinity_domain_str : affinity_domain_raw_str);
		set_separator(output_mode == CLINFO_HUMAN ? comma_str : vbar_str);
		for (i = 0; i < affinity_domain_count; ++i) {
			cl_device_affinity_domain cur = (cl_device_affinity_domain)(1) << i;
			if (val & cur) {
				/* match: add separator if not first match */
				add_separator(&szval);
				szval += bufcpy(szval, affstr[i]);
			}
			if (szval >= bufsz)
				break;
		}
		/* check for extra bits */
		if (szval < bufsz) {
			cl_device_affinity_domain known_mask = ((cl_device_affinity_domain)(1) << affinity_domain_count) - 1;
			cl_device_affinity_domain extra = val & ~known_mask;
			if (extra) {
				add_separator(&szval);
				szval += snprintf(strbuf + szval, bufsz - szval - 1, "0x%" PRIX64, extra);
			}
		}
	}
	/* show the domains if there are any, or the error if there was one */
	if (val || had_error)
		show_strbuf(pname, 0);
	return had_error;
}
/* Supported affinity domains, _EXT flavour of the query.
 *
 * Fetches the array of cl_device_partition_property_ext values for `param`
 * and renders it into strbuf as a separator-joined list of names taken from
 * the affinity domain (ext) string table (human or raw, depending on
 * output_mode). Values without a known name are rendered in hexadecimal.
 *
 * Every write into strbuf is bounded by bufsz: when a piece does not fit,
 * as much as possible is copied and the existing truncation path
 * (trunc_strbuf() + break) is taken. The final terminator is only written
 * when the offset is still inside the buffer.
 *
 * Returns had_error, as set by GET_VAL_ARRAY.
 */
int device_info_partition_affinities_ext(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	size_t numval = 0, szval = 0, cursor = 0, slen = 0;
	cl_device_partition_property_ext *val = NULL;
	const char * const *ptstr = (output_mode == CLINFO_HUMAN ?
		affinity_domain_ext_str : affinity_domain_raw_ext_str);

	set_separator(output_mode == CLINFO_HUMAN ? comma_str : vbar_str);

	GET_VAL_ARRAY;

	/* from here on szval is the write offset into strbuf */
	szval = 0;
	if (!had_error) {
		for (cursor = 0; cursor < numval; ++cursor) {
			int str_idx = -1;

			/* add separator for values past the first */
			add_separator(&szval);

			/* the size computations below assume szval < bufsz,
			 * otherwise they would wrap around */
			if (szval >= bufsz) {
				trunc_strbuf();
				break;
			}

			switch (val[cursor]) {
			case CL_AFFINITY_DOMAIN_NUMA_EXT: str_idx = 0; break;
			case CL_AFFINITY_DOMAIN_L4_CACHE_EXT: str_idx = 1; break;
			case CL_AFFINITY_DOMAIN_L3_CACHE_EXT: str_idx = 2; break;
			case CL_AFFINITY_DOMAIN_L2_CACHE_EXT: str_idx = 3; break;
			case CL_AFFINITY_DOMAIN_L1_CACHE_EXT: str_idx = 4; break;
			case CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT: str_idx = 5; break;
			default:
				szval += snprintf(strbuf + szval, bufsz - szval - 1, " (0x%" PRIX64 ")", val[cursor]);
				break;
			}
			if (str_idx >= 0) {
				const char *str = ptstr[str_idx];
				slen = strlen(str);
				if (slen < bufsz - szval) {
					/* fits, leaving room for the terminator */
					memcpy(strbuf + szval, str, slen);
					szval += slen;
				} else {
					/* does not fit: copy what does and flag the
					 * overflow for the check below */
					memcpy(strbuf + szval, str, bufsz - szval - 1);
					szval = bufsz;
				}
			}
			if (szval >= bufsz) {
				trunc_strbuf();
				break;
			}
		}
		/* after an overflow szval is past the end of strbuf: writing the
		 * terminator there would be out of bounds, and the truncation
		 * path has already finished off the buffer */
		if (szval < bufsz)
			strbuf[szval] = '\0';
	}
	show_strbuf(pname, 0);
	free(val);
	return had_error;
}
/* Preferred / native vector widths.
 *
 * Called with one of the PREFERRED_VECTOR_WIDTH parameters: queries it, then
 * queries the matching NATIVE_VECTOR_WIDTH parameter and prints both values
 * on a single line. For half and double the line also reports the content of
 * the corresponding has_half / has_double field of the device checks (or the
 * `na` string if that is empty).
 *
 * Returns had_error, as set by the last GET_VAL executed.
 */
int device_info_vecwidth(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks *chk, int checked)
{
	cl_uint preferred = 0, val = 0;

	/* first query: the preferred width */
	GET_VAL;
	if (!had_error) {
		preferred = val;
		/* we get called with PREFERRED, NATIVE is at +0x30 offset, except for HALF,
		 * which is at +0x08 */
		if (param == CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF)
			param += 0x08;
		else
			param += 0x30;
		/* TODO update current_param */
		/* second query: the native width */
		GET_VAL;
	}

	/* had_error now covers whichever query ran last */
	if (!had_error) {
		size_t szval = 0;
		const char *ext = NULL;

		if (param == CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF)
			ext = chk->has_half;
		else if (param == CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE)
			ext = chk->has_double;

		szval = sprintf(strbuf, "%8u / %-8u", preferred, val);
		if (ext) {
			/* an empty string is reported as not available */
			const char *shown = (ext[0] != '\0' ? ext : na);
			sprintf(strbuf + szval, " (%s)", shown);
		}
	}

	show_strbuf(pname, 0);
	return had_error;
}
/* Floating-point configurations.
 *
 * Handles the half, single and double precision FP_CONFIG parameters.
 * Single precision is always queried; half and double only when
 * dev_has_half() / dev_has_double() pass, otherwise the query is skipped and
 * had_error is reset to CL_SUCCESS.
 *
 * HUMAN output starts with a parenthesized note (`core`, the has_half /
 * has_double string, or `na`) followed by one line per flag with its boolean
 * state; other output modes list only the flags that are set, joined by the
 * vertical-bar separator. The line is printed in HUMAN mode, or whenever the
 * value was actually queried.
 *
 * Returns had_error.
 */
int device_info_fpconf(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks *chk, int checked)
{
	cl_device_fp_config val = 0;
	int get_it = 0;

	/* decide whether the value should be queried at all */
	if (param == CL_DEVICE_SINGLE_FP_CONFIG)
		get_it = 1;
	else if (param == CL_DEVICE_HALF_FP_CONFIG)
		get_it = !!dev_has_half(chk);
	else if (param == CL_DEVICE_DOUBLE_FP_CONFIG)
		get_it = !!dev_has_double(chk);

	if (!get_it) {
		had_error = CL_SUCCESS;
	} else {
		GET_VAL;
	}

	if (!had_error) {
		size_t szval = 0;
		cl_uint flag = 0;
		const char * const *names = (output_mode == CLINFO_HUMAN ?
			fp_conf_str : fp_conf_raw_str);

		set_separator(vbar_str);

		if (output_mode == CLINFO_HUMAN) {
			/* show 'why' it's being shown */
			const char *reason = na;

			if (param == CL_DEVICE_SINGLE_FP_CONFIG) {
				reason = core;
			} else if (param == CL_DEVICE_HALF_FP_CONFIG) {
				if (get_it)
					reason = chk->has_half;
			} else if (param == CL_DEVICE_DOUBLE_FP_CONFIG) {
				if (get_it)
					reason = chk->has_double;
			} else {
				/* "this can't happen" (unless OpenCL starts supporting _other_ floating-point formats, maybe) */
				fprintf(stderr, "unsupported floating-point configuration parameter %s\n", pname);
			}
			szval += sprintf(strbuf, "(%s)", reason);
		}

		if (get_it) {
			/* The last flag, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is only considered
			 * in the single-precision case. half and double don't consider it,
			 * so we skip it altogether */
			size_t num_flags = fp_conf_count;
			if (param != CL_DEVICE_SINGLE_FP_CONFIG)
				--num_flags;

			for (flag = 0; flag < num_flags; ++flag) {
				const int is_set = !!(val & ((cl_device_fp_config)(1) << flag));

				if (output_mode == CLINFO_HUMAN) {
					/* one line per flag, set or not */
					szval += sprintf(strbuf + szval, "\n%s" I2_STR "%s",
						line_pfx, names[flag], bool_str[is_set]);
				} else if (is_set) {
					/* only the flags that are set */
					add_separator(&szval);
					szval += bufcpy(szval, names[flag]);
				}
			}
		}
	}

	/* only print this for HUMAN output or if we actually got the value */
	if (output_mode == CLINFO_HUMAN || get_it)
		show_strbuf(pname, 0);
	return had_error;
}
/* Queue properties.
 *
 * Queries the cl_command_queue_properties bitfield for `param`. HUMAN output
 * gets one line per known property with its boolean state, plus an extra
 * "Local thread execution (Intel)" line for CL_DEVICE_QUEUE_PROPERTIES when
 * dev_has_intel_local_thread() passes; other output modes list only the
 * properties that are set, joined by the vertical-bar separator.
 *
 * Returns had_error, as set by GET_VAL.
 */
int device_info_qprop(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks *chk, int checked)
{
	cl_command_queue_properties val = 0;

	GET_VAL;
	if (!had_error) {
		size_t szval = 0;
		cl_uint bit = 0;
		const char * const *names = (output_mode == CLINFO_HUMAN ?
			queue_prop_str : queue_prop_raw_str);

		set_separator(vbar_str);

		for (bit = 0; bit < queue_prop_count; ++bit) {
			const int is_set = !!(val & ((cl_command_queue_properties)(1) << bit));

			if (output_mode == CLINFO_HUMAN) {
				/* one line per property, set or not */
				szval += sprintf(strbuf + szval, "\n%s" I2_STR "%s",
					line_pfx, names[bit], bool_str[is_set]);
			} else if (is_set) {
				/* only the properties that are set */
				add_separator(&szval);
				szval += bufcpy(szval, names[bit]);
			}
		}

		/* Intel extra, reported as an additional always-true line */
		if (output_mode == CLINFO_HUMAN && param == CL_DEVICE_QUEUE_PROPERTIES) {
			if (dev_has_intel_local_thread(chk))
				sprintf(strbuf + szval, "\n%s" I2_STR "%s",
					line_pfx, "Local thread execution (Intel)", bool_str[CL_TRUE]);
		}
	}

	show_strbuf(pname, 0);
	return had_error;
}
/* Execution capabilities.
 *
 * Queries the cl_device_exec_capabilities bitfield for `param`. HUMAN output
 * gets one line per known capability with its boolean state; other output
 * modes list only the capabilities that are set, joined by the vertical-bar
 * separator.
 *
 * Returns had_error, as set by GET_VAL.
 */
int device_info_execap(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_device_exec_capabilities val = 0;

	GET_VAL;
	if (!had_error) {
		size_t szval = 0;
		cl_uint bit = 0;
		const char * const *names = (output_mode == CLINFO_HUMAN ?
			execap_str : execap_raw_str);

		set_separator(vbar_str);

		for (bit = 0; bit < execap_count; ++bit) {
			const int is_set = !!(val & ((cl_device_exec_capabilities)(1) << bit));

			if (output_mode == CLINFO_HUMAN) {
				/* one line per capability, set or not */
				szval += sprintf(strbuf + szval, "\n%s" I2_STR "%s",
					line_pfx, names[bit], bool_str[is_set]);
			} else if (is_set) {
				/* only the capabilities that are set */
				add_separator(&szval);
				szval += bufcpy(szval, names[bit]);
			}
		}
	}

	show_strbuf(pname, 0);
	return had_error;
}
/* Arch bits and endianness (HUMAN).
 *
 * Performs two queries: first `param` (read as a cl_uint holding the number
 * of bits), then CL_DEVICE_ENDIAN_LITTLE (read as a cl_bool). When both
 * succeed the two are printed together as "<bits>, <endianness>", the latter
 * being looked up in endian_str. If the first query fails the second one is
 * not attempted.
 *
 * Returns had_error, as set by the last GET_VAL executed.
 */
int device_info_arch(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_uint bits = 0;

	/* first query, in its own scope because GET_VAL works on a local
	 * named val and the two queries need different types */
	{
		cl_uint val = 0;
		GET_VAL;
		if (had_error) {
			/* nothing else to query: report and bail out */
			show_strbuf(pname, 0);
			return had_error;
		}
		bits = val;
	}

	/* second query: endianness */
	{
		cl_bool val = 0;
		param = CL_DEVICE_ENDIAN_LITTLE;
		current_param = "CL_DEVICE_ENDIAN_LITTLE";
		GET_VAL;
		if (!had_error)
			sprintf(strbuf, "%u, %s", bits, endian_str[val]);
	}

	show_strbuf(pname, 0);
	return had_error;
}
/* SVM capabilities.
 *
 * Queries the cl_device_svm_capabilities bitfield for `param`. In HUMAN mode
 * the output for CL_DEVICE_SVM_CAPABILITIES starts with a parenthesized note
 * built from `core` (when dev_is_20() passes) and the has_amd_svm string of
 * the device checks, followed by one line per known capability with its
 * boolean state; other output modes list only the capabilities that are
 * set, joined by the vertical-bar separator.
 *
 * Returns had_error, as set by GET_VAL.
 */
int device_info_svm_cap(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks *chk, int checked)
{
	cl_device_svm_capabilities val = 0;
	const int is_20 = dev_is_20(chk);
	const int has_amd_svm = (param == CL_DEVICE_SVM_CAPABILITIES && dev_has_amd_svm(chk));

	GET_VAL;
	if (!had_error) {
		size_t szval = 0;
		cl_uint bit = 0;
		const char * const *names = (output_mode == CLINFO_HUMAN ?
			svm_cap_str : svm_cap_raw_str);

		set_separator(vbar_str);

		if (output_mode == CLINFO_HUMAN && param == CL_DEVICE_SVM_CAPABILITIES) {
			/* show 'why' it's being shown */
			const char *core_part = (is_20 ? core : empty_str);
			/* a joiner is only needed when both parts are present */
			const char *joiner = ((is_20 && has_amd_svm) ? comma_str : empty_str);

			szval += sprintf(strbuf, "(%s%s%s)",
				core_part, joiner, chk->has_amd_svm);
		}

		for (bit = 0; bit < svm_cap_count; ++bit) {
			const int is_set = !!(val & ((cl_device_svm_capabilities)(1) << bit));

			if (output_mode == CLINFO_HUMAN) {
				/* one line per capability, set or not */
				szval += sprintf(strbuf + szval, "\n%s" I2_STR "%s",
					line_pfx, names[bit], bool_str[is_set]);
			} else if (is_set) {
				/* only the capabilities that are set */
				add_separator(&szval);
				szval += bufcpy(szval, names[bit]);
			}
		}
	}

	show_strbuf(pname, 0);
	return had_error;
}
/* Device terminate capability.
 *
 * Queries the cl_device_terminate_capability_khr bitfield for `param` and,
 * when it is non-zero, renders the name of every set bit that has an entry
 * in the terminate capability string table (human or raw, depending on
 * output_mode), joined by the current separator. Bits beyond the known ones
 * are appended as a single hexadecimal value.
 *
 * The line is only printed when the value is non-zero or the query failed.
 * Returns had_error, as set by GET_VAL.
 */
int device_info_terminate_capability(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	/* zero-initialized, like the other bitfield queries in this file, so
	 * that the tests on val below never read an indeterminate value should
	 * the query leave it untouched */
	cl_device_terminate_capability_khr val = 0;
	GET_VAL;
	if (!had_error && val) {
		/* iterate over terminate capability strings appending their textual form
		 * to strbuf */
		size_t szval = 0;
		cl_uint i = 0;
		const char * const *capstr = (output_mode == CLINFO_HUMAN ?
			terminate_capability_str : terminate_capability_raw_str);
		set_separator(output_mode == CLINFO_HUMAN ? comma_str : vbar_str);
		for (i = 0; i < terminate_capability_count; ++i) {
			cl_device_terminate_capability_khr cur = (cl_device_terminate_capability_khr)(1) << i;
			if (val & cur) {
				/* match: add separator if not first match */
				add_separator(&szval);
				szval += bufcpy(szval, capstr[i]);
			}
			/* stop as soon as the buffer is full */
			if (szval >= bufsz)
				break;
		}
		/* check for extra bits */
		if (szval < bufsz) {
			/* mask covering all the bits that have a name */
			cl_device_terminate_capability_khr known_mask = ((cl_device_terminate_capability_khr)(1) << terminate_capability_count) - 1;
			cl_device_terminate_capability_khr extra = val & ~known_mask;
			if (extra) {
				add_separator(&szval);
				szval += snprintf(strbuf + szval, bufsz - szval - 1, "0x%" PRIX64, extra);
			}
		}
	}
	/* a zero value without errors is not shown at all */
	if (val || had_error)
		show_strbuf(pname, 0);
	return had_error;
}
/* List of P2P devices.
 *
 * Fetches the array of cl_device_id values for `param` and renders them into
 * strbuf as a blank-separated list of pointer values.
 *
 * Writes are bounded by bufsz: the loop stops as soon as the next separator
 * or device would not fit, leaving strbuf NUL-terminated with what did fit
 * (the last entry may be cut short).
 *
 * Returns had_error, as set by GET_VAL_ARRAY.
 */
int device_info_p2p_dev_list(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_device_id *val = NULL;
	size_t szval = 0, numval = 0;

	GET_VAL_ARRAY;

	if (!had_error) {
		size_t cursor = 0;
		/* from here on szval is the write offset into strbuf */
		szval = 0;
		for (cursor = 0; cursor < numval; ++cursor) {
			int len = 0;

			if (szval > 0) {
				/* the blank must leave room for the terminator */
				if (szval + 1 >= bufsz)
					break;
				strbuf[szval] = ' ';
				++szval;
			}
			/* only possible with an empty buffer: nothing can be written */
			if (szval >= bufsz)
				break;

			/* snprintf() always terminates within the given size,
			 * which also covers the blank written above */
			len = snprintf(strbuf + szval, bufsz - szval, "0x%p", (void*)val[cursor]);
			if (len < 0 || (size_t)len >= bufsz - szval)
				break; /* output error, or the entry did not fit */
			szval += len;
		}
	}
	show_strbuf(pname, 0);
	free(val);
	return had_error;
}
/* List of simultaneous interops.
 *
 * Fetches the array of cl_uint values for `param`. The array is a sequence
 * of groups, each closed by a zero entry: entries within a group are
 * separated by a blank, groups by the group separator (comma in HUMAN mode,
 * vertical bar otherwise); the zero closing the last group is not printed.
 * Each entry is looked up in the cl_interop_names ranges and shown by name
 * (human or raw form), or in hexadecimal when no range contains it.
 *
 * The loop keeps the write offset strictly below bufsz at the start of each
 * step; on overflow the output is truncated via trunc_strbuf().
 *
 * Returns had_error, as set by GET_VAL_ARRAY.
 */
int device_info_interop_list(cl_device_id dev, cl_device_info param, const char *pname,
	const struct device_info_checks* UNUSED(chk), int checked)
{
	cl_uint *val = NULL;
	size_t szval = 0, numval = 0;

	GET_VAL_ARRAY;

	if (!had_error) {
		size_t cursor = 0;
		const cl_interop_name *interop_name_end = cl_interop_names + num_known_interops;
		/* index into the per-entry name pair */
		cl_uint human_raw = output_mode - CLINFO_HUMAN;
		const char *groupsep = (output_mode == CLINFO_HUMAN ? comma_str : vbar_str);
		/* true until the first entry of the current group has been printed */
		cl_bool first = CL_TRUE;
		/* from here on szval is the write offset into strbuf */
		szval = 0;
		for (cursor = 0; cursor < numval; ++cursor) {
			cl_uint current = val[cursor];
			if (!current && cursor < numval - 1) {
				/* A null value is used as group terminator, but we only print it
				 * if it's not the final one
				 */
				szval += snprintf(strbuf + szval, bufsz - szval - 1, "%s", groupsep);
				first = CL_TRUE;
			}
			if (current) {
				cl_bool found = CL_FALSE;
				const cl_interop_name *n = cl_interop_names;

				if (!first) {
					/* the blank must leave room for the terminator:
					 * with the offset on the last byte, the size
					 * passed to snprintf() below would wrap around */
					if (szval + 1 >= bufsz) {
						trunc_strbuf();
						break;
					}
					strbuf[szval] = ' ';
					++szval;
				}
				/* find the range of known interops containing this value */
				while (n < interop_name_end) {
					if (current >= n->from && current <= n->to) {
						found = CL_TRUE;
						break;
					}
					++n;
				}
				if (found) {
					cl_uint i = current - n->from;
					szval += snprintf(strbuf + szval, bufsz - szval - 1, "%s", n->value[i][human_raw]);
				} else {
					szval += snprintf(strbuf + szval, bufsz - szval - 1, "0x%" PRIX32, val[cursor]);
				}
				first = CL_FALSE;
			}
			if (szval >= bufsz) {
				trunc_strbuf();
				break;
			}
		}
	}
	show_strbuf(pname, 0);
	free(val);
	return had_error;
}
/*
 * Device info traits
 *
 * One entry of the table driving the device information output: it ties an
 * OpenCL device parameter to the labels used to print it, and to the
 * functions that show it and decide whether it applies.
 */
/* A CL_FALSE param means "just print pname" */
struct device_info_traits {
/* output mode(s) the entry applies to: it is tested as a bitmask against
 * the current output mode, and the entry is skipped when they don't match */
enum output_modes output_mode;
cl_device_info param; /* CL_DEVICE_*; handed to show_func */
const char *sname; /* "CL_DEVICE_*": the symbol name, used as label outside of HUMAN mode */
const char *pname; /* "Device *": the descriptive label, used in HUMAN mode */
const char *sfx; /* suffix for the output in non-raw mode; may be NULL for no suffix */
/* pointer to function that shows the parameter; its return value is
 * taken as the error status of the entry */
int (*show_func)(cl_device_id dev, cl_device_info param, const char *pname, const struct device_info_checks *, int checked);
/* pointer to function that checks if the parameter should be checked;
 * NULL means there is no condition to check for */
int (*check_func)(const struct device_info_checks *);
};
/* Helpers to fill the param, sname, pname, sfx and show_func members of a
 * struct device_info_traits in one go: the symbol is used both as value and,
 * stringified, as sname; typ selects the device_info_<typ> show function.
 * DINFO is the variant without a suffix. */
#define DINFO_SFX(symbol, name, sfx, typ) symbol, #symbol, name, sfx, device_info_##typ
#define DINFO(symbol, name, typ) symbol, #symbol, name, NULL, device_info_##typ

/* The table of device properties, in the order in which they are shown.
 * Each entry is { output mode(s), DINFO/DINFO_SFX(...), check function };
 * a NULL check function means the property is unconditional. Order matters:
 * the check functions rely on information gathered while processing earlier
 * entries (e.g. device version and extensions). */
struct device_info_traits dinfo_traits[] = {
	{ CLINFO_BOTH, DINFO(CL_DEVICE_NAME, "Device Name", str), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR, "Device Vendor", str), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_VENDOR_ID, "Device Vendor ID", hex), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_VERSION, "Device Version", str), NULL },
	{ CLINFO_BOTH, DINFO(CL_DRIVER_VERSION, "Driver Version", str), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_OPENCL_C_VERSION, "Device OpenCL C Version", str), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_EXTENSIONS, "Device Extensions", str_get), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_TYPE, "Device Type", devtype), NULL },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_BOARD_NAME_AMD, "Device Board Name (AMD)", str), dev_has_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_TOPOLOGY_AMD, "Device Topology (AMD)", devtopo_amd), dev_has_amd },

	/* Device Topology (NV) is multipart, so different for HUMAN and RAW */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device Topology (NV)", devtopo_nv), dev_has_nv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PCI_BUS_ID_NV, "Device PCI bus (NV)", int), dev_has_nv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PCI_SLOT_ID_NV, "Device PCI slot (NV)", int), dev_has_nv },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_PROFILE, "Device Profile", str), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE, "Device Available", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_COMPILER_AVAILABLE, "Compiler Available", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LINKER_AVAILABLE, "Linker Available", bool), dev_is_12 },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_COMPUTE_UNITS, "Max compute units", int), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, "SIMD per compute unit (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_WIDTH_AMD, "SIMD width (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, "SIMD instruction width (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MAX_CLOCK_FREQUENCY, "Max clock frequency", "MHz", int), NULL },

	/* Device Compute Capability (NV) is multipart, so different for HUMAN and RAW */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, "Compute Capability (NV)", cc_nv), dev_has_nv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, INDENT "Compute Capability Major (NV)", int), dev_has_nv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, INDENT "Compute Capability Minor (NV)", int), dev_has_nv },

	/* GFXIP (AMD) is multipart, so different for HUMAN and RAW */
	/* TODO: find a better human-friendly name than GFXIP; v3 of the cl_amd_device_attribute_query
	 * extension specification calls it “core engine GFXIP”, which honestly is not better than
	 * our name choice. */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, "Graphics IP (AMD)", gfxip_amd), dev_is_gpu_amd },
	{ CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MAJOR_AMD, INDENT "Graphics IP MAJOR (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_RAW, DINFO(CL_DEVICE_GFXIP_MINOR_AMD, INDENT "Graphics IP MINOR (AMD)", int), dev_is_gpu_amd },

	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_CORE_TEMPERATURE_ALTERA, "Core Temperature (Altera)", " C", int), dev_has_altera_dev_temp },

	/* Device partition support: summary is only presented in HUMAN case */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, "Device Partition", partition_header), dev_has_partition },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, INDENT "Max number of sub-devices", int), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_PROPERTIES, INDENT "Supported partition types", partition_types), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, INDENT "Supported affinity domains", partition_affinities), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PARTITION_TYPES_EXT, INDENT "Supported partition types (ext)", partition_types_ext), dev_has_fission },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AFFINITY_DOMAINS_EXT, INDENT "Supported affinity domains (ext)", partition_affinities_ext), dev_has_fission },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "Max work item dimensions", int), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_ITEM_SIZES, "Max work item sizes", szptr_times), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE, "Max work group size", sz), NULL },

	/* cl_amd_device_attribute_query v4 */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD, "Preferred work group size (AMD)", sz), dev_has_amd_v4 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD, "Max work group size (AMD)", sz), dev_has_amd_v4 },

	{ CLINFO_BOTH, DINFO(CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "Preferred work group size multiple", wg), dev_has_compiler },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_WARP_SIZE_NV, "Warp size (NV)", int), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_WAVEFRONT_WIDTH_AMD, "Wavefront width (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NUM_SUB_GROUPS, "Max sub-groups per work group", int), dev_is_21 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, "Max named sub-group barriers", int), dev_has_subgroup_named_barrier },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_SIZES_INTEL, "Sub-group sizes (Intel)", szptr_comma), dev_has_intel_required_subgroup_size },

	/* Preferred/native vector widths: header is only presented in HUMAN case, that also pairs
	 * PREFERRED and NATIVE in a single line */
#define DINFO_VECWIDTH(Type, type) \
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, vecwidth), NULL }, \
	{ CLINFO_RAW, DINFO(CL_DEVICE_PREFERRED_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }, \
	{ CLINFO_RAW, DINFO(CL_DEVICE_NATIVE_VECTOR_WIDTH_##Type, INDENT #type, int), NULL }

	{ CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred / native vector sizes", str), NULL },
	DINFO_VECWIDTH(CHAR, char),
	DINFO_VECWIDTH(SHORT, short),
	DINFO_VECWIDTH(INT, int),
	DINFO_VECWIDTH(LONG, long),
	DINFO_VECWIDTH(HALF, half),
	DINFO_VECWIDTH(FLOAT, float),
	DINFO_VECWIDTH(DOUBLE, double),

	/* Floating point configurations */
	/* Note: the cond argument is not used by the expansion, the entries are
	 * unconditional (NULL check function); whether the value is actually
	 * queried is decided by device_info_fpconf itself */
#define DINFO_FPCONF(Type, type, cond) \
	{ CLINFO_BOTH, DINFO(CL_DEVICE_##Type##_FP_CONFIG, #type "-precision Floating-point support", fpconf), NULL }

	DINFO_FPCONF(HALF, Half, dev_has_half),
	DINFO_FPCONF(SINGLE, Single, NULL),
	DINFO_FPCONF(DOUBLE, Double, dev_has_double),

	/* Address bits and endianness are written together for HUMAN, separate for RAW */
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", arch), NULL },
	{ CLINFO_RAW, DINFO(CL_DEVICE_ADDRESS_BITS, "Address bits", int), NULL },
	{ CLINFO_RAW, DINFO(CL_DEVICE_ENDIAN_LITTLE, "Little Endian", bool), NULL },

	/* Global memory */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, "Global free memory (AMD)", free_mem_amd), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, "Global memory channels (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, "Global memory banks per channel (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, "Global memory bank width (AMD)", bytes_str, int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_ERROR_CORRECTION_SUPPORT, "Error Correction support", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_MEM_ALLOC_SIZE, "Max memory allocation", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_HOST_UNIFIED_MEMORY, "Unified memory for Host and Device", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_INTEGRATED_MEMORY_NV, "Integrated memory (NV)", bool), dev_has_nv },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES, "Shared Virtual Memory (SVM) capabilities", svm_cap), dev_has_svm },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SVM_CAPABILITIES_ARM, "Shared Virtual Memory (SVM) capabilities (ARM)", svm_cap), dev_has_arm_svm },

	/* Alignment */
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, "Minimum alignment for any data type", bytes_str, int), NULL },
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", bits), NULL },
	{ CLINFO_RAW, DINFO(CL_DEVICE_MEM_BASE_ADDR_ALIGN, "Alignment of base address", int), NULL },

	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PAGE_SIZE_QCOM, "Page size (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, "External memory padding (QCOM)", bytes_str, sz), dev_has_qcom_ext_host_ptr },

	/* Atomics alignment, with HUMAN-only header */
	{ CLINFO_HUMAN, DINFO(CL_FALSE, "Preferred alignment for atomics", str), dev_is_20 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, INDENT "SVM", bytes_str, int), dev_is_20 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, INDENT "Global", bytes_str, int), dev_is_20 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, INDENT "Local", bytes_str, int), dev_is_20 },

	/* Global variables. TODO some 1.2 devices respond to this too */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, "Max size for global variable", mem), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, "Preferred total size of global vars", mem), dev_is_20 },

	/* Global memory cache */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, "Global Memory cache type", cachetype), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, "Global Memory cache size", mem), dev_has_cache },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, "Global Memory cache line size", " bytes", int), dev_has_cache },

	/* Image support */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_IMAGE_SUPPORT, "Image support", bool), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_SAMPLERS, INDENT "Max number of samplers per kernel", int), dev_has_images },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, INDENT "Max size for 1D images from buffer", pixels_str, sz), dev_has_images_12 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, INDENT "Max 1D or 2D image array size", images_str, sz), dev_has_images_12 },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, INDENT "Base address alignment for 2D image buffers", bytes_str, sz), dev_has_image2d_buffer },
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, INDENT "Pitch alignment for 2D image buffers", pixels_str, sz), dev_has_image2d_buffer },

	/* Image dimensions are split for RAW, combined for HUMAN */
	{ CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image size", pixels_str, img_sz_2d), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_HEIGHT, INDENT "Max 2D image height", sz), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE2D_MAX_WIDTH, INDENT "Max 2D image width", sz), dev_has_images },
	{ CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image size", pixels_str, img_sz_2d), dev_has_intel_planar_yuv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL, INDENT "Max planar YUV image height", sz), dev_has_intel_planar_yuv },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL, INDENT "Max planar YUV image width", sz), dev_has_intel_planar_yuv },
	{ CLINFO_HUMAN, DINFO_SFX(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image size", pixels_str, img_sz_3d), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_HEIGHT, INDENT "Max 3D image height", sz), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_WIDTH, INDENT "Max 3D image width", sz), dev_has_images },
	{ CLINFO_RAW, DINFO(CL_DEVICE_IMAGE3D_MAX_DEPTH, INDENT "Max 3D image depth", sz), dev_has_images },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_IMAGE_ARGS, INDENT "Max number of read image args", int), dev_has_images },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, INDENT "Max number of write image args", int), dev_has_images },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, INDENT "Max number of read/write image args", int), dev_has_images_20 },

	/* Pipes */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PIPE_ARGS, "Max number of pipe args", int), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, "Max active pipe reservations", int), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PIPE_MAX_PACKET_SIZE, "Max pipe packet size", mem_int), dev_is_20 },

	/* Local memory */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_TYPE, "Local memory type", lmemtype), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE, "Local memory size", mem), dev_has_lmem },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, "Local memory size per CU (AMD)", mem), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_LOCAL_MEM_BANKS_AMD, "Local memory banks (AMD)", int), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_REGISTERS_PER_BLOCK_NV, "Registers per block (NV)", int), dev_has_nv },

	/* Constant memory */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_ARGS, "Max number of constant args", int), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "Max constant buffer size", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD, "Preferred constant buffer size (AMD)", mem_sz), dev_has_amd_v4 },

	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_PARAMETER_SIZE, "Max size of kernel argument", mem), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, "Max number of atomic counters", sz), dev_has_atomic_counters },

	/* Queue properties */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_PROPERTIES, "Queue properties", qprop), dev_not_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, "Queue properties (on host)", qprop), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, "Queue properties (on device)", qprop), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, INDENT "Preferred size", mem), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, INDENT "Max size", mem), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_QUEUES, "Max queues on device", int), dev_is_20 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_ON_DEVICE_EVENTS, "Max events on device", int), dev_is_20 },

	/* Terminate context */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x, "Terminate capability (1.2 define)", terminate_capability), dev_has_terminate_context },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x, "Terminate capability (2.x define)", terminate_capability), dev_has_terminate_context },

	/* Interop */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, "Prefer user sync for interop", bool), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL, "Number of simultaneous interops (Intel)", int), dev_has_simultaneous_sharing },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL, "Simultaneous interops", interop_list), dev_has_simultaneous_sharing },

	/* P2P buffer copy */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_NUM_P2P_DEVICES_AMD, "Number of P2P devices (AMD)", int), dev_has_p2p },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_P2P_DEVICES_AMD, "P2P devices (AMD)", p2p_dev_list), dev_has_p2p },

	/* Profiling resolution */
	{ CLINFO_BOTH, DINFO_SFX(CL_DEVICE_PROFILING_TIMER_RESOLUTION, "Profiling timer resolution", "ns", sz), NULL },
	{ CLINFO_HUMAN, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", time_offset), dev_has_amd },
	{ CLINFO_RAW, DINFO(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, "Profiling timer offset since Epoch (AMD)", long), dev_has_amd },

	/* Kernel execution capabilities */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_EXECUTION_CAPABILITIES, "Execution capabilities", execap), NULL },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, INDENT "Sub-group independent forward progress", bool), dev_is_21 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, INDENT "Thread trace supported (AMD)", bool), dev_is_gpu_amd },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, INDENT "Kernel execution timeout (NV)", bool), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_GPU_OVERLAP_NV, "Concurrent copy and kernel execution (NV)", bool), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV, INDENT "Number of async copy engines", int), dev_has_nv },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, INDENT "Number of async queues (AMD)", int), dev_has_amd_v4 },
	/* TODO FIXME undocumented, experimental */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, INDENT "Max real-time compute queues (AMD)", int), dev_has_amd_v4 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, INDENT "Max real-time compute units (AMD)", int), dev_has_amd_v4 },

	/* TODO: this should tell if it's being done due to the device being 2.1 or due to it having the extension */
	{ CLINFO_BOTH, DINFO(CL_DEVICE_IL_VERSION, INDENT "IL version", str), dev_has_il },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_SPIR_VERSIONS, INDENT "SPIR versions", str), dev_has_spir },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_PRINTF_BUFFER_SIZE, "printf() buffer size", mem_sz), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_BUILT_IN_KERNELS, "Built-in kernels", str), dev_is_12 },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_ME_VERSION_INTEL, "Motion Estimation accelerator version (Intel)", int), dev_has_intel_AME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_VERSION_INTEL, INDENT "Device-side AVC Motion Estimation version", int), dev_has_intel_AVC_ME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL, INDENT INDENT "Supports texture sampler use", bool), dev_has_intel_AVC_ME },
	{ CLINFO_BOTH, DINFO(CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL, INDENT INDENT "Supports preemption", bool), dev_has_intel_AVC_ME },
};
/* Process all the device info in the traits, except if param_whitelist is not NULL,
* in which case only those in the whitelist will be processed.
* If present, the whitelist should be sorted in the order of appearance of the parameters
* in the traits table, and terminated by the value CL_FALSE
*
* device: array of device IDs; the device being queried is device[d]
* p: platform index, used to pick the matching entry of platform_checks
* d: index of the device inside the device array
*
* Results are printed by the per-trait show_func; the extensions string is
* printed last, after all the other properties.
*
* NOTE(review): the whitelist is only honoured when cond_prop_mode is
* COND_PROP_CHECK; in the other modes every trait is processed even if a
* whitelist was passed. Confirm this is intended.
*/
void
printDeviceInfo(const cl_device_id *device, cl_uint p, cl_uint d,
const cl_device_info *param_whitelist) /* list of device info to process, or NULL */
{
cl_device_id dev = device[d];
char *extensions = NULL;
/* pointer to the traits for CL_DEVICE_EXTENSIONS */
const struct device_info_traits *extensions_traits = NULL;
struct device_info_checks chk;
memset(&chk, 0, sizeof(chk));
chk.pinfo_checks = platform_checks + p;
/* default device version, replaced below once CL_DEVICE_VERSION is read
* (presumably 10 stands for OpenCL 1.0 -- confirm against getOpenCLVersion) */
chk.dev_version = 10;
current_function = __func__;
/* current_line doubles as the loop index, so error reports identify the
* row of the traits table that was being processed */
for (current_line = 0; current_line < ARRAY_SIZE(dinfo_traits); ++current_line) {
const struct device_info_traits *traits = dinfo_traits + current_line;
/* human mode uses the descriptive name, otherwise the symbolic name */
const char *pname = (output_mode == CLINFO_HUMAN ?
traits->pname : traits->sname);
/* checked is true if there was no condition to check for, or if the
* condition was satisfied
*/
int checked = !(traits->check_func && !traits->check_func(&chk));
current_param = traits->sname;
/* Whitelist check: finish if done traversing the list,
* skip current param if it's not the right one
*/
if (cond_prop_mode == COND_PROP_CHECK && param_whitelist) {
if (*param_whitelist == CL_FALSE)
break;
if (traits->param != *param_whitelist)
continue;
/* matched: move on to the next whitelisted parameter */
++param_whitelist;
}
/* skip if it's not for this output mode */
if (!(output_mode & traits->output_mode))
continue;
/* in CHECK mode, properties whose precondition failed are not queried */
if (cond_prop_mode == COND_PROP_CHECK && !checked)
continue;
/* the suffix is only used in human mode */
cur_sfx = (output_mode == CLINFO_HUMAN && traits->sfx) ? traits->sfx : empty_str;
/* Handle headers */
if (traits->param == CL_FALSE) {
strbuf[0] = '\0';
show_strbuf(pname, 0);
had_error = CL_FALSE;
continue;
}
/* start from an empty buffer, then let the trait query and show itself */
strbuf[0] = '\0';
had_error = traits->show_func(dev, traits->param,
pname, &chk, checked);
if (traits->param == CL_DEVICE_EXTENSIONS) {
/* make a backup of the extensions string, regardless of
* errors */
size_t len = strlen(strbuf);
extensions_traits = traits;
ALLOC(extensions, len+1, "extensions");
memcpy(extensions, strbuf, len);
extensions[len] = '\0';
}
if (had_error)
continue;
/* Some properties feed the checks used by later traits: record them in chk */
switch (traits->param) {
case CL_DEVICE_VERSION:
/* compute numeric value for OpenCL version */
/* the first 7 characters are skipped (presumably the "OpenCL " prefix -- confirm) */
chk.dev_version = getOpenCLVersion(strbuf + 7);
break;
case CL_DEVICE_EXTENSIONS:
identify_device_extensions(extensions, &chk);
break;
case CL_DEVICE_TYPE:
/* strbuf was abused to give us the dev type */
memcpy(&(chk.devtype), strbuf, sizeof(chk.devtype));
break;
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
/* strbuf was abused to give us the cache type */
memcpy(&(chk.cachetype), strbuf, sizeof(chk.cachetype));
break;
case CL_DEVICE_LOCAL_MEM_TYPE:
/* strbuf was abused to give us the lmem type */
memcpy(&(chk.lmemtype), strbuf, sizeof(chk.lmemtype));
break;
case CL_DEVICE_IMAGE_SUPPORT:
/* strbuf was abused to give us boolean value */
memcpy(&(chk.image_support), strbuf, sizeof(chk.image_support));
break;
case CL_DEVICE_COMPILER_AVAILABLE:
/* strbuf was abused to give us boolean value */
memcpy(&(chk.compiler_available), strbuf, sizeof(chk.compiler_available));
break;
default:
/* do nothing */
break;
}
}
// and finally the extensions, if we retrieved them
if (extensions)
printf("%s" I1_STR "%s\n", line_pfx, (output_mode == CLINFO_HUMAN ?
extensions_traits->pname :
extensions_traits->sname), extensions);
/* free(NULL) is a no-op, so this is safe when no backup was made */
free(extensions);
extensions = NULL;
}
/* list of allowed properties for AMD offline devices */
/* everything else seems to be set to 0, and all the other string properties
* actually segfault the driver */
/* This table is passed to printDeviceInfo() as its param_whitelist: entries
* must therefore follow the order in which the parameters appear in the
* device traits table, and the list must end with CL_FALSE. */
static const cl_device_info amd_offline_info_whitelist[] = {
CL_DEVICE_NAME,
/* These are present, but all the same, so just skip them:
CL_DEVICE_VENDOR,
CL_DEVICE_VENDOR_ID,
CL_DEVICE_VERSION,
CL_DRIVER_VERSION,
CL_DEVICE_OPENCL_C_VERSION,
*/
CL_DEVICE_EXTENSIONS,
CL_DEVICE_TYPE,
CL_DEVICE_GFXIP_MAJOR_AMD,
CL_DEVICE_GFXIP_MINOR_AMD,
CL_DEVICE_MAX_WORK_GROUP_SIZE,
/* terminator */
CL_FALSE
};
/* Process offline devices from the cl_amd_offline_devices extension.
 *
 * A context is created on platform number p with the
 * CL_CONTEXT_OFFLINE_DEVICES_AMD property set, and the devices of that context
 * are either listed by name (list_only) or fully described via
 * printDeviceInfo() restricted to amd_offline_info_whitelist.
 *
 * Returns ret, which is 0 unless changed by RR_ERROR (defined elsewhere;
 * presumably it records the failure and jumps to the out label -- confirm).
 * Callers print strbuf when the return value is non-zero.
 */
int processOfflineDevicesAMD(cl_uint p)
{
	int ret = 0;
	cl_platform_id pid = platform[p];
	cl_device_id *device = NULL;
	cl_int num_devs = 0, d;
	cl_context_properties ctxpft[] = {
		CL_CONTEXT_PLATFORM, (cl_context_properties)pid,
		CL_CONTEXT_OFFLINE_DEVICES_AMD, (cl_context_properties)CL_TRUE,
		0
	};
	cl_context ctx = NULL;

	if (!list_only)
		printf("%s" I0_STR, line_pfx,
			(output_mode == CLINFO_HUMAN ?
			 "Number of offline devices (AMD)" : "#OFFDEVICES"));

	ctx = clCreateContextFromType(ctxpft, CL_DEVICE_TYPE_ALL, NULL, NULL, &error);
	RR_ERROR("create context");

	error = clGetContextInfo(ctx, CL_CONTEXT_NUM_DEVICES, sizeof(num_devs), &num_devs, NULL);
	RR_ERROR("get num devs");

	ALLOC(device, num_devs, "offline devices");
	error = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, num_devs*sizeof(*device), device, NULL);
	RR_ERROR("get devs");

	if (!list_only)
		printf("%d\n", num_devs);

	for (d = 0; d < num_devs; ++d) {
		if (list_only) {
			/*
			if (output_mode == CLINFO_HUMAN)
				puts(" |");
			*/
			/* the last entry of the tree gets a closing corner */
			if (d == num_devs - 1 && output_mode != CLINFO_RAW)
				line_pfx[1] = '`';
			had_error = device_info_str_get(device[d], CL_DEVICE_NAME, "CL_DEVICE_NAME", NULL, CL_TRUE);
			/* d is never negative here; cast to match the %u conversion */
			printf("%s%u: %s\n", line_pfx, (cl_uint)d, strbuf);
		} else {
			if (line_pfx_len > 0) {
				size_t pfx_chars;
				/* Offline devices are tagged with the negated index.
				 * It is a signed value, so it must be printed with %d:
				 * with %u, index 1 became 4294967295, which is both
				 * misleading and longer than the prefix buffer. */
				snprintf(strbuf, bufsz, "[%s/%d]", pdata[p].sname, -d);
				/* The padded prefix takes max(strlen, line_pfx_len)
				 * characters plus the terminator: make sure it fits */
				pfx_chars = strlen(strbuf);
				if (pfx_chars < (size_t)line_pfx_len)
					pfx_chars = (size_t)line_pfx_len;
				REALLOC(line_pfx, pfx_chars + 1, "line prefix");
				sprintf(line_pfx, "%*s", -line_pfx_len, strbuf);
			}
			printDeviceInfo(device, p, d, amd_offline_info_whitelist);
			/* blank line between devices, but not after the last one */
			if (d < num_devs - 1)
				puts("");
		}
		fflush(stdout);
		fflush(stderr);
	}
	had_error = CL_FALSE;
out:
	free(device);
	if (ctx)
		clReleaseContext(ctx);
	return ret;
}
/* List platforms and their devices by name only, as a small tree in human
 * mode or as "platform:device" pairs in raw mode.
 *
 * show_offline: also list the AMD offline devices of platforms that have them.
 */
void listPlatformsAndDevices(cl_bool show_offline)
{
	cl_uint p, d;
	cl_device_id *device;

	/* Build in strbuf the longest prefix that will be written to line_pfx
	 * below, so that the buffer can be sized from it. The template must
	 * match what is actually written: the raw prefix has a one-character
	 * separator (':' or '*') after the platform number, and the offline
	 * human prefix is "Offline Device" with a space. Sizing from a shorter
	 * template makes the later sprintf() calls write past the buffer. */
	if (output_mode == CLINFO_RAW)
		sprintf(strbuf, "%u:", num_platforms);
	else
		sprintf(strbuf, " +-- %sDevice #", (show_offline ? "Offline " : ""));
	/* + 1 for the string terminator */
	line_pfx_len = (int)(strlen(strbuf) + 1);
	REALLOC(line_pfx, line_pfx_len, "line prefix");

	/* device walks all_devices, advancing by the device count of each platform */
	for (p = 0, device = all_devices; p < num_platforms; device += pdata[p++].ndevs) {
		printf("%s%u: %s\n",
			(output_mode == CLINFO_HUMAN ? "Platform #" : ""),
			p, pdata[p].pname);
		if (output_mode == CLINFO_RAW)
			sprintf(line_pfx, "%u:", p);
		else
			sprintf(line_pfx, " +-- Device #");

		if (pdata[p].ndevs > 0) {
			error = clGetDeviceIDs(platform[p], CL_DEVICE_TYPE_ALL, pdata[p].ndevs, device, NULL);
			CHECK_ERROR("device IDs");
			for (d = 0; d < pdata[p].ndevs; ++d) {
				/*
				if (output_mode == CLINFO_HUMAN)
					puts(" |");
				*/
				/* the tree is closed here only if no offline
				 * devices will follow for this platform */
				cl_bool last_device = (d == pdata[p].ndevs - 1 && output_mode != CLINFO_RAW &&
					(!show_offline || !pdata[p].has_amd_offline));
				if (last_device)
					line_pfx[1] = '`';
				had_error = device_info_str_get(device[d], CL_DEVICE_NAME, "CL_DEVICE_NAME", NULL, CL_TRUE);
				printf("%s%u: %s\n", line_pfx, d, strbuf);
				fflush(stdout);
				fflush(stderr);
			}
		}

		if (show_offline && pdata[p].has_amd_offline) {
			if (output_mode == CLINFO_RAW)
				sprintf(line_pfx, "%u*", p);
			else
				sprintf(line_pfx, " +-- Offline Device #");
			had_error = processOfflineDevicesAMD(p);
			/* on failure strbuf holds the error report */
			if (had_error)
				puts(strbuf);
		}
	}
}
/* Show the full device information for every device of every platform.
 *
 * show_offline: also process the AMD offline devices of platforms that
 * have them.
 */
void showDevices(cl_bool show_offline)
{
	cl_uint p, d;
	cl_device_id *device;

	/* TODO consider enabling this for both output modes */
	if (output_mode == CLINFO_RAW) {
		/* The prefix is "[sname/N]" padded to line_pfx_len characters:
		 * longest short name + digits of the largest device count +
		 * the three characters "[/]" + one separating blank */
		sprintf(strbuf, "%u", maxdevs);
		line_pfx_len = (int)(platform_sname_maxlen + strlen(strbuf) + 4);
		/* The padded prefix is line_pfx_len characters long, so the
		 * buffer needs one more byte for the string terminator */
		REALLOC(line_pfx, line_pfx_len + 1, "line prefix");
	}

	/* device walks all_devices, advancing by the device count of each platform */
	for (p = 0, device = all_devices; p < num_platforms; device += pdata[p++].ndevs) {
		if (line_pfx_len > 0) {
			/* platform-level lines use '*' in place of the device index */
			sprintf(strbuf, "[%s/*]", pdata[p].sname);
			sprintf(line_pfx, "%*s", -line_pfx_len, strbuf);
		}
		printf("%s" I1_STR "%s\n",
			line_pfx,
			(output_mode == CLINFO_HUMAN ?
			 pinfo_traits[0].pname : pinfo_traits[0].sname),
			pdata[p].pname);
		printf("%s" I0_STR "%u\n",
			line_pfx,
			(output_mode == CLINFO_HUMAN ?
			 "Number of devices" : "#DEVICES"),
			pdata[p].ndevs);

		if (pdata[p].ndevs > 0) {
			error = clGetDeviceIDs(platform[p], CL_DEVICE_TYPE_ALL, pdata[p].ndevs, device, NULL);
			CHECK_ERROR("device IDs");
		}
		for (d = 0; d < pdata[p].ndevs; ++d) {
			if (line_pfx_len > 0) {
				sprintf(strbuf, "[%s/%u]", pdata[p].sname, d);
				sprintf(line_pfx, "%*s", -line_pfx_len, strbuf);
			}
			printDeviceInfo(device, p, d, NULL);
			/* blank line between devices, but not after the last one */
			if (d < pdata[p].ndevs - 1)
				puts("");
			fflush(stdout);
			fflush(stderr);
		}
		if (show_offline && pdata[p].has_amd_offline) {
			puts("");
			had_error = processOfflineDevicesAMD(p);
			/* on failure strbuf holds the error report */
			if (had_error)
				puts(strbuf);
		}
		puts("");
	}
}
/* check the behavior of clGetPlatformInfo() when given a NULL platform ID */
void checkNullGetPlatformName(void)
{
current_param = "CL_PLATFORM_NAME";
error = clGetPlatformInfo(NULL, CL_PLATFORM_NAME, bufsz, strbuf, NULL);
if (error == CL_INVALID_PLATFORM) {
bufcpy(0, no_plat());
} else {
current_line = __LINE__+1;
had_error = REPORT_ERROR2("get %s");
}
printf(I1_STR "%s\n",
"clGetPlatformInfo(NULL, CL_PLATFORM_NAME, ...)", strbuf);
}
/* check the behavior of clGetDeviceIDs() when given a NULL platform ID;
* return the index of the default platform in our array of platform IDs,
* or num_platforms (which is an invalid platform index) in case of errors
* or no platform or device found.
*
* The textual outcome of the check is left in strbuf and printed before
* returning.
*
* NOTE(review): when more than one platform without devices is found in the
* CL_DEVICE_NOT_FOUND case, pidx still holds the index of the first such
* platform, so that index is what gets returned. Confirm this is intended.
*/
cl_uint checkNullGetDevices(void)
{
cl_uint i = 0; /* generic iterator */
cl_device_id dev = NULL; /* sample device */
cl_platform_id plat = NULL; /* detected platform */
cl_uint found = 0; /* number of platforms found */
cl_uint pidx = num_platforms; /* index of the platform found */
cl_uint numdevs = 0;
current_function = __func__;
current_param = "device IDs";
/* first only ask for the number of devices of the NULL platform */
error = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 0, NULL, &numdevs);
/* TODO we should check other CL_DEVICE_TYPE_* combinations, since a smart
* implementation might give you a different default platform for GPUs
* and for CPUs.
* Of course the “no devices” case would then need to be handled differently.
* The logic might be maintained similarly, provided we also gather
* the number of devices of each type for each platform, although it's
* obviously more likely to have multiple platforms with no devices
* of a given type.
*/
switch (error) {
case CL_INVALID_PLATFORM:
bufcpy(0, no_plat());
break;
case CL_DEVICE_NOT_FOUND:
/* No devices were found, see if there are platforms with
* no devices, and if there's only one, assume this is the
* one being used as default by the ICD loader */
for (i = 0; i < num_platforms; ++i) {
if (pdata[i].ndevs == 0) {
++found;
/* a second match makes the guess ambiguous: stop counting */
if (found > 1)
break;
else {
plat = platform[i];
pidx = i;
}
}
}
switch (found) {
case 0:
bufcpy(0, (output_mode == CLINFO_HUMAN ?
"" :
"CL_DEVICE_NOT_FOUND | CL_INVALID_PLATFORM"));
break;
case 1:
bufcpy(0, (output_mode == CLINFO_HUMAN ?
pdata[pidx].pname :
pdata[pidx].sname));
break;
default: /* found > 1 */
bufcpy(0, (output_mode == CLINFO_HUMAN ?
"" :
"CL_DEVICE_NOT_FOUND | ????"));
break;
}
break;
default:
/* each current_line assignment points the error report at the line that follows it */
current_line = __LINE__+1;
had_error = REPORT_ERROR2("get number of %s");
if (had_error)
break;
/* Determine platform by looking at the CL_DEVICE_PLATFORM of
* one of the devices */
error = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 1, &dev, NULL);
current_line = __LINE__+1;
had_error = REPORT_ERROR2("get %s");
if (had_error)
break;
current_param = "CL_DEVICE_PLATFORM";
error = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM,
sizeof(plat), &plat, NULL);
current_line = __LINE__+1;
had_error = REPORT_ERROR2("get %s");
if (had_error)
break;
/* look up the reported platform among the ones we know about */
for (i = 0; i < num_platforms; ++i) {
if (platform[i] == plat) {
pidx = i;
sprintf(strbuf, "%s [%s]",
(output_mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
pdata[i].sname);
break;
}
}
/* the reported platform is not in our list */
/* NOTE(review): the format string is empty although plat is passed as
* an argument, so nothing is reported here -- check the intended text */
if (i == num_platforms) {
sprintf(strbuf, "", (void*)plat);
}
}
printf(I1_STR "%s\n",
"clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, ...)", strbuf);
return pidx;
}
/* Try to create a context with NULL properties on a single device, leaving
 * the outcome in strbuf: "Success"/"CL_SUCCESS" followed by the short name of
 * platform pidx, or the error report.
 *
 * pidx: index of the platform the device belongs to
 * dev: pointer to the device to create the context with
 * which: label for the platform kind, interpolated in the error report
 */
void checkNullCtx(cl_uint pidx, const cl_device_id *dev, const char *which)
{
	cl_context null_ctx;

	current_function = __func__;
	current_param = which;
	null_ctx = clCreateContext(NULL, 1, dev, NULL, NULL, &error);

	/* the report refers to the REPORT_ERROR2 line, two lines below */
	current_line = __LINE__+2;

	had_error = REPORT_ERROR2("create context with device from %s platform");
	if (!had_error) {
		const char *ok_str =
			(output_mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS");
		sprintf(strbuf, "%s [%s]", ok_str, pdata[pidx].sname);
	}

	/* the context was only needed for the check */
	if (null_ctx != NULL)
		clReleaseContext(null_ctx);
}
/* check behavior of clCreateContextFromType() with NULL cl_context_properties */
void checkNullCtxFromType(void)
{
size_t t; /* type iterator */
size_t i; /* generic iterator */
char def[1024];
cl_context ctx = NULL;
size_t ndevs = 8;
size_t szval = 0;
size_t cursz = ndevs*sizeof(cl_device_id);
cl_platform_id plat = NULL;
cl_device_id *devs = NULL;
const char *platname_prop = (output_mode == CLINFO_HUMAN ?
pinfo_traits[0].pname :
pinfo_traits[0].sname);
const char *devname_prop = (output_mode == CLINFO_HUMAN ?
dinfo_traits[0].pname :
dinfo_traits[0].sname);
ALLOC(devs, ndevs, "context devices");
current_function = __func__;
for (t = 1; t < devtype_count; ++t) { /* we skip 0 */
current_param = device_type_raw_str[t];
sprintf(strbuf, "clCreateContextFromType(NULL, %s)", current_param);
sprintf(def, I1_STR, strbuf);
current_line = __LINE__+1;
ctx = clCreateContextFromType(NULL, devtype[t], NULL, NULL, &error);
switch (error) {
case CL_INVALID_PLATFORM:
bufcpy(0, no_plat()); break;
case CL_DEVICE_NOT_FOUND:
bufcpy(0, no_dev_found()); break;
case CL_INVALID_DEVICE_TYPE: /* e.g. _CUSTOM device on 1.1 platform */
bufcpy(0, invalid_dev_type()); break;
case CL_INVALID_VALUE: /* This is what apple returns for the case above */
bufcpy(0, invalid_dev_type()); break;
case CL_DEVICE_NOT_AVAILABLE:
bufcpy(0, no_dev_avail()); break;
default:
had_error = REPORT_ERROR2("create context from type %s");
if (had_error)
break;
/* get the devices */
current_param = "CL_CONTEXT_DEVICES";
current_line = __LINE__+2;
error = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &szval);
had_error = REPORT_ERROR2("get %s size");
if (had_error)
break;
if (szval > cursz) {
REALLOC(devs, szval, "context devices");
cursz = szval;
}
current_line = __LINE__+1;
error = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, cursz, devs, NULL);
had_error = REPORT_ERROR2("get %s");
if (had_error)
break;
ndevs = szval/sizeof(cl_device_id);
if (ndevs < 1) {
bufcpy(0, "");
}
/* get the platform from the first device */
current_param = "CL_DEVICE_PLATFORM";
current_line = __LINE__+1;
error = clGetDeviceInfo(*devs, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
had_error = REPORT_ERROR2("get %s");
if (had_error)
break;
szval = 0;
for (i = 0; i < num_platforms; ++i) {
if (platform[i] == plat)
break;
}
if (i == num_platforms) {
sprintf(strbuf, "", (void*)plat);
break;
} else {
szval += sprintf(strbuf, "%s (%" PRIuS ")",
(output_mode == CLINFO_HUMAN ? "Success" : "CL_SUCCESS"),
ndevs);
szval += snprintf(strbuf + szval, bufsz - szval, "\n" I2_STR "%s",
platname_prop, pdata[i].pname);
}
for (i = 0; i < ndevs; ++i) {
size_t szname = 0;
/* for each device, show the device name */
/* TODO some other unique ID too, e.g. PCI address, if available? */
szval += snprintf(strbuf + szval, bufsz - szval, "\n" I2_STR, devname_prop);
if (szval >= bufsz) {
trunc_strbuf();
break;
}
current_param = "CL_DEVICE_NAME";
current_line = __LINE__+1;
error = clGetDeviceInfo(devs[i], CL_DEVICE_NAME, bufsz - szval, strbuf + szval, &szname);
had_error = REPORT_ERROR2("get %s");
if (had_error)
break;
szval += szname - 1;
}
if (i != ndevs)
break; /* had an error earlier, bail */
}
if (ctx) {
clReleaseContext(ctx);
ctx = NULL;
}
printf("%s%s\n", def, strbuf);
}
free(devs);
}
/* Run all the NULL-platform checks: clGetPlatformInfo() and clGetDeviceIDs()
 * with a NULL platform, clCreateContext() with NULL properties on a device of
 * the default platform and on a device of another platform, and finally
 * clCreateContextFromType() with NULL properties.
 */
void checkNullBehavior(void)
{
	cl_device_id *first_dev = NULL;
	cl_uint scan = 0;
	cl_uint pidx;

	puts("NULL platform behavior");

	checkNullGetPlatformName();
	pidx = checkNullGetDevices();

	/* Default platform: if there is one and it has devices, try creating
	 * a context with its first device */
	if (pidx == num_platforms) {
		bufcpy(0, no_plat());
	} else if (pdata[pidx].ndevs == 0) {
		bufcpy(0, no_dev_found());
	} else {
		/* skip over the devices of the platforms before the default one */
		first_dev = all_devices;
		for (scan = 0; scan < num_platforms; ++scan) {
			if (scan == pidx)
				break;
			first_dev += pdata[scan].ndevs;
		}
		if (scan >= num_platforms) {
			/* this shouldn't happen, but still ... */
			bufcpy(0, "");
		} else {
			checkNullCtx(pidx, first_dev, "default");
		}
	}
	printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [default]", strbuf);

	/* Other platforms: only meaningful without a default platform or
	 * when there is more than one platform */
	if (pidx == num_platforms || num_platforms > 1) {
		/* stop at the first platform which is not the default one
		 * and which has at least one device */
		first_dev = all_devices;
		for (scan = 0; scan < num_platforms; ++scan) {
			if (scan != pidx && pdata[scan].ndevs != 0)
				break;
			first_dev += pdata[scan].ndevs;
		}
		if (scan >= num_platforms) {
			bufcpy(0, "");
		} else {
			checkNullCtx(scan, first_dev, "non-default");
		}
		printf(I1_STR "%s\n", "clCreateContext(NULL, ...) [other]", strbuf);
	}

	checkNullCtxFromType();
}
/* Get properties of the ocl-icd loader, if available */
/* All properties are currently char[] */
/* Identifiers of the properties that can be requested from the loader */
typedef enum {
CL_ICDL_OCL_VERSION=1,
CL_ICDL_VERSION=2,
CL_ICDL_NAME=3,
CL_ICDL_VENDOR=4,
} cl_icdl_info;
/* Function pointer to the ICD loader info function */
/* NOTE(review): the arguments look like the usual OpenCL "get info" layout
* (property, buffer size, buffer, returned size) -- confirm against ocl-icd */
typedef cl_int (*icdl_info_fn_ptr)(cl_icdl_info, size_t, void*, size_t*);
/* Set by oclIcdProps() from clGetExtensionFunctionAddress(); NULL when the
* extension is not available */
icdl_info_fn_ptr clGetICDLoaderInfoOCLICD;
/* We want to auto-detect the OpenCL version supported by the ICD loader.
* To do this, we will progressively find symbols introduced in new APIs,
* until a NULL symbol is found.
*/
/* Each entry pairs a version number (major*10 + minor, as it is later printed
* with /10 and %10) with a symbol looked up in the loader; entries are in
* increasing version order and the table ends with a NULL symbol. */
struct icd_loader_test {
cl_uint version;
const char *symbol;
} icd_loader_tests[] = {
{ 11, "clCreateSubBuffer" },
{ 12, "clCreateImage" },
{ 20, "clSVMAlloc" },
{ 21, "clGetHostTimer" },
{ 22, "clSetProgramSpecializationConstant" },
{ 0, NULL }
};
/* Query the string property param of the ICD loader and show it under the
* name pname.
* Returns had_error.
* NOTE(review): GET_STRING2 is defined elsewhere; presumably it calls the
* given function to fill strbuf and sets had_error -- confirm.
*/
int
icdl_info_str(cl_icdl_info param, const char* pname)
{
GET_STRING2(clGetICDLoaderInfoOCLICD, param);
show_strbuf(pname, 1);
return had_error;
}
/* Description of an ICD loader property: its identifier plus the two names
* it can be shown under */
struct icdl_info_traits {
cl_icdl_info param; // CL_ICDL_*
const char *sname; // "CL_ICDL_*"
const char *pname; // "ICD loader *"
};
/* Tag used to build the line prefix of the loader properties in raw mode */
static const char * const oclicdl_pfx = "OCLICD";
/* Build a traits entry: the symbolic name is the stringified symbol, the
* descriptive name is "ICD loader " followed by name */
#define LINFO(symbol, name) { symbol, #symbol, "ICD loader " name }
/* Loader properties, in the order in which oclIcdProps() shows them */
struct icdl_info_traits linfo_traits[] = {
LINFO(CL_ICDL_NAME, "Name"),
LINFO(CL_ICDL_VENDOR, "Vendor"),
LINFO(CL_ICDL_VERSION, "Version"),
LINFO(CL_ICDL_OCL_VERSION, "Profile")
};
/* The ICD loader info function must be retrieved via clGetExtensionFunctionAddress,
* which returns a void pointer.
* ISO C forbids assignments between function pointers and void pointers,
* but POSIX allows it. To compile without warnings even in -pedantic mode,
* we take advantage of the fact that we _can_ do the conversion via
* pointers-to-pointers. This is supported on most compilers, except
* for some rather old GCC versions whose strict aliasing rules are
* too strict. Disable strict aliasing warnings for these compilers.
*/
#if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif
void oclIcdProps(void)
{
/* Counter that'll be used to walk the icd_loader_tests */
int i = 0;
/* We find the clGetICDLoaderInfoOCLICD extension address, which will be used
* to query the ICD loader properties.
* It should be noted that in this specific case we cannot replace the
* call to clGetExtensionFunctionAddress with a call to the superseding function
* clGetExtensionFunctionAddressForPlatform because the extension is in the
* loader itself, not in a specific platform.
*/
void *ptrHack = clGetExtensionFunctionAddress("clGetICDLoaderInfoOCLICD");
clGetICDLoaderInfoOCLICD = *(icdl_info_fn_ptr*)(&ptrHack);
/* Step #1: try to auto-detect the supported ICD loader version */
do {
struct icd_loader_test check = icd_loader_tests[i];
if (check.symbol == NULL)
break;
if (dlsym(DL_MODULE, check.symbol) == NULL)
break;
icdl_ocl_version_found = check.version;
++i;
} while (1);
/* Step #2: query proerties from extension, if available */
if (clGetICDLoaderInfoOCLICD != NULL) {
/* TODO think of a sensible header in CLINFO_RAW */
if (output_mode != CLINFO_RAW)
puts("\nICD loader properties");
current_function = __func__;
if (output_mode == CLINFO_RAW) {
line_pfx_len = (int)(strlen(oclicdl_pfx) + 5);
REALLOC(line_pfx, line_pfx_len, "line prefix OCL ICD");
sprintf(strbuf, "[%s/*]", oclicdl_pfx);
sprintf(line_pfx, "%*s", -line_pfx_len, strbuf);
}
for (current_line = 0; current_line < ARRAY_SIZE(linfo_traits); ++current_line) {
const struct icdl_info_traits *traits = linfo_traits + current_line;
current_param = traits->sname;
had_error = icdl_info_str(traits->param,
output_mode == CLINFO_HUMAN ?
traits->pname : traits->sname);
if (!had_error && traits->param == CL_ICDL_OCL_VERSION) {
icdl_ocl_version = getOpenCLVersion(strbuf + 7);
}
}
}
/* Step #3: show it */
if (output_mode == CLINFO_HUMAN) {
if (icdl_ocl_version &&
icdl_ocl_version != icdl_ocl_version_found) {
printf( "\tNOTE:\tyour OpenCL library declares to support OpenCL %u.%u,\n"
"\t\tbut it seems to support up to OpenCL %u.%u %s.\n",
icdl_ocl_version / 10, icdl_ocl_version % 10,
icdl_ocl_version_found / 10, icdl_ocl_version_found % 10,
icdl_ocl_version_found < icdl_ocl_version ?
"only" : "too");
}
if (icdl_ocl_version_found < max_plat_version) {
printf( "\tNOTE:\tyour OpenCL library only supports OpenCL %u.%u,\n"
"\t\tbut some installed platforms support OpenCL %u.%u.\n"
"\t\tPrograms using %u.%u features may crash\n"
"\t\tor behave unexepectedly\n",
icdl_ocl_version_found / 10, icdl_ocl_version_found % 10,
max_plat_version / 10, max_plat_version % 10,
max_plat_version / 10, max_plat_version % 10);
}
}
}
#if defined __GNUC__ && ((__GNUC__*10 + __GNUC_MINOR__) < 46)
#pragma GCC diagnostic warning "-Wstrict-aliasing"
#endif
/* Print the program name and version on standard output */
void version(void)
{
	static const char version_line[] = "clinfo version 2.2.18.03.26";

	printf("%s\n", version_line);
}
/* Print the version followed by the command-line help on standard output */
void usage(void)
{
	/* one entry per output line; puts() appends the newline */
	static const char * const help_lines[] = {
		"Display properties of all available OpenCL platforms and devices",
		"Usage: clinfo [options ...]\n",
		"Options:",
		"\t--all-props, -a\t\ttry all properties, only show valid ones",
		"\t--always-all-props, -A\t\tshow all properties, even if invalid",
		"\t--human\t\thuman-friendly output (default)",
		"\t--raw\t\traw output",
		"\t--offline\talso show offline devices",
		"\t--list, -l\tonly list the platforms and devices by name",
		"\t-h, -?\t\tshow usage",
		"\t--version, -v\tshow version\n",
		"Defaults to raw mode if invoked with",
		"a name that contains the string \"raw\"",
	};
	size_t n;

	version();
	for (n = 0; n < sizeof(help_lines)/sizeof(help_lines[0]); ++n)
		puts(help_lines[n]);
}
/* Program entry point: parse the command line, enumerate the platforms and
 * then either list platforms and devices by name (--list) or show their full
 * properties, followed by the NULL-platform checks (not in raw mode) and by
 * the ICD loader properties.
 *
 * Returns 0; unrecoverable OpenCL errors terminate the program through
 * CHECK_ERROR.
 */
int main(int argc, char *argv[])
{
	cl_uint p;
	int a = 0;
	cl_bool show_offline = CL_FALSE;

	/* if there's a 'raw' in the program name, switch to raw output mode */
	/* a program may be started with argc == 0, in which case argv[0] is
	 * NULL and must not be handed to strstr() */
	if (argc > 0 && argv[0] != NULL && strstr(argv[0], "raw"))
		output_mode = CLINFO_RAW;

	/* process command-line arguments */
	while (++a < argc) {
		if (!strcmp(argv[a], "-a") || !strcmp(argv[a], "--all-props"))
			cond_prop_mode = COND_PROP_TRY;
		else if (!strcmp(argv[a], "-A") || !strcmp(argv[a], "--always-all-props"))
			cond_prop_mode = COND_PROP_SHOW;
		else if (!strcmp(argv[a], "--raw"))
			output_mode = CLINFO_RAW;
		else if (!strcmp(argv[a], "--human"))
			output_mode = CLINFO_HUMAN;
		else if (!strcmp(argv[a], "--offline"))
			show_offline = CL_TRUE;
		else if (!strcmp(argv[a], "-l") || !strcmp(argv[a], "--list"))
			list_only = CL_TRUE;
		else if (!strcmp(argv[a], "-?") || !strcmp(argv[a], "-h")) {
			usage();
			return 0;
		} else if (!strcmp(argv[a], "--version") || !strcmp(argv[a], "-v")) {
			version();
			return 0;
		} else {
			fprintf(stderr, "ignoring unknown command-line parameter %s\n", argv[a]);
		}
	}

	ALLOC(strbuf, 1024, "general string buffer");
	bufsz = 1024;

	error = clGetPlatformIDs(0, NULL, &num_platforms);
	/* "no platform found" is not fatal: it is reported as zero platforms */
	if (error != CL_PLATFORM_NOT_FOUND_KHR)
		CHECK_ERROR("number of platforms");

	if (!list_only)
		printf(I0_STR "%u\n",
			(output_mode == CLINFO_HUMAN ?
			 "Number of platforms" : "#PLATFORMS"),
			num_platforms);
	if (!num_platforms)
		return 0;

	ALLOC(platform, num_platforms, "platform IDs");
	error = clGetPlatformIDs(num_platforms, platform, NULL);
	CHECK_ERROR("platform IDs");

	ALLOC(pdata, num_platforms, "platform data");
	ALLOC(platform_checks, num_platforms, "platform checks data");
	/* start with an empty line prefix; it is resized where needed */
	ALLOC(line_pfx, 1, "line prefix");

	for (p = 0; p < num_platforms; ++p) {
		printPlatformInfo(p);
		if (!list_only)
			puts("");
	}

	if (num_devs_all > 0) {
		ALLOC(all_devices, num_devs_all, "device IDs");
	}

	if (list_only) {
		listPlatformsAndDevices(show_offline);
	} else {
		showDevices(show_offline);
		if (output_mode != CLINFO_RAW)
			checkNullBehavior();
		oclIcdProps();
	}
	return 0;
}
clinfo-2.2.18.03.26/src/ctx_prop.h 0000664 0000000 0000000 00000001524 13256213726 0016334 0 ustar 00root root 0000000 0000000 /* List of OpenCL context properties used to interoperate with a different API */
/* The constants below are grouped by the extension that introduces them.
* NOTE(review): the numeric values are presumably the ones assigned by the
* respective extension specifications -- verify against them before editing. */
/* cl_khr_gl_sharing */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
/* cl_khr_dx9_media_sharing */
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
/* cl_khr_d3d10_sharing */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
/* cl_khr_d3d11_sharing */
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
/* cl_intel_dx9_media_sharing */
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
/* cl_intel_va_api_media_sharing */
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
clinfo-2.2.18.03.26/src/error.h 0000664 0000000 0000000 00000003114 13256213726 0015624 0 ustar 00root root 0000000 0000000 /* OpenCL error handling */
#include
#include "ext.h"
#include "fmtmacros.h"
/* Result of the most recent OpenCL call, tested by the error macros */
cl_int error;

/* Report an OpenCL failure on stderr.
 *
 * err: the OpenCL status to check
 * what: description of the operation that was attempted
 * func, line: source location to mention in the report
 *
 * Returns non-zero if err is not CL_SUCCESS, zero otherwise. Nothing is
 * printed on success.
 */
int
check_ocl_error(cl_int err, const char *what, const char *func, int line)
{
	if (err != CL_SUCCESS) {
		/* flush both streams so that the report is not interleaved
		 * with pending regular output */
		fflush(stdout);
		fflush(stderr);
		/* line is an int, so it is printed with %d */
		fprintf(stderr, "%s:%d: %s : error %d\n",
			func, line, what, err);
		fflush(stderr);
	}
	return err != CL_SUCCESS;
}
/* Location and parameter being processed, used to tag the error reports */
const char *current_function;
size_t current_line;
const char *current_param;

/* Write an error report into dstbuf if err is not CL_SUCCESS.
 *
 * dstbuf, sz: destination buffer and its size
 * err: the OpenCL status to check
 * fmt: description of the failed operation; it is used as part of a format
 *      string in which current_param is interpolated
 *
 * Returns non-zero if err is not CL_SUCCESS, zero otherwise; dstbuf is left
 * untouched on success.
 */
int
report_ocl_error(char *dstbuf, size_t sz, cl_int err, const char *fmt)
{
	static char full_fmt[1024];

	if (err == CL_SUCCESS)
		return 0;

	/* first pass: wrap the caller's format with location and error code */
	snprintf(full_fmt, sizeof(full_fmt), "<%s:%" PRIuS ": %s : error %d>",
		current_function, current_line, fmt, err);
	/* second pass: expand the resulting format with the parameter name */
	snprintf(dstbuf, sz, full_fmt, current_param);
	return 1;
}
void
report_size_mismatch(char *dstbuf, size_t sz, size_t req, size_t ours)
{
snprintf(dstbuf, sz, "<%s:%" PRIuS ": %s : size mismatch "
"(requested %" PRIuS ", we offer %" PRIuS ")>",
current_function, current_line, current_param,
req, ours);
}
/* Write an error report into where (of size sz) if err is not CL_SUCCESS,
 * using an explicitly passed source location.
 *
 * what: description of the failed operation
 * func, line: source location to mention in the report
 *
 * Returns non-zero if err is not CL_SUCCESS, zero otherwise; where is left
 * untouched on success.
 */
int
report_ocl_error_old(char *where, size_t sz, cl_int err, const char *what, const char *func, int line)
{
	const int failed = (err != CL_SUCCESS);

	if (failed)
		snprintf(where, sz, "<%s:%d: %s : error %d>",
			func, line, what, err);
	return failed;
}
/* Abort the program if the last OpenCL call (whose status is in error)
 * failed, after reporting it on stderr.
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: a bare if would capture the else of an enclosing
 * if/else written by the caller. */
#define CHECK_ERROR(what) do { \
	if (check_ocl_error(error, what, __func__, __LINE__)) \
		exit(1); \
} while (0)
/* The macros below format a report into strbuf and evaluate to non-zero
 * on failure */
/* report with the location of the macro invocation */
#define REPORT_ERROR(what) report_ocl_error_old(strbuf, bufsz, error, what, __func__, __LINE__)
/* report with the location stored in current_function/current_line */
#define REPORT_ERROR2(what) report_ocl_error(strbuf, bufsz, error, what)
/* report a mismatch between the requested size and the one we offer */
#define REPORT_SIZE_MISMATCH(req, ours) report_size_mismatch(strbuf, bufsz, req, ours)
clinfo-2.2.18.03.26/src/ext.h 0000664 0000000 0000000 00000020261 13256213726 0015275 0 ustar 00root root 0000000 0000000 /* Include OpenCL header, and define OpenCL extensions, since what is and is not
* available in the official headers is very system-dependent */
#ifndef _EXT_H
#define _EXT_H
/* We will use the deprecated clGetExtensionFunctionAddress,
* so let the headers know that we don't care about it being deprecated.
* The standard CL_USE_DEPRECATED_OPENCL_1_1_APIS define apparently
* doesn't work for macOS, so we'll just tell the compiler to not
* warn about deprecated functions.
* A more correct solution would be to suppress the warning only around the
* clGetExtensionFunctionAddress call, but honestly I just cleaned up that
* piece of code. And I'm actually wondering if it even makes sense to
* build that part of the code on macOS: does anybody actually use
* ocl-icd as OpenCL dispatcher on macOS?
*/
#ifdef __APPLE__
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include
#else
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include
#endif
/* These two defines were introduced in the 1.2 headers
* on 2012-11-30, so earlier versions don't have them
* (e.g. Debian wheezy)
*/
#ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
#endif
/* 2.0 headers are not very common for the time being, so
* let's copy the defines for the new CL_DEVICE_* properties
* here.
*/
#ifndef CL_VERSION_2_0
#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C
#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D
#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A
#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E
#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F
#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050
#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051
#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054
#define CL_DEVICE_MAX_PIPE_ARGS 0x1055
#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056
#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058
#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059
#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
typedef cl_bitfield cl_device_svm_capabilities;
#endif
#ifndef CL_VERSION_2_1
#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905
#define CL_DEVICE_IL_VERSION 0x105B
#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C
#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D
#endif
/*
* Extensions
*/
/* cl_khr_icd */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
#define CL_PLATFORM_NOT_FOUND_KHR -1001
/* cl_amd_object_metadata */
#define CL_PLATFORM_MAX_KEYS_AMD 0x403C
/* cl_khr_fp64 */
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
/* cl_khr_fp16 */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* cl_khr_il_program */
#define CL_DEVICE_IL_VERSION_KHR 0x105B
/* cl_khr_terminate_context */
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x 0x200F
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR_2x 0x2031
/* TODO: I cannot find official definitions for these,
* so I'm currently extrapolating them from the specification
*/
typedef cl_bitfield cl_device_terminate_capability_khr;
#define CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR (1<<0)
/* cl_khr_subgroup_named_barrier */
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
/* cl_nv_device_attribute_query */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007
#define CL_DEVICE_PCI_BUS_ID_NV 0x4008
#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009
/* cl_ext_atomic_counters_{32,64} */
#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032
/* cl_amd_device_attribute_query */
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049
#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A
#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B
#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C
/* These two are undocumented */
#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD 0x404D
#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD 0x404E
/* These were added in v4 of the extension, but have values lower than
* the older ones, and they span around the cl_ext_atomic_counters_*
* define
*/
#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030
#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031
#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033
#define CL_DEVICE_PCIE_ID_AMD 0x4034
/* Fallback definition of the device topology type, only used when the
* OpenCL headers did not already define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD */
#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD
#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1
/* Two views of the same data, both starting with the topology type:
* raw exposes the payload as five cl_uint values, pcie exposes the bus,
* device and function numbers stored after 17 unused bytes.
* NOTE(review): presumably the pcie view is meaningful only when type is
* CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD -- confirm against the extension spec */
typedef union
{
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} cl_device_topology_amd;
#endif
/* cl_amd_offline_devices */
#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F
/* cl_amd_copy_buffer_p2p */
#define CL_DEVICE_NUM_P2P_DEVICES_AMD 0x4088
#define CL_DEVICE_P2P_DEVICES_AMD 0x4089
/* cl_ext_device_fission */
#define cl_ext_device_fission 1
typedef cl_ulong cl_device_partition_property_ext;
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 /* cl_intel_device_partition_by_names */
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_intel_advanced_motion_estimation */
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
/* cl_intel_device_side_avc_motion_estimation */
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
/* cl_intel_planar_yuv */
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
/* cl_qcom_ext_host_ptr */
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
/* cl_arm_shared_virtual_memory */
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
/* cl_khr_spir */
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
/* cl_altera_device_temperature */
#define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3
/* cl_intel_simultaneous_sharing */
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
/* cl_intel_required_subgroup_size */
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
#endif
clinfo-2.2.18.03.26/src/fmtmacros.h 0000664 0000000 0000000 00000001326 13256213726 0016471 0 ustar 00root root 0000000 0000000 /* cl_ulong is always a 64bit integer, so in a few places
we want to use its shadow type uint64_t, and print the
values using PRIu64. We'll similarly define one for
size_t, to make support for non-standard/older compiler
easier.
*/
/* Include guard: everything below is defined at most once per translation unit */
#ifndef _FMT_MACROS_H
#define _FMT_MACROS_H

#ifdef _WIN32
/* TODO FIXME WIN64 support */
/* On Windows the PRI* macros are spelled out by hand with the MS-specific
 * I32 / I64 / I length prefixes (presumably because older MS toolchains do
 * not ship <inttypes.h> -- confirm before relying on it).
 */
#  include <stdint.h> // INTPTR_MAX, INT32_MAX
#  include <stddef.h> // size_t
#  define PRIu32 "I32u"
/* PRIX* must print UPPERCASE hex, like the <inttypes.h> macros used on the
 * other platforms, so the conversion character is 'X', not 'x' */
#  define PRIX32 "I32X"
#  define PRIu64 "I64u"
#  define PRIX64 "I64X"
#  define PRIuS "Iu"
/* pointer-sized hex: pick the 32- or 64-bit spec matching intptr_t */
#if INTPTR_MAX <= INT32_MAX
#  define PRIXPTR PRIX32
#else
#  define PRIXPTR PRIX64
#endif
#else
/* __STDC_FORMAT_MACROS asks C++ compilers to expose the PRI* macros too */
# define __STDC_FORMAT_MACROS
# include <inttypes.h>
#endif

// size_t print spec (only if the platform branch above did not provide one)
#ifndef PRIuS
# define PRIuS "zu"
#endif

#endif
clinfo-2.2.18.03.26/src/memory.h 0000664 0000000 0000000 00000000671 13256213726 0016010 0 ustar 00root root 0000000 0000000 /* Memory handling */
#include
/* CHECK_MEM(var, what): if the pointer expression `var` is NULL, print
 * "<function>:<line>: <what> : Out of memory" to stderr and exit(1).
 * `what` is a string describing the allocation.
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. `cond ? p : q`) is tested as a whole instead of being re-parsed.
 */
#define CHECK_MEM(var, what) do { \
	if (!(var)) { \
		fprintf(stderr, "%s:%d: %s : Out of memory\n", \
			__func__, __LINE__, (what)); \
		exit(1); \
	} \
} while (0)

/* ALLOC(var, num, what): point `var` at a zero-initialized array of `num`
 * elements of the type `var` points to (calloc); terminates the program
 * through CHECK_MEM when the allocation fails.
 */
#define ALLOC(var, num, what) do { \
	(var) = calloc((num), sizeof(*(var))); \
	CHECK_MEM(var, what); \
} while (0)

/* REALLOC(var, num, what): resize the array `var` points to so that it holds
 * `num` elements (realloc); terminates the program through CHECK_MEM when the
 * reallocation fails. Added elements are NOT zero-initialized.
 */
#define REALLOC(var, num, what) do { \
	(var) = realloc((var), (num)*sizeof(*(var))); \
	CHECK_MEM(var, what); \
} while (0)
clinfo-2.2.18.03.26/src/ms_support.h 0000664 0000000 0000000 00000003116 13256213726 0016710 0 ustar 00root root 0000000 0000000 /* Missing functions and other misc stuff to support
* the horrible MS C compiler
*
* TODO could be improved by version-checking for C99 support
*/
// Each pragma below turns one MS compiler warning off for the rest of the
// translation unit that includes this header.

// disable warning about unsafe strncpy vs strncpy_s usage
#pragma warning(disable : 4996)
// disable warning about constant conditional expressions
#pragma warning(disable : 4127)
// disable warning about non-constant aggregate initializer
#pragma warning(disable : 4204)
// disable warning about global shadowing
#pragma warning(disable : 4459)
// disable warning about parameter shadowing
#pragma warning(disable : 4457)
// Suppress warning about unused parameters: UNUSED(x) expands to x followed
// by a __pragma that suppresses warning 4100. The macro definition
// _should_ work, but it doesn't on VS2012 (cl 17), may be a version thing
#define UNUSED(x) x __pragma(warning(suppress: 4100))
// TODO FIXME remove full-blown warning removal where not needed
// (4100 is switched off globally because the per-use suppression above is
// not reliable)
#pragma warning(disable: 4100)
// No inline in MS C: map the keyword onto the MS-specific __inline
#define inline __inline
// No snprintf in MS C, copy over implementation taken from
// stackoverflow
#include
#include
/* vsnprintf replacement built on the MS CRT.
 * With a non-zero size, formats into str through _vsnprintf_s(_TRUNCATE).
 * When that reports -1, or when size is 0 and nothing is written at all,
 * the length is obtained from _vscprintf(format, ap) instead.
 * Returns the resulting count.
 */
inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap)
{
	int written = (size == 0) ? -1
		: _vsnprintf_s(str, size, _TRUNCATE, format, ap);
	return (written != -1) ? written : _vscprintf(format, ap);
}
/* Variadic front end for c99_vsnprintf: packs the arguments following
 * `format` into a va_list, forwards everything unchanged and hands back
 * the count c99_vsnprintf returned.
 */
inline int c99_snprintf(char* str, size_t size, const char* format, ...)
{
	va_list args;
	int result;

	va_start(args, format);
	result = c99_vsnprintf(str, size, format, args);
	va_end(args);

	return result;
}
// From here on every use of the name snprintf is rewritten to c99_snprintf
#define snprintf c99_snprintf
// And no __func__ either: alias it to the MS-specific __FUNCTION__
#define __func__ __FUNCTION__
clinfo-2.2.18.03.26/src/strbuf.h 0000664 0000000 0000000 00000005200 13256213726 0015776 0 ustar 00root root 0000000 0000000 /* multi-purpose string strbuf, will be initialized to be
* at least 1024 bytes long.
*/
#include
#include
#include
#include "fmtmacros.h"
/* Shared scratch buffer written by the GET_STRING* macros and the bufcpy*
 * helpers of this header */
char *strbuf;
/* bufsz: size of strbuf in bytes as tracked by this header (GET_STRING* set
 * it to nusz after growing the buffer);
 * nusz: size reported by the most recent GET_STRING* size query */
size_t bufsz, nusz;
/* GET_STRING(cmd, param, param_str, ...): two-step string query into strbuf.
 *
 * 1. calls cmd(<extra args>, param, 0, NULL, &nusz) to learn the required
 *    size, storing the call's result in `error`;
 * 2. if REPORT_ERROR(...) evaluates to true, the `break` leaves the
 *    do/while(0) and strbuf is left untouched;
 * 3. otherwise strbuf is grown with REALLOC when nusz exceeds bufsz (the
 *    stringified `param` is passed as the allocation description), and cmd is
 *    called a second time to fill strbuf; that result is reported through
 *    REPORT_ERROR as well.
 *
 * param_str must be a string literal: it is concatenated with "get " and
 * " size" at compile time. The expansion site must provide a variable named
 * `error` and the REPORT_ERROR macro; neither is defined in this header.
 */
#define GET_STRING(cmd, param, param_str, ...) do { \
error = cmd(__VA_ARGS__, param, 0, NULL, &nusz); \
if (REPORT_ERROR("get " param_str " size")) break; \
if (nusz > bufsz) { \
REALLOC(strbuf, nusz, #param); \
bufsz = nusz; \
} \
error = cmd(__VA_ARGS__, param, bufsz, strbuf, NULL); \
REPORT_ERROR("get " param_str); \
} while (0)
/* GET_STRING2(cmd, ...): same two-step query as GET_STRING, but all the
 * arguments of cmd that precede the (size, buffer, size_ret) triple are passed
 * by the caller through the variadic part.
 *
 * The outcome of each step is stored in `had_error` via REPORT_ERROR2; the
 * second call (which fills strbuf, growing it with REALLOC first if nusz
 * exceeds bufsz) only happens when the size query did not set had_error.
 *
 * The expansion site must provide `error`, `had_error`, `current_param`
 * (used as the allocation description) and the REPORT_ERROR2 macro; none of
 * them is defined in this header.
 */
#define GET_STRING2(cmd, ...) do { \
error = cmd(__VA_ARGS__, 0, NULL, &nusz); \
had_error = REPORT_ERROR2("get %s size"); \
if (!had_error) { \
if (nusz > bufsz) { \
REALLOC(strbuf, nusz, current_param); \
bufsz = nusz; \
} \
error = cmd(__VA_ARGS__, bufsz, strbuf, NULL); \
had_error = REPORT_ERROR2("get %s"); \
} \
} while (0)
/* Skip leading whitespace in a string.
 *
 * str must be a NUL-terminated string. Returns a pointer into str, at its
 * first character for which isspace() is false; this is the terminating NUL
 * when the string is empty or all whitespace.
 */
static inline const char* skip_leading_ws(const char *str)
{
	const char *ret = str;
	/* isspace() is only defined for EOF and values representable as
	 * unsigned char: a plain (possibly signed) char holding a byte >= 0x80
	 * must be converted first */
	while (isspace((unsigned char)*ret)) ++ret;
	return ret;
}
/* Mark strbuf as truncated: overwrite its last sizeof(ellip) bytes (the
 * three dots plus the terminating NUL) with "...".
 *
 * When the buffer is smaller than that, only the trailing part of the marker
 * that fits is written, so a non-empty buffer still ends with a NUL and
 * nothing is written before its start; an empty buffer is left alone.
 */
static const char ellip[] = "...";
static void trunc_strbuf(void)
{
	const size_t ellsz = sizeof(ellip); /* includes the NUL */

	if (bufsz >= ellsz)
		memcpy(strbuf + bufsz - ellsz, ellip, ellsz);
	else if (bufsz > 0)
		memcpy(strbuf, ellip + (ellsz - bufsz), bufsz);
}
/* copy a string to strbuf, at the given offset,
 * returning the amount of bytes written (excluding the
 * closing NULL byte)
 *
 * offset: position inside strbuf where the copy starts
 * str:    source bytes
 * len:    number of bytes of str to copy
 *
 * At most bufsz - offset - 1 bytes are copied, so that the terminator always
 * fits. If the data has to be shortened, or if offset does not lie inside the
 * buffer (offset >= bufsz; a diagnostic is printed to stderr and nothing is
 * copied), strbuf is marked as truncated with trunc_strbuf() instead of being
 * NUL-terminated right after the copied bytes.
 */
static inline size_t bufcpy_len(size_t offset, const char *str, size_t len)
{
	size_t maxlen;
	int trunc = 0;

	/* offset == bufsz is out of range too: there would be no room even for
	 * the terminator, and bufsz - offset - 1 would wrap around to SIZE_MAX */
	if (offset >= bufsz) {
		fprintf(stderr, "bufcpy overflow copying %s at offset %" PRIuS "/%" PRIuS " (%s)\n",
			str, offset, bufsz, strbuf);
		maxlen = 0;
		trunc = 1;
	} else {
		maxlen = bufsz - offset - 1;
	}

	if (len > maxlen) {
		len = maxlen;
		trunc = 1;
		/* TODO enlarge strbuf instead, if maxlen > 0 */
	}

	/* nothing to copy in the overflow case: do not even form the
	 * out-of-range destination pointer */
	if (len > 0)
		memcpy(strbuf + offset, str, len);

	if (trunc)
		trunc_strbuf();
	else
		strbuf[offset + len] = '\0';
	return len;
}
/* Copy the NUL-terminated string str to strbuf at the given offset.
 * The length is measured with strlen() and the work is delegated to
 * bufcpy_len(), whose return value is passed back unchanged.
 */
static inline size_t bufcpy(size_t offset, const char *str)
{
	const size_t len = strlen(str);

	return bufcpy_len(offset, str, len);
}
/* Separators: we want to be able to prepend separators as needed to strbuf,
 * which we do only if something has already been written to the buffer (i.e.
 * the offset is non-zero). The callers should first call 'set_separator' and
 * then use add_separator(&offset) to add it, where offset is an offset inside
 * the buffer, which will be incremented as needed
 */
/* Current separator string and its length in bytes (without the NUL);
 * both are assigned by set_separator() */
const char *sep;
size_t sepsz;

/* Select the separator: remembers the pointer (the string is not copied) and
 * caches its strlen() in sepsz. */
void set_separator(const char* _sep)
{
	sepsz = strlen(_sep);
	sep = _sep;
}
/* Write the current separator (sep / sepsz) to strbuf at *offset, unless
 * *offset is zero, and advance *offset by the number of bytes bufcpy_len()
 * reports as written.
 * No overflow check is done here: it is assumed that strbuf has enough room.
 */
void add_separator(size_t *offset)
{
	size_t added;

	if (*offset == 0)
		return;

	added = bufcpy_len(*offset, sep, sepsz);
	*offset += added;
}