pax_global_header 0000666 0000000 0000000 00000000064 12260750563 0014520 g ustar 00root root 0000000 0000000 52 comment=0e431f5decdf24b4661c59d8c5a59730175d4e48
libclc-0~git20140101/ 0000775 0000000 0000000 00000000000 12260750563 0014164 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/CREDITS.TXT 0000664 0000000 0000000 00000000052 12260750563 0015657 0 ustar 00root root 0000000 0000000 N: Peter Collingbourne
E: peter@pcc.me.uk
libclc-0~git20140101/LICENSE.TXT 0000664 0000000 0000000 00000006273 12260750563 0015657 0 ustar 00root root 0000000 0000000 ==============================================================================
libclc License
==============================================================================
The libclc library is dual licensed under both the University of Illinois
"BSD-Like" license and the MIT license. As a user of this code you may choose
to use it under either license. As a contributor, you agree to allow your code
to be used under both.
Full text of the relevant licenses is included below.
==============================================================================
Copyright (c) 2011-2014 by the contributors listed in CREDITS.TXT
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* The names of the contributors may not be used to endorse or promote
products derived from this Software without specific prior written
permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
==============================================================================
Copyright (c) 2011-2014 by the contributors listed in CREDITS.TXT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
libclc-0~git20140101/README.TXT 0000664 0000000 0000000 00000002173 12260750563 0015525 0 ustar 00root root 0000000 0000000 libclc
------
libclc is an open source, BSD/MIT dual licensed implementation of the library
requirements of the OpenCL C programming language, as specified by the
OpenCL 1.1 Specification. The following sections of the specification
impose library requirements:
* 6.1: Supported Data Types
* 6.2.3: Explicit Conversions
* 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
* 6.9: Preprocessor Directives and Macros
* 6.11: Built-in Functions
* 9.3: Double Precision Floating-Point
* 9.4: 64-bit Atomics
* 9.5: Writing to 3D image memory objects
* 9.6: Half Precision Floating-Point
libclc is intended to be used with the Clang compiler's OpenCL frontend.
libclc is designed to be portable and extensible. To this end, it provides
generic implementations of most library requirements, allowing the target
to override the generic implementation at the granularity of individual
functions.
libclc currently supports the PTX (nvptx, nvptx64) and R600 targets, and
support for more targets is welcome.
Compiling
---------
./configure.py --with-llvm-config=/path/to/llvm-config && make
Website
-------
http://www.pcc.me.uk/~peter/libclc/
libclc-0~git20140101/build/ 0000775 0000000 0000000 00000000000 12260750563 0015263 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/build/metabuild.py 0000664 0000000 0000000 00000005364 12260750563 0017613 0 ustar 00root root 0000000 0000000 import ninja_syntax
import os
# Simple meta-build system.
class Make(object):
  """Minimal Makefile generator.

  Offers the rule/build/default/finish/output_filename methods that
  configure.py calls on a generator, and writes the Makefile incrementally
  to output_filename() in the current working directory.
  """

  def __init__(self):
    self.output = open(self.output_filename(), 'w')
    self.rules = {}
    self.rule_text = ''
    self.all_targets = []
    self.default_targets = []
    self.clean_files = []
    self.distclean_files = []
    # 'all' is declared first so that it is make's default goal; $(Verb)
    # expands to '@' (hide the command) unless VERBOSE is defined.
    self.output.write("""all::
ifndef VERBOSE
Verb = @
endif
""")

  def output_filename(self):
    """Return the name of the file this generator writes."""
    return 'Makefile'

  def rule(self, name, command, description=None, depfile=None,
           generator=False):
    """Register (or replace) the rule `name`.

    Nothing is written here; the rule is expanded each time build() refers
    to it. `command`, `description` and `depfile` may contain $in and $out.
    """
    self.rules[name] = {'command': command, 'description': description,
                        'depfile': depfile, 'generator': generator}

  def build(self, output, rule, inputs=None, implicit=None, order_only=None):
    """Write a Makefile target that produces `output` using `rule`.

    inputs     -- path or list of paths substituted for $in
    implicit   -- extra prerequisites that are not part of $in
    order_only -- order-only prerequisites (written after ' | ')

    The directory of `output` is created if it does not exist yet.
    Raises KeyError if `rule` was never registered with rule().
    """
    inputs = self._as_list(inputs)
    implicit = self._as_list(implicit)
    order_only = self._as_list(order_only)

    output_dir = os.path.dirname(output)
    if output_dir != '' and not os.path.isdir(output_dir):
      os.makedirs(output_dir)

    dollar_in = ' '.join(inputs)
    subst = lambda text: text.replace('$in', dollar_in).replace('$out', output)

    deps = ' '.join(inputs + implicit)
    if order_only:
      deps += ' | '
      deps += ' '.join(order_only)
    self.output.write('%s: %s\n' % (output, deps))

    r = self.rules[rule]
    command = subst(r['command'])
    if r['description']:
      desc = subst(r['description'])
      self.output.write('\t@echo %s\n\t$(Verb) %s\n' % (desc, command))
    else:
      self.output.write('\t%s\n' % command)
    if r['depfile']:
      depfile = subst(r['depfile'])
      # '-include' so that a depfile that does not exist yet is not an error.
      self.output.write('-include '+depfile+'\n')
    self.output.write('\n')

    self.all_targets.append(output)
    # Outputs of generator rules are removed by 'distclean' only; every
    # other output is removed by 'clean'. Depfiles always go to 'distclean'.
    if r['generator']:
      self.distclean_files.append(output)
    else:
      self.clean_files.append(output)
    if r['depfile']:
      self.distclean_files.append(depfile)

  def _as_list(self, input):
    """Normalize None / a single value / a list to a list, matching
    ninja_syntax.Writer._as_list."""
    if input is None:
      return []
    if isinstance(input, list):
      return input
    return [input]

  def default(self, paths):
    """Add `paths` (one path or a list) to the prerequisites of 'all'."""
    self.default_targets += self._as_list(paths)

  def finish(self):
    """Write the all/clean/distclean targets and close the Makefile.

    'all' depends on the targets passed to default(), or on every built
    target when default() was never called. No further rule()/build()
    output can be written after this call.
    """
    self.output.write('all:: %s\n\n' % ' '.join(self.default_targets or self.all_targets))
    self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files))
    self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files))
    # Close explicitly so the Makefile is flushed to disk deterministically
    # instead of whenever the file object happens to be collected.
    self.output.close()
class Ninja(ninja_syntax.Writer):
  """Generator that writes a Ninja manifest through ninja_syntax.Writer."""

  def output_filename(self):
    """Return the name of the manifest this generator writes."""
    return 'build.ninja'

  def __init__(self):
    # The manifest is opened for writing in the current working directory.
    manifest = open(self.output_filename(), 'w')
    ninja_syntax.Writer.__init__(self, manifest)

  def finish(self):
    """Intentionally does nothing; present so that callers can invoke
    finish() on either generator."""
    return None
def from_name(name):
  """Return a new generator object for the generator called `name`.

  'make' yields a Make instance and 'ninja' a Ninja instance.
  Raises LookupError for any other name.
  """
  if name == 'make':
    return Make()
  if name == 'ninja':
    return Ninja()
  # Call-style raise: valid on both Python 2 and Python 3, unlike the
  # 'raise Type, message' statement form.
  raise LookupError('unknown generator: %s; supported generators are make and ninja' % name)
libclc-0~git20140101/build/ninja_syntax.py 0000664 0000000 0000000 00000006766 12260750563 0020361 0 ustar 00root root 0000000 0000000 #!/usr/bin/python
"""Python module for generating .ninja files.
Note that this is emphatically not a required piece of Ninja; it's
just a helpful utility for build-file-generation systems that already
use Python.
"""
import textwrap
class Writer(object):
  """Writes Ninja build-file syntax to a file-like object.

  output -- object with a write() method that receives the generated text
  width  -- column at which long lines are wrapped with ' $' continuations
  """

  def __init__(self, output, width=78):
    self.output = output
    self.width = width

  def newline(self):
    """Emit an empty line."""
    self.output.write('\n')

  def comment(self, text):
    """Emit `text` as '# ' comment lines wrapped to the configured width."""
    for line in textwrap.wrap(text, self.width - 2):
      self.output.write('# ' + line + '\n')

  def variable(self, key, value, indent=0):
    """Emit 'key = value'; a list value is joined with spaces and a value
    of None emits nothing."""
    if value is None:
      return
    if isinstance(value, list):
      value = ' '.join(value)
    self._line('%s = %s' % (key, value), indent)

  def rule(self, name, command, description=None, depfile=None,
           generator=False):
    """Emit a 'rule' block; optional attributes are written only when set."""
    self._line('rule %s' % name)
    self.variable('command', command, indent=1)
    if description:
      self.variable('description', description, indent=1)
    if depfile:
      self.variable('depfile', depfile, indent=1)
    if generator:
      self.variable('generator', '1', indent=1)

  def build(self, outputs, rule, inputs=None, implicit=None, order_only=None,
            variables=None):
    """Emit a 'build' statement and return the outputs as a list.

    outputs, inputs, implicit, order_only -- a single path or a list of paths
    variables -- per-build variable bindings, given either as a dict or as
                 an iterable of (key, value) pairs
    """
    outputs = self._as_list(outputs)
    # Copy so that appending the separators never mutates the caller's list.
    all_inputs = self._as_list(inputs)[:]

    if implicit:
      all_inputs.append('|')
      all_inputs.extend(self._as_list(implicit))
    if order_only:
      all_inputs.append('||')
      all_inputs.extend(self._as_list(order_only))

    self._line('build %s: %s %s' % (' '.join(outputs),
                                    rule,
                                    ' '.join(all_inputs)))

    if variables:
      # Iterating a dict directly yields only its keys, which would be
      # unpacked incorrectly below, so go through items() for dicts.
      if isinstance(variables, dict):
        bindings = variables.items()
      else:
        bindings = variables
      for key, val in bindings:
        self.variable(key, val, indent=1)

    return outputs

  def include(self, path):
    """Emit an 'include' statement for `path`."""
    self._line('include %s' % path)

  def subninja(self, path):
    """Emit a 'subninja' statement for `path`."""
    self._line('subninja %s' % path)

  def default(self, paths):
    """Emit a 'default' statement for one path or a list of paths."""
    self._line('default %s' % ' '.join(self._as_list(paths)))

  def _line(self, text, indent=0):
    """Write 'text' word-wrapped at self.width characters."""
    leading_space = ' ' * indent
    while len(text) > self.width:
      # The text is too wide; wrap if possible.

      # Find the rightmost space that would obey our width constraint.
      available_space = self.width - len(leading_space) - len(' $')
      space = text.rfind(' ', 0, available_space)
      if space < 0:
        # No such space; just use the first space we can find.
        space = text.find(' ', available_space)
      if space < 0:
        # Give up on breaking.
        break

      self.output.write(leading_space + text[0:space] + ' $\n')
      text = text[space+1:]

      # Subsequent lines are continuations, so indent them.
      leading_space = ' ' * (indent+2)

    self.output.write(leading_space + text + '\n')

  def _as_list(self, input):
    """Normalize None / a single value / a list to a list."""
    if input is None:
      return []
    if isinstance(input, list):
      return input
    return [input]
def escape(string):
  """Return `string` with every '$' doubled, so that it can be placed in a
  Ninja file without being interpreted further.

  Strings containing a newline are rejected by an assertion.
  """
  assert '\n' not in string, 'Ninja syntax does not allow newlines'
  # '$' is the only special metacharacter, so doubling it is all that is needed.
  pieces = string.split('$')
  return '$$'.join(pieces)
libclc-0~git20140101/compile-test.sh 0000775 0000000 0000000 00000000340 12260750563 0017125 0 ustar 00root root 0000000 0000000 #!/bin/sh
# Run clang for the nvptx--nvidiacl target on the arguments given to this
# script ("$@"), with the ptx-nvidiacl and generic include directories on the
# include path, clc/clc.h force-included, and the builtins bitcode file
# nvptx--nvidiacl/lib/builtins.bc passed to the frontend for linking.
# NOTE(review): all paths are relative, so this presumably has to be run from
# a directory containing both the sources and the built library - confirm.
clang -target nvptx--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang nvptx--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers -Dcl_khr_fp64 "$@"
libclc-0~git20140101/configure.py 0000775 0000000 0000000 00000022477 12260750563 0016536 0 ustar 00root root 0000000 0000000 #!/usr/bin/python
def c_compiler_rule(b, name, description, compiler, flags):
  """Register a compile rule called `name` on generator `b`.

  The command runs `compiler` with `flags`, compiling $in to $out and
  writing a dependency file $out.d (via -MMD -MF), which is also registered
  as the rule's depfile. The rule description is `description` followed by
  the output name.
  """
  compile_command = "{0} -MMD -MF $out.d {1} -c -o $out $in".format(compiler, flags)
  label = description + " $out"
  b.rule(name, compile_command, label, depfile="$out.d")
# libclc version (major, minor, patch); written into the Version field of the
# generated libclc.pc.
version_major, version_minor, version_patch = 0, 0, 1
from optparse import OptionParser
import os
import string
from subprocess import *
import sys
srcdir = os.path.dirname(sys.argv[0])
sys.path.insert(0, os.path.join(srcdir, 'build'))
import metabuild
# Command-line options. Positional arguments (args) are the target triples
# to build.
p = OptionParser()
for _flag, _help in (
    ('--with-llvm-config', 'use given llvm-config script'),
    ('--with-cxx-compiler', 'use given C++ compiler'),
    ('--prefix', 'install to given prefix'),
    ('--libexecdir', 'install *.bc to given dir'),
    ('--includedir', 'install include files to given dir'),
    ('--pkgconfigdir', 'install clc.pc to given dir')):
  p.add_option(_flag, metavar='PATH', help=_help)
p.add_option('-g', metavar='GENERATOR', default='make',
             help='use given generator (default: make)')

(options, args) = p.parse_args()

# Fall back to an llvm-config found on $PATH.
llvm_config_exe = options.with_llvm_config or "llvm-config"

# Installation directories; the unset ones are derived from the prefix.
prefix = options.prefix or '/usr/local'
libexecdir = options.libexecdir or os.path.join(prefix, 'lib/clc')
includedir = options.includedir or os.path.join(prefix, 'include')
pkgconfigdir = options.pkgconfigdir or os.path.join(prefix, 'share/pkgconfig')
def llvm_config(args):
  """Run llvm-config with the argument list `args` and return its output.

  The standard output is returned as text with trailing whitespace removed
  and the remaining newlines replaced by spaces. If the llvm-config
  executable cannot be started, an error message is printed and the
  script exits with status 1.
  """
  try:
    proc = Popen([llvm_config_exe] + args, stdout=PIPE)
    output = proc.communicate()[0]
  except OSError:
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Error executing llvm-config.")
    print("Please ensure that llvm-config is in your $PATH, or use --with-llvm-config.")
    sys.exit(1)
  # On Python 3 the pipe yields bytes; decode so that callers can keep
  # treating the result as text. On Python 2 it already is a str.
  if not isinstance(output, str):
    output = output.decode()
  return output.rstrip().replace('\n', ' ')
# Version components as strings, with any 'svn' suffix removed. Uses str
# methods rather than the string-module functions, which no longer exist in
# Python 3.
llvm_version = llvm_config(['--version']).replace('svn', '').split('.')
llvm_major = int(llvm_version[0])
llvm_minor = int(llvm_version[1])

# --system-libs is only queried for LLVM 3.5 and newer.
llvm_system_libs = ''
if (llvm_major, llvm_minor) >= (3, 5):
  llvm_system_libs = llvm_config(['--system-libs'])

llvm_bindir = llvm_config(['--bindir'])
# Link line for the host tool: LLVM libraries, then system libraries, then
# the linker flags.
llvm_core_libs = llvm_config(['--libs', 'core', 'bitreader', 'bitwriter']) + ' ' + \
                 llvm_system_libs + ' ' + \
                 llvm_config(['--ldflags'])
llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti'

# Tools taken from the LLVM bin directory.
llvm_clang = os.path.join(llvm_bindir, 'clang')
llvm_link = os.path.join(llvm_bindir, 'llvm-link')
llvm_opt = os.path.join(llvm_bindir, 'opt')

# Host C++ compiler: the user's choice, else clang++ from the same directory.
cxx_compiler = options.with_cxx_compiler
if not cxx_compiler:
  cxx_compiler = os.path.join(llvm_bindir, 'clang++')
# Devices built for the r600-- target. One library is built per 'gpu' entry
# and each name in 'aliases' becomes a symlink to that library.
r600_devices = [
  {'gpu' : 'cedar',   'aliases' : ['palm', 'sumo', 'sumo2', 'redwood', 'juniper']},
  {'gpu' : 'cypress', 'aliases' : ['hemlock']},
  {'gpu' : 'barts',   'aliases' : ['turks', 'caicos']},
  {'gpu' : 'cayman',  'aliases' : ['aruba']},
  {'gpu' : 'tahiti',  'aliases' : ['pitcairn', 'verde', 'oland', 'bonaire', 'kabini', 'kaveri', 'hawaii']},
]

# Known target triples. An empty 'gpu' means a single library named after
# the target itself, built without -mcpu.
available_targets = {
  'r600--' : { 'devices' : r600_devices },
  'nvptx--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []}] },
  'nvptx64--nvidiacl' : { 'devices' : [{'gpu' : '', 'aliases' : []}] },
}

default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--']

# Targets named on the command line take precedence over the defaults.
targets = args if args else default_targets
# Generator selected with -g.
b = metabuild.from_name(options.g)

# Rules that turn sources into bitcode and combine/optimize bitcode.
llvm_as = os.path.join(llvm_bindir, "llvm-as")
b.rule("LLVM_AS", "%s -o $out $in" % llvm_as,
       'LLVM-AS $out')
b.rule("LLVM_LINK", command = llvm_link + " -o $out $in",
       description = 'LLVM-LINK $out')
b.rule("OPT", command = llvm_opt + " -O3 -o $out $in",
       description = 'OPT $out')

# Rules and build statements for the prepare-builtins host tool.
c_compiler_rule(b, "LLVM_TOOL_CXX", 'CXX', cxx_compiler, llvm_cxxflags)
tool_link_command = cxx_compiler + (" -o $out $in %s" % llvm_core_libs)
b.rule("LLVM_TOOL_LINK", tool_link_command, 'LINK $out')

prepare_builtins = os.path.join('utils', 'prepare-builtins')
prepare_builtins_obj = os.path.join('utils', 'prepare-builtins.o')
prepare_builtins_src = os.path.join(srcdir, 'utils', 'prepare-builtins.cpp')
b.build(prepare_builtins_obj, "LLVM_TOOL_CXX", prepare_builtins_src)
b.build(prepare_builtins, "LLVM_TOOL_LINK", prepare_builtins_obj)

b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins,
       'PREPARE-BUILTINS $out')

# convert.cl is generated by piping gen_convert.py through python.
b.rule("PYTHON_GEN", "python < $in > $out", "PYTHON_GEN $out")
b.build('generic/lib/convert.cl', "PYTHON_GEN", ['generic/lib/gen_convert.py'])

# Files whose modification triggers regeneration of the build file.
manifest_deps = set([sys.argv[0],
                     os.path.join(srcdir, 'build', 'metabuild.py'),
                     os.path.join(srcdir, 'build', 'ninja_syntax.py')])

install_files_bc = []
install_deps = []

# Create libclc.pc
pc_lines = [
  'includedir=%s' % includedir,
  'libexecdir=%s' % libexecdir,
  '',
  'Name: libclc',
  'Description: Library requirements of the OpenCL C programming language',
  'Version: %s.%s.%s' % (version_major, version_minor, version_patch),
  'Cflags: -I${includedir}',
  'Libs: -L${libexecdir}',
]
with open('libclc.pc', 'w') as clc:
  clc.write('\n'.join(pc_lines))
# Emit the build statements for every requested target and each of its devices.
for target in targets:
  (t_arch, t_vendor, t_os) = target.split('-')
  archs = [t_arch]
  if t_arch == 'nvptx' or t_arch == 'nvptx64':
    archs.append('ptx')
  archs.append('generic')

  # Candidate source subdirectories. Order matters: a source name is taken
  # from the first library directory that lists it (see sources_seen below).
  subdirs = []
  for arch in archs:
    subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
    subdirs.append("%s-%s" % (arch, t_os))
    subdirs.append(arch)

  # Built as real lists: libdirs is walked once per device, which a one-shot
  # filter() iterator (Python 3) would not survive.
  incdirs = [d for d in
             [os.path.join(srcdir, subdir, 'include') for subdir in subdirs]
             if os.path.isdir(d)]
  libdirs = [d for d in
             [os.path.join(srcdir, subdir, 'lib') for subdir in subdirs]
             if os.path.isfile(os.path.join(d, 'SOURCES'))]

  clang_cl_includes = ' '.join(["-I%s" % incdir for incdir in incdirs])

  for device in available_targets[target]['devices']:
    if device['gpu'] == '':
      full_target_name = target
      obj_suffix = ''
    else:
      full_target_name = device['gpu'] + '-' + target
      obj_suffix = '.' + device['gpu']

    # The rule for building a .bc file for the specified architecture using clang.
    clang_bc_flags = "-target %s -I`dirname $in` %s " \
                     "-fno-builtin " \
                     "-Dcl_clang_storage_class_specifiers " \
                     "-Dcl_khr_fp64 " \
                     "-Dcles_khr_int64 " \
                     "-D__CLC_INTERNAL " \
                     "-emit-llvm" % (target, clang_cl_includes)
    if device['gpu'] != '':
      clang_bc_flags += ' -mcpu=' + device['gpu']
    # The flags differ per device, so the rule name must too: Ninja rejects
    # a second definition of an existing rule name.
    clang_bc_rule = "CLANG_CL_BC_" + full_target_name
    c_compiler_rule(b, clang_bc_rule, "LLVM-CC", llvm_clang, clang_bc_flags)

    objects = []
    sources_seen = set()

    for libdir in libdirs:
      subdir_list_file = os.path.join(libdir, 'SOURCES')
      manifest_deps.add(subdir_list_file)
      override_list_file = os.path.join(libdir, 'OVERRIDES')

      # Add target overrides: names listed here are marked as seen, so they
      # are not built from this or any later library directory.
      if os.path.exists(override_list_file):
        with open(override_list_file) as override_list:
          for override in override_list.readlines():
            override = override.rstrip()
            sources_seen.add(override)

      with open(subdir_list_file) as source_list:
        source_names = source_list.readlines()
      for src in source_names:
        src = src.rstrip()
        if src not in sources_seen:
          sources_seen.add(src)
          obj = os.path.join(target, 'lib', src + obj_suffix + '.bc')
          objects.append(obj)
          src_file = os.path.join(libdir, src)
          ext = os.path.splitext(src)[1]
          # .ll files are assembled with llvm-as; everything else goes
          # through clang.
          if ext == '.ll':
            b.build(obj, 'LLVM_AS', src_file)
          else:
            b.build(obj, clang_bc_rule, src_file)

    # Link all objects, optimize, then run prepare-builtins on the result.
    builtins_link_bc = os.path.join(target, 'lib', 'builtins.link' + obj_suffix + '.bc')
    builtins_opt_bc = os.path.join(target, 'lib', 'builtins.opt' + obj_suffix + '.bc')
    builtins_bc = os.path.join('built_libs', full_target_name + '.bc')
    b.build(builtins_link_bc, "LLVM_LINK", objects)
    b.build(builtins_opt_bc, "OPT", builtins_link_bc)
    b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins)
    install_files_bc.append((builtins_bc, builtins_bc))
    install_deps.append(builtins_bc)

    if device['aliases']:
      # The command embeds this device's library name, so the rule is
      # defined once per device under a device-specific name instead of
      # being redefined as "CREATE_ALIAS" for every alias.
      create_alias_rule = "CREATE_ALIAS_" + full_target_name
      b.rule(create_alias_rule, "ln -fs %s $out" % os.path.basename(builtins_bc),
             "CREATE-ALIAS $out")
    for alias in device['aliases']:
      alias_file = os.path.join('built_libs', alias + '-' + target + '.bc')
      b.build(alias_file, create_alias_rule, builtins_bc)
      install_files_bc.append((alias_file, alias_file))
      install_deps.append(alias_file)
    b.default(builtins_bc)
# Build the single shell command used by the 'install' target. It copies
# every built library to libexecdir, then the generic clc headers to
# includedir, then libclc.pc to pkgconfigdir, all below $(DESTDIR).
bc_install_steps = ['mkdir -p $(DESTDIR)/%(dst)s && cp -r %(src)s $(DESTDIR)/%(dst)s' %
                    {'src': bc_path, 'dst': libexecdir}
                    for (bc_path, _dest) in install_files_bc]
install_cmd = ' && '.join(bc_install_steps)

install_cmd = ('%(old)s && mkdir -p $(DESTDIR)/%(dst)s && cp -r %(srcdir)s/generic/include/clc $(DESTDIR)/%(dst)s' %
               {'old': install_cmd,
                'dst': includedir,
                'srcdir': srcdir})

install_cmd = ('%(old)s && mkdir -p $(DESTDIR)/%(dst)s && cp -r libclc.pc $(DESTDIR)/%(dst)s' %
               {'old': install_cmd,
                'dst': pkgconfigdir})

b.rule('install', command = install_cmd, description = 'INSTALL')
b.build('install', 'install', install_deps)

# The build file is itself a target: it is regenerated by re-running this
# script with the original command line when any manifest dependency changes.
configure_command = ' '.join(sys.argv)
b.rule("configure", command = configure_command, description = 'CONFIGURE',
       generator = True)
b.build(b.output_filename(), 'configure', list(manifest_deps))

b.finish()
libclc-0~git20140101/generic/ 0000775 0000000 0000000 00000000000 12260750563 0015600 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/ 0000775 0000000 0000000 00000000000 12260750563 0017223 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/ 0000775 0000000 0000000 00000000000 12260750563 0017764 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/as_type.h 0000664 0000000 0000000 00000005570 12260750563 0021610 0 ustar 00root root 0000000 0000000 #define as_char(x) __builtin_astype(x, char)
#define as_uchar(x) __builtin_astype(x, uchar)
#define as_short(x) __builtin_astype(x, short)
#define as_ushort(x) __builtin_astype(x, ushort)
#define as_int(x) __builtin_astype(x, int)
#define as_uint(x) __builtin_astype(x, uint)
#define as_long(x) __builtin_astype(x, long)
#define as_ulong(x) __builtin_astype(x, ulong)
#define as_float(x) __builtin_astype(x, float)
#define as_char2(x) __builtin_astype(x, char2)
#define as_uchar2(x) __builtin_astype(x, uchar2)
#define as_short2(x) __builtin_astype(x, short2)
#define as_ushort2(x) __builtin_astype(x, ushort2)
#define as_int2(x) __builtin_astype(x, int2)
#define as_uint2(x) __builtin_astype(x, uint2)
#define as_long2(x) __builtin_astype(x, long2)
#define as_ulong2(x) __builtin_astype(x, ulong2)
#define as_float2(x) __builtin_astype(x, float2)
#define as_char3(x) __builtin_astype(x, char3)
#define as_uchar3(x) __builtin_astype(x, uchar3)
#define as_short3(x) __builtin_astype(x, short3)
#define as_ushort3(x) __builtin_astype(x, ushort3)
#define as_int3(x) __builtin_astype(x, int3)
#define as_uint3(x) __builtin_astype(x, uint3)
#define as_long3(x) __builtin_astype(x, long3)
#define as_ulong3(x) __builtin_astype(x, ulong3)
#define as_float3(x) __builtin_astype(x, float3)
#define as_char4(x) __builtin_astype(x, char4)
#define as_uchar4(x) __builtin_astype(x, uchar4)
#define as_short4(x) __builtin_astype(x, short4)
#define as_ushort4(x) __builtin_astype(x, ushort4)
#define as_int4(x) __builtin_astype(x, int4)
#define as_uint4(x) __builtin_astype(x, uint4)
#define as_long4(x) __builtin_astype(x, long4)
#define as_ulong4(x) __builtin_astype(x, ulong4)
#define as_float4(x) __builtin_astype(x, float4)
#define as_char8(x) __builtin_astype(x, char8)
#define as_uchar8(x) __builtin_astype(x, uchar8)
#define as_short8(x) __builtin_astype(x, short8)
#define as_ushort8(x) __builtin_astype(x, ushort8)
#define as_int8(x) __builtin_astype(x, int8)
#define as_uint8(x) __builtin_astype(x, uint8)
#define as_long8(x) __builtin_astype(x, long8)
#define as_ulong8(x) __builtin_astype(x, ulong8)
#define as_float8(x) __builtin_astype(x, float8)
#define as_char16(x) __builtin_astype(x, char16)
#define as_uchar16(x) __builtin_astype(x, uchar16)
#define as_short16(x) __builtin_astype(x, short16)
#define as_ushort16(x) __builtin_astype(x, ushort16)
#define as_int16(x) __builtin_astype(x, int16)
#define as_uint16(x) __builtin_astype(x, uint16)
#define as_long16(x) __builtin_astype(x, long16)
#define as_ulong16(x) __builtin_astype(x, ulong16)
#define as_float16(x) __builtin_astype(x, float16)
/* Double-precision as_double*() reinterpretation macros, expanding to
 * __builtin_astype like the ones above. They are only provided when the
 * cl_khr_fp64 macro is defined. */
#ifdef cl_khr_fp64
#define as_double(x) __builtin_astype(x, double)
#define as_double2(x) __builtin_astype(x, double2)
#define as_double3(x) __builtin_astype(x, double3)
#define as_double4(x) __builtin_astype(x, double4)
#define as_double8(x) __builtin_astype(x, double8)
#define as_double16(x) __builtin_astype(x, double16)
#endif
libclc-0~git20140101/generic/include/clc/atomic/ 0000775 0000000 0000000 00000000000 12260750563 0021240 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/atomic/atomic_add.h 0000664 0000000 0000000 00000000136 12260750563 0023475 0 ustar 00root root 0000000 0000000 #define __CLC_FUNCTION atomic_add
#include
#undef __CLC_FUNCTION
libclc-0~git20140101/generic/include/clc/atomic/atomic_dec.h 0000664 0000000 0000000 00000000050 12260750563 0023473 0 ustar 00root root 0000000 0000000 #define atomic_dec(p) atomic_sub(p, 1);
libclc-0~git20140101/generic/include/clc/atomic/atomic_decl.inc 0000664 0000000 0000000 00000000510 12260750563 0024172 0 ustar 00root root 0000000 0000000
/* Declares one overload of the function named by __CLC_FUNCTION, taking a
 * volatile ADDRSPACE-qualified pointer to TYPE and a TYPE value, and
 * returning TYPE. __CLC_FUNCTION is not defined in this file; it is expected
 * to be defined by the including header (e.g. atomic_add.h defines it as
 * atomic_add). */
#define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE) \
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE);
/* Declares the global and local address-space overloads for TYPE. */
#define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE) \
__CLC_DECLARE_ATOMIC(global, TYPE); \
__CLC_DECLARE_ATOMIC(local, TYPE);
/* Overloads are declared for int and uint. */
__CLC_DECLARE_ATOMIC_ADDRSPACE(int);
__CLC_DECLARE_ATOMIC_ADDRSPACE(uint);
libclc-0~git20140101/generic/include/clc/atomic/atomic_inc.h 0000664 0000000 0000000 00000000050 12260750563 0023511 0 ustar 00root root 0000000 0000000 #define atomic_inc(p) atomic_add(p, 1);
libclc-0~git20140101/generic/include/clc/atomic/atomic_sub.h 0000664 0000000 0000000 00000000136 12260750563 0023536 0 ustar 00root root 0000000 0000000 #define __CLC_FUNCTION atomic_sub
#include
#undef __CLC_FUNCTION
libclc-0~git20140101/generic/include/clc/cl_khr_global_int32_base_atomics/ 0000775 0000000 0000000 00000000000 12260750563 0026276 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h 0000664 0000000 0000000 00000000227 12260750563 0030220 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL int atom_add(global int *p, int val);
_CLC_OVERLOAD _CLC_DECL unsigned int atom_add(global unsigned int *p, unsigned int val);
libclc-0~git20140101/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h 0000664 0000000 0000000 00000000174 12260750563 0030224 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL int atom_dec(global int *p);
_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(global unsigned int *p);
libclc-0~git20140101/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h 0000664 0000000 0000000 00000000174 12260750563 0030242 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL int atom_inc(global int *p);
_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(global unsigned int *p);
libclc-0~git20140101/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h 0000664 0000000 0000000 00000000227 12260750563 0030261 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL int atom_sub(global int *p, int val);
_CLC_OVERLOAD _CLC_DECL unsigned int atom_sub(global unsigned int *p, unsigned int val);
libclc-0~git20140101/generic/include/clc/clc.h 0000664 0000000 0000000 00000007156 12260750563 0020707 0 ustar 00root root 0000000 0000000 #ifndef cl_clang_storage_class_specifiers
#error Implementation requires cl_clang_storage_class_specifiers extension!
#endif
#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
/* Function Attributes */
#include
/* Pattern Macro Definitions */
#include
/* 6.1 Supported Data Types */
#include
/* 6.2.3 Explicit Conversions */
#include
/* 6.2.4.2 Reinterpreting Types Using as_type() and as_typen() */
#include
/* 6.11.1 Work-Item Functions */
#include
#include
#include
#include
#include
#include
/* 6.11.2 Math Functions */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* 6.11.2.1 Floating-point macros */
#include
/* 6.11.3 Integer Functions */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* 6.11.3 Integer Definitions */
#include
/* 6.11.2 and 6.11.3 Shared Integer/Math Functions */
#include
#include
#include
#include
#include
/* 6.11.4 Common Functions */
#include
/* 6.11.5 Geometric Functions */
#include
#include
#include
#include
/* 6.11.6 Relational Functions */
#include
#include
#include
#include
/* 6.11.8 Synchronization Functions */
#include
#include
/* 6.11.11 Atomic Functions */
#include
#include
#include
#include
/* cl_khr_global_int32_base_atomics Extension Functions */
#include
#include
#include
#include
/* libclc internal definitions */
#ifdef __CLC_INTERNAL
#include
#endif
#pragma OPENCL EXTENSION all : disable
libclc-0~git20140101/generic/include/clc/clcfunc.h 0000664 0000000 0000000 00000000260 12260750563 0021550 0 ustar 00root root 0000000 0000000 #define _CLC_OVERLOAD __attribute__((overloadable))
#define _CLC_DECL
#define _CLC_DEF __attribute__((always_inline))
#define _CLC_INLINE __attribute__((always_inline)) inline
libclc-0~git20140101/generic/include/clc/clcmacro.h 0000664 0000000 0000000 00000004016 12260750563 0021721 0 ustar 00root root 0000000 0000000 #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
} \
\
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
} \
\
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
} \
\
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
} \
\
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
}
/* Defines the 2-, 3-, 4-, 8- and 16-component vector versions of the
 * two-argument function FUNCTION.
 *   DECLSPEC  - specifiers placed in front of every generated definition
 *   RET_TYPE  - element type of the result; the vector width is pasted on
 *   ARG1_TYPE - element type of the first argument
 *   ARG2_TYPE - element type of the second argument
 * The 2- and 3-component versions call FUNCTION on each pair of components
 * (.x, .y, .z); the 4-, 8- and 16-component versions call FUNCTION on the
 * .lo and .hi halves of the arguments and combine the two results.
 * NOTE(review): this presumably requires an overload of FUNCTION for the
 * component types to be visible at the point of expansion - confirm. */
#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
} \
\
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
FUNCTION(x.z, y.z)); \
} \
\
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
} \
\
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
} \
\
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
}
/* Defines FUNCTION(ARG1_TYPE, ARG2_TYPE) as an overloadable _CLC_DEF function
 * that returns BUILTIN(x, y), then uses _CLC_BINARY_VECTORIZE to define the
 * 2/3/4/8/16-component vector overloads in terms of it. */
#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
return BUILTIN(x, y); \
} \
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE)
/* Defines FUNCTION(ARG1_TYPE) as an overloadable _CLC_DEF function that
 * returns BUILTIN(x), then uses _CLC_UNARY_VECTORIZE to define the
 * 2/3/4/8/16-component vector overloads in terms of it. */
#define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \
return BUILTIN(x); \
} \
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE)
libclc-0~git20140101/generic/include/clc/clctypes.h 0000664 0000000 0000000 00000006162 12260750563 0021770 0 ustar 00root root 0000000 0000000 /* 6.1.1 Built-in Scalar Data Types */
#include
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
/* 6.1.2 Built-in Vector Data Types */
typedef __attribute__((ext_vector_type(2))) char char2;
typedef __attribute__((ext_vector_type(3))) char char3;
typedef __attribute__((ext_vector_type(4))) char char4;
typedef __attribute__((ext_vector_type(8))) char char8;
typedef __attribute__((ext_vector_type(16))) char char16;
typedef __attribute__((ext_vector_type(2))) uchar uchar2;
typedef __attribute__((ext_vector_type(3))) uchar uchar3;
typedef __attribute__((ext_vector_type(4))) uchar uchar4;
typedef __attribute__((ext_vector_type(8))) uchar uchar8;
typedef __attribute__((ext_vector_type(16))) uchar uchar16;
/* Vector types are built with the ext_vector_type attribute; each scalar
 * element type gets 2-, 3-, 4-, 8- and 16-component forms. */

/* short / ushort vectors */
typedef short short2 __attribute__((ext_vector_type(2)));
typedef short short3 __attribute__((ext_vector_type(3)));
typedef short short4 __attribute__((ext_vector_type(4)));
typedef short short8 __attribute__((ext_vector_type(8)));
typedef short short16 __attribute__((ext_vector_type(16)));
typedef ushort ushort2 __attribute__((ext_vector_type(2)));
typedef ushort ushort3 __attribute__((ext_vector_type(3)));
typedef ushort ushort4 __attribute__((ext_vector_type(4)));
typedef ushort ushort8 __attribute__((ext_vector_type(8)));
typedef ushort ushort16 __attribute__((ext_vector_type(16)));

/* int / uint vectors */
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef int int8 __attribute__((ext_vector_type(8)));
typedef int int16 __attribute__((ext_vector_type(16)));
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint3 __attribute__((ext_vector_type(3)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
typedef uint uint8 __attribute__((ext_vector_type(8)));
typedef uint uint16 __attribute__((ext_vector_type(16)));

/* long / ulong vectors */
typedef long long2 __attribute__((ext_vector_type(2)));
typedef long long3 __attribute__((ext_vector_type(3)));
typedef long long4 __attribute__((ext_vector_type(4)));
typedef long long8 __attribute__((ext_vector_type(8)));
typedef long long16 __attribute__((ext_vector_type(16)));
typedef ulong ulong2 __attribute__((ext_vector_type(2)));
typedef ulong ulong3 __attribute__((ext_vector_type(3)));
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
typedef ulong ulong8 __attribute__((ext_vector_type(8)));
typedef ulong ulong16 __attribute__((ext_vector_type(16)));

/* float vectors */
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float8 __attribute__((ext_vector_type(8)));
typedef float float16 __attribute__((ext_vector_type(16)));

/* 9.3 Double Precision Floating-Point */
/* double vectors are declared only when cl_khr_fp64 is defined. */
#if defined(cl_khr_fp64)
typedef double double2 __attribute__((ext_vector_type(2)));
typedef double double3 __attribute__((ext_vector_type(3)));
typedef double double4 __attribute__((ext_vector_type(4)));
typedef double double8 __attribute__((ext_vector_type(8)));
typedef double double16 __attribute__((ext_vector_type(16)));
#endif
libclc-0~git20140101/generic/include/clc/common/ 0000775 0000000 0000000 00000000000 12260750563 0021254 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/common/sign.h 0000664 0000000 0000000 00000000221 12260750563 0022360 0 ustar 00root root 0000000 0000000 #define __CLC_FUNCTION sign
#define __CLC_BODY
#include
#undef __CLC_FUNCTION
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/convert.h 0000664 0000000 0000000 00000004416 12260750563 0021622 0 ustar 00root root 0000000 0000000 #define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
_CLC_OVERLOAD _CLC_DECL TO_TYPE convert_##TO_TYPE##SUFFIX(FROM_TYPE x);
/* Declares convert_<TO_TYPE><SUFFIX>() for the scalar type pair and for the
 * 2-, 3-, 4-, 8- and 16-component vector pairs built by token-pasting the
 * width onto both type names. Each _CLC_CONVERT_DECL expansion already ends
 * in ';', so no separators are needed between the lines. */
#define _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
_CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
_CLC_CONVERT_DECL(FROM_TYPE##2, TO_TYPE##2, SUFFIX) \
_CLC_CONVERT_DECL(FROM_TYPE##3, TO_TYPE##3, SUFFIX) \
_CLC_CONVERT_DECL(FROM_TYPE##4, TO_TYPE##4, SUFFIX) \
_CLC_CONVERT_DECL(FROM_TYPE##8, TO_TYPE##8, SUFFIX) \
_CLC_CONVERT_DECL(FROM_TYPE##16, TO_TYPE##16, SUFFIX)
/* Declares conversions from FROM_TYPE to every destination type that does
 * not depend on cl_khr_fp64 (all integer types plus float). */
#define _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, char, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uchar, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, int, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uint, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, short, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ushort, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, long, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, ulong, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, float, SUFFIX)
/* _CLC_VECTOR_CONVERT_FROM adds double as a destination type only when
 * cl_khr_fp64 is defined; otherwise it is the FROM1 list unchanged. */
#ifdef cl_khr_fp64
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_DECL(FROM_TYPE, double, SUFFIX)
#else
#define _CLC_VECTOR_CONVERT_FROM(FROM_TYPE, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX)
#endif
/* Iterates the source types that do not depend on cl_khr_fp64, expanding
 * the full destination list for each one. */
#define _CLC_VECTOR_CONVERT_TO1(SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(char, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(uchar, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(int, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(uint, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(short, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(ushort, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(long, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(ulong, SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(float, SUFFIX)
/* _CLC_VECTOR_CONVERT_TO adds double as a source type only when
 * cl_khr_fp64 is defined. */
#ifdef cl_khr_fp64
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX) \
_CLC_VECTOR_CONVERT_FROM(double, SUFFIX)
#else
#define _CLC_VECTOR_CONVERT_TO(SUFFIX) \
_CLC_VECTOR_CONVERT_TO1(SUFFIX)
#endif
/* For one rounding suffix, emits both the "_sat"-prefixed and the plain
 * variant of every conversion. */
#define _CLC_VECTOR_CONVERT_TO_SUFFIX(ROUND) \
_CLC_VECTOR_CONVERT_TO(_sat##ROUND) \
_CLC_VECTOR_CONVERT_TO(ROUND)
/* Instantiate the declarations for each rounding suffix; the final call
 * passes an empty ROUND, which yields the unsuffixed and "_sat" names. */
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtn)
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rte)
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtz)
_CLC_VECTOR_CONVERT_TO_SUFFIX(_rtp)
_CLC_VECTOR_CONVERT_TO_SUFFIX()
libclc-0~git20140101/generic/include/clc/float/ 0000775 0000000 0000000 00000000000 12260750563 0021071 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/float/definitions.h 0000664 0000000 0000000 00000001051 12260750563 0023552 0 ustar 00root root 0000000 0000000 #define FLT_DIG 6
/* float: radix and mantissa width */
#define FLT_RADIX 2
#define FLT_MANT_DIG 24
/* float: binary and decimal exponent bounds */
#define FLT_MIN_EXP -125
#define FLT_MAX_EXP +128
#define FLT_MIN_10_EXP -37
#define FLT_MAX_10_EXP +38
/* float: limit values, spelled as hexadecimal floating-point literals */
#define FLT_MIN 0x1.0p-126f
#define FLT_MAX 0x1.fffffep127f
#define FLT_EPSILON 0x1.0p-23f
/* double limits are provided only when cl_khr_fp64 is defined */
#if defined(cl_khr_fp64)
/* double: decimal digits and mantissa width */
#define DBL_DIG 15
#define DBL_MANT_DIG 53
/* double: binary and decimal exponent bounds */
#define DBL_MIN_EXP -1021
#define DBL_MAX_EXP +1024
#define DBL_MIN_10_EXP -307
#define DBL_MAX_10_EXP +308
/* double: limit values, spelled as hexadecimal floating-point literals */
#define DBL_MIN 0x1.0p-1022
#define DBL_MAX 0x1.fffffffffffffp1023
#define DBL_EPSILON 0x1.0p-52
#endif
libclc-0~git20140101/generic/include/clc/gentype.inc 0000664 0000000 0000000 00000001470 12260750563 0022134 0 ustar 00root root 0000000 0000000 #define __CLC_GENTYPE float
/* X-macro driver: the file named by BODY is included once per type, with
 * __CLC_GENTYPE set to that type for the duration of the include.
 * Note that this file uses the macro name BODY rather than __CLC_BODY. */
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float2
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float3
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float4
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float8
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float16
#include BODY
#undef __CLC_GENTYPE
/* double and its vector forms are instantiated only when cl_khr_fp64 is
 * defined. */
#ifdef cl_khr_fp64
#define __CLC_GENTYPE double
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double2
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double3
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double4
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double8
#include BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double16
#include BODY
#undef __CLC_GENTYPE
#endif
/* BODY is consumed here, so the including file does not need to undefine it. */
#undef BODY
libclc-0~git20140101/generic/include/clc/geometric/ 0000775 0000000 0000000 00000000000 12260750563 0021742 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/geometric/cross.h 0000664 0000000 0000000 00000000170 12260750563 0023242 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1);
/* float4 overload of cross(); this header only declares it. */
_CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1);
libclc-0~git20140101/generic/include/clc/geometric/distance.h 0000664 0000000 0000000 00000000124 12260750563 0023702 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/geometric/dot.h 0000664 0000000 0000000 00000000117 12260750563 0022700 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/geometric/dot.inc 0000664 0000000 0000000 00000000113 12260750563 0023216 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT dot(__CLC_FLOATN p0, __CLC_FLOATN p1);
libclc-0~git20140101/generic/include/clc/geometric/floatn.inc 0000664 0000000 0000000 00000001267 12260750563 0023726 0 ustar 00root root 0000000 0000000 #define __CLC_FLOAT float
/* X-macro driver for the geometric functions: __CLC_BODY is included once
 * per argument type. __CLC_FLOAT holds the scalar element type and
 * __CLC_FLOATN the scalar or vector type for the current pass. Only the
 * scalar and the 2-, 3- and 4-component forms are instantiated here. */
#define __CLC_FLOATN float
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN float2
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN float3
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN float4
#include __CLC_BODY
#undef __CLC_FLOATN
#undef __CLC_FLOAT
/* The same four widths for double, only when cl_khr_fp64 is defined. */
#ifdef cl_khr_fp64
#define __CLC_FLOAT double
#define __CLC_FLOATN double
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN double2
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN double3
#include __CLC_BODY
#undef __CLC_FLOATN
#define __CLC_FLOATN double4
#include __CLC_BODY
#undef __CLC_FLOATN
#undef __CLC_FLOAT
#endif
/* __CLC_BODY is consumed here so the next header can define it afresh. */
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/geometric/length.h 0000664 0000000 0000000 00000000122 12260750563 0023367 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/geometric/length.inc 0000664 0000000 0000000 00000000075 12260750563 0023720 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_FLOAT length(__CLC_FLOATN p0);
libclc-0~git20140101/generic/include/clc/geometric/normalize.h 0000664 0000000 0000000 00000000125 12260750563 0024111 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/geometric/normalize.inc 0000664 0000000 0000000 00000000100 12260750563 0024424 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_FLOATN normalize(__CLC_FLOATN p);
libclc-0~git20140101/generic/include/clc/integer/ 0000775 0000000 0000000 00000000000 12260750563 0021421 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/integer/abs.h 0000664 0000000 0000000 00000000114 12260750563 0022333 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/abs.inc 0000664 0000000 0000000 00000000076 12260750563 0022664 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs(__CLC_GENTYPE x);
libclc-0~git20140101/generic/include/clc/integer/abs_diff.h 0000664 0000000 0000000 00000000121 12260750563 0023321 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/abs_diff.inc 0000664 0000000 0000000 00000000124 12260750563 0023646 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/add_sat.h 0000664 0000000 0000000 00000000120 12260750563 0023162 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/add_sat.inc 0000664 0000000 0000000 00000000121 12260750563 0023505 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE add_sat(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/clz.h 0000664 0000000 0000000 00000000114 12260750563 0022356 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/clz.inc 0000664 0000000 0000000 00000000074 12260750563 0022705 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clz(__CLC_GENTYPE x);
libclc-0~git20140101/generic/include/clc/integer/definitions.h 0000664 0000000 0000000 00000000616 12260750563 0024110 0 ustar 00root root 0000000 0000000 #define CHAR_BIT 8
/*
 * Integer limit macros.
 *
 * Every minimum is written as (-MAX - 1) inside parentheses:
 *  - The decimal literal 2147483648 does not fit in int, so
 *    "-2147483648" would have a wider type than int.
 *  - The hex literal 0x8000000000000000L does not fit in a signed 64-bit
 *    long, so it is unsigned and negating it yields a positive value.
 *  - Parentheses keep the sign attached when the macro is used inside a
 *    larger expression.
 */
#define INT_MAX 2147483647
#define INT_MIN (-2147483647 - 1)
#define LONG_MAX 0x7fffffffffffffffL
#define LONG_MIN (-0x7fffffffffffffffL - 1)
#define SCHAR_MAX 127
#define SCHAR_MIN (-127 - 1)
#define CHAR_MAX 127
#define CHAR_MIN (-127 - 1)
#define SHRT_MAX 32767
#define SHRT_MIN (-32767 - 1)
#define UCHAR_MAX 255
#define USHRT_MAX 65535
#define UINT_MAX 0xffffffff
#define ULONG_MAX 0xffffffffffffffffUL
libclc-0~git20140101/generic/include/clc/integer/gentype.inc 0000664 0000000 0000000 00000022723 12260750563 0023575 0 ustar 00root root 0000000 0000000 //These 2 defines only change when switching between data sizes or base types to
//keep this file manageable.
// 8-bit family (char / uchar). For every pass __CLC_BODY is included with:
//   __CLC_GENTYPE        - the type being instantiated
//   __CLC_U_GENTYPE      - the unsigned type of the same shape
//   __CLC_S_GENTYPE      - the signed type of the same shape
//   __CLC_SCALAR_GENTYPE - the scalar element type of the family
//   __CLC_GENSIZE        - 8 for this family
//   __CLC_SCALAR         - defined (as 1) only for the scalar pass
#define __CLC_GENSIZE 8
#define __CLC_SCALAR_GENTYPE char
// char: scalar pass
#define __CLC_GENTYPE char
#define __CLC_U_GENTYPE uchar
#define __CLC_S_GENTYPE char
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// char: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE char2
#define __CLC_U_GENTYPE uchar2
#define __CLC_S_GENTYPE char2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE char3
#define __CLC_U_GENTYPE uchar3
#define __CLC_S_GENTYPE char3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE char4
#define __CLC_U_GENTYPE uchar4
#define __CLC_S_GENTYPE char4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE char8
#define __CLC_U_GENTYPE uchar8
#define __CLC_S_GENTYPE char8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE char16
#define __CLC_U_GENTYPE uchar16
#define __CLC_S_GENTYPE char16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// uchar: scalar pass
#undef __CLC_SCALAR_GENTYPE
#define __CLC_SCALAR_GENTYPE uchar
#define __CLC_GENTYPE uchar
#define __CLC_U_GENTYPE uchar
#define __CLC_S_GENTYPE char
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// uchar: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE uchar2
#define __CLC_U_GENTYPE uchar2
#define __CLC_S_GENTYPE char2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uchar3
#define __CLC_U_GENTYPE uchar3
#define __CLC_S_GENTYPE char3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uchar4
#define __CLC_U_GENTYPE uchar4
#define __CLC_S_GENTYPE char4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uchar8
#define __CLC_U_GENTYPE uchar8
#define __CLC_S_GENTYPE char8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uchar16
#define __CLC_U_GENTYPE uchar16
#define __CLC_S_GENTYPE char16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// 16-bit family (short / ushort): __CLC_GENSIZE and __CLC_SCALAR_GENTYPE are
// switched first, then __CLC_BODY is included once per scalar/vector type.
#undef __CLC_GENSIZE
#define __CLC_GENSIZE 16
#undef __CLC_SCALAR_GENTYPE
#define __CLC_SCALAR_GENTYPE short
// short: scalar pass (__CLC_SCALAR is defined only here)
#define __CLC_GENTYPE short
#define __CLC_U_GENTYPE ushort
#define __CLC_S_GENTYPE short
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// short: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE short2
#define __CLC_U_GENTYPE ushort2
#define __CLC_S_GENTYPE short2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE short3
#define __CLC_U_GENTYPE ushort3
#define __CLC_S_GENTYPE short3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE short4
#define __CLC_U_GENTYPE ushort4
#define __CLC_S_GENTYPE short4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE short8
#define __CLC_U_GENTYPE ushort8
#define __CLC_S_GENTYPE short8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE short16
#define __CLC_U_GENTYPE ushort16
#define __CLC_S_GENTYPE short16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// ushort: scalar pass
#undef __CLC_SCALAR_GENTYPE
#define __CLC_SCALAR_GENTYPE ushort
#define __CLC_GENTYPE ushort
#define __CLC_U_GENTYPE ushort
#define __CLC_S_GENTYPE short
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// ushort: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE ushort2
#define __CLC_U_GENTYPE ushort2
#define __CLC_S_GENTYPE short2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ushort3
#define __CLC_U_GENTYPE ushort3
#define __CLC_S_GENTYPE short3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ushort4
#define __CLC_U_GENTYPE ushort4
#define __CLC_S_GENTYPE short4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ushort8
#define __CLC_U_GENTYPE ushort8
#define __CLC_S_GENTYPE short8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ushort16
#define __CLC_U_GENTYPE ushort16
#define __CLC_S_GENTYPE short16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// 32-bit family (int / uint): __CLC_GENSIZE and __CLC_SCALAR_GENTYPE are
// switched first, then __CLC_BODY is included once per scalar/vector type.
#undef __CLC_GENSIZE
#define __CLC_GENSIZE 32
#undef __CLC_SCALAR_GENTYPE
#define __CLC_SCALAR_GENTYPE int
// int: scalar pass (__CLC_SCALAR is defined only here)
#define __CLC_GENTYPE int
#define __CLC_U_GENTYPE uint
#define __CLC_S_GENTYPE int
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// int: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE int2
#define __CLC_U_GENTYPE uint2
#define __CLC_S_GENTYPE int2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE int3
#define __CLC_U_GENTYPE uint3
#define __CLC_S_GENTYPE int3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE int4
#define __CLC_U_GENTYPE uint4
#define __CLC_S_GENTYPE int4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE int8
#define __CLC_U_GENTYPE uint8
#define __CLC_S_GENTYPE int8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE int16
#define __CLC_U_GENTYPE uint16
#define __CLC_S_GENTYPE int16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// uint: scalar pass
#undef __CLC_SCALAR_GENTYPE
#define __CLC_SCALAR_GENTYPE uint
#define __CLC_GENTYPE uint
#define __CLC_U_GENTYPE uint
#define __CLC_S_GENTYPE int
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// uint: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE uint2
#define __CLC_U_GENTYPE uint2
#define __CLC_S_GENTYPE int2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uint3
#define __CLC_U_GENTYPE uint3
#define __CLC_S_GENTYPE int3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uint4
#define __CLC_U_GENTYPE uint4
#define __CLC_S_GENTYPE int4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uint8
#define __CLC_U_GENTYPE uint8
#define __CLC_S_GENTYPE int8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE uint16
#define __CLC_U_GENTYPE uint16
#define __CLC_S_GENTYPE int16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// 64-bit family (long / ulong): __CLC_GENSIZE and __CLC_SCALAR_GENTYPE are
// switched first, then __CLC_BODY is included once per scalar/vector type.
#undef __CLC_GENSIZE
#define __CLC_GENSIZE 64
#undef __CLC_SCALAR_GENTYPE
#define __CLC_SCALAR_GENTYPE long
// long: scalar pass (__CLC_SCALAR is defined only here)
#define __CLC_GENTYPE long
#define __CLC_U_GENTYPE ulong
#define __CLC_S_GENTYPE long
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// long: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE long2
#define __CLC_U_GENTYPE ulong2
#define __CLC_S_GENTYPE long2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE long3
#define __CLC_U_GENTYPE ulong3
#define __CLC_S_GENTYPE long3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE long4
#define __CLC_U_GENTYPE ulong4
#define __CLC_S_GENTYPE long4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE long8
#define __CLC_U_GENTYPE ulong8
#define __CLC_S_GENTYPE long8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE long16
#define __CLC_U_GENTYPE ulong16
#define __CLC_S_GENTYPE long16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// ulong: scalar pass
#undef __CLC_SCALAR_GENTYPE
#define __CLC_SCALAR_GENTYPE ulong
#define __CLC_GENTYPE ulong
#define __CLC_U_GENTYPE ulong
#define __CLC_S_GENTYPE long
#define __CLC_SCALAR 1
#include __CLC_BODY
#undef __CLC_SCALAR
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// ulong: vector passes (2, 3, 4, 8, 16 components)
#define __CLC_GENTYPE ulong2
#define __CLC_U_GENTYPE ulong2
#define __CLC_S_GENTYPE long2
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ulong3
#define __CLC_U_GENTYPE ulong3
#define __CLC_S_GENTYPE long3
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ulong4
#define __CLC_U_GENTYPE ulong4
#define __CLC_S_GENTYPE long4
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ulong8
#define __CLC_U_GENTYPE ulong8
#define __CLC_S_GENTYPE long8
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
#define __CLC_GENTYPE ulong16
#define __CLC_U_GENTYPE ulong16
#define __CLC_S_GENTYPE long16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_U_GENTYPE
#undef __CLC_S_GENTYPE
// Final cleanup: the per-family macros and __CLC_BODY itself are undefined
// so the next header that uses this driver starts from a clean state.
#undef __CLC_GENSIZE
#undef __CLC_SCALAR_GENTYPE
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/integer/hadd.h 0000664 0000000 0000000 00000000115 12260750563 0022467 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/hadd.inc 0000664 0000000 0000000 00000000116 12260750563 0023012 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/integer-gentype.inc 0000664 0000000 0000000 00000001266 12260750563 0025227 0 ustar 00root root 0000000 0000000 #define __CLC_GENTYPE int
/* X-macro driver restricted to the 32-bit integer types: __CLC_BODY is
 * included once per type with __CLC_GENTYPE set to int, uint and each of
 * their 2-, 3-, 4-, 8- and 16-component vectors. The 3-component passes
 * (int3 / uint3) are included so that bodies instantiated through this
 * file cover every vector width. __CLC_BODY is not undefined here; the
 * including header is expected to do that. */
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int3
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE int16
#include __CLC_BODY
#undef __CLC_GENTYPE
/* Unsigned counterparts. */
#define __CLC_GENTYPE uint
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint3
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE uint16
#include __CLC_BODY
#undef __CLC_GENTYPE
libclc-0~git20140101/generic/include/clc/integer/mad24.h 0000664 0000000 0000000 00000000150 12260750563 0022475 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/integer/mad24.inc 0000664 0000000 0000000 00000000140 12260750563 0023016 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
libclc-0~git20140101/generic/include/clc/integer/mad_hi.h 0000664 0000000 0000000 00000000056 12260750563 0023014 0 ustar 00root root 0000000 0000000 #define mad_hi(a, b, c) (mul_hi((a),(b))+(c))
libclc-0~git20140101/generic/include/clc/integer/mul24.h 0000664 0000000 0000000 00000000150 12260750563 0022531 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/integer/mul24.inc 0000664 0000000 0000000 00000000117 12260750563 0023056 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/mul_hi.h 0000664 0000000 0000000 00000000117 12260750563 0023046 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/mul_hi.inc 0000664 0000000 0000000 00000000120 12260750563 0023362 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mul_hi(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/rhadd.h 0000664 0000000 0000000 00000000116 12260750563 0022652 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/rhadd.inc 0000664 0000000 0000000 00000000117 12260750563 0023175 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/rotate.h 0000664 0000000 0000000 00000000117 12260750563 0023067 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/rotate.inc 0000664 0000000 0000000 00000000120 12260750563 0023403 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/sub_sat.h 0000664 0000000 0000000 00000000120 12260750563 0023223 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/integer/sub_sat.inc 0000664 0000000 0000000 00000000121 12260750563 0023546 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sub_sat(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/integer/upsample.h 0000664 0000000 0000000 00000001723 12260750563 0023423 0 ustar 00root root 0000000 0000000 #define __CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \
_CLC_OVERLOAD _CLC_DECL BGENTYPE upsample(GENTYPE hi, UGENTYPE lo);
#define __CLC_UPSAMPLE_VEC(BGENTYPE, GENTYPE, UGENTYPE) \
__CLC_UPSAMPLE_DECL(BGENTYPE, GENTYPE, UGENTYPE) \
__CLC_UPSAMPLE_DECL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2) \
__CLC_UPSAMPLE_DECL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3) \
__CLC_UPSAMPLE_DECL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4) \
__CLC_UPSAMPLE_DECL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8) \
__CLC_UPSAMPLE_DECL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16) \
#define __CLC_UPSAMPLE_TYPES() \
__CLC_UPSAMPLE_VEC(short, char, uchar) \
__CLC_UPSAMPLE_VEC(ushort, uchar, uchar) \
__CLC_UPSAMPLE_VEC(int, short, ushort) \
__CLC_UPSAMPLE_VEC(uint, ushort, ushort) \
__CLC_UPSAMPLE_VEC(long, int, uint) \
__CLC_UPSAMPLE_VEC(ulong, uint, uint) \
__CLC_UPSAMPLE_TYPES()
#undef __CLC_UPSAMPLE_TYPES
#undef __CLC_UPSAMPLE_DECL
#undef __CLC_UPSAMPLE_VEC
libclc-0~git20140101/generic/include/clc/math/ 0000775 0000000 0000000 00000000000 12260750563 0020715 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/math/binary_decl.inc 0000664 0000000 0000000 00000000424 12260750563 0023663 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b);
/* Overloads whose second argument is a scalar: float always, and double
 * when cl_khr_fp64 is defined.
 * NOTE(review): these are emitted for every __CLC_GENTYPE, so a float
 * vector also gets a (vector, double) overload and a double vector a
 * (vector, float) one - confirm whether the mixed-precision forms are
 * intended. */
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, float b);
#ifdef cl_khr_fp64
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, double b);
#endif
libclc-0~git20140101/generic/include/clc/math/binary_intrin.inc 0000664 0000000 0000000 00000002125 12260750563 0024257 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD float __CLC_FUNCTION(float, float) __asm(__CLC_INTRINSIC ".f32");
/* Each declaration gives the two-argument overload of __CLC_FUNCTION an
 * __asm label formed by concatenating the __CLC_INTRINSIC string with a
 * per-type suffix (".v<N>f32" for N-component float vectors). */
_CLC_OVERLOAD float2 __CLC_FUNCTION(float2, float2) __asm(__CLC_INTRINSIC ".v2f32");
_CLC_OVERLOAD float3 __CLC_FUNCTION(float3, float3) __asm(__CLC_INTRINSIC ".v3f32");
_CLC_OVERLOAD float4 __CLC_FUNCTION(float4, float4) __asm(__CLC_INTRINSIC ".v4f32");
_CLC_OVERLOAD float8 __CLC_FUNCTION(float8, float8) __asm(__CLC_INTRINSIC ".v8f32");
_CLC_OVERLOAD float16 __CLC_FUNCTION(float16, float16) __asm(__CLC_INTRINSIC ".v16f32");
/* double overloads (".f64" / ".v<N>f64" suffixes) exist only when
 * cl_khr_fp64 is defined. */
#ifdef cl_khr_fp64
_CLC_OVERLOAD double __CLC_FUNCTION(double, double) __asm(__CLC_INTRINSIC ".f64");
_CLC_OVERLOAD double2 __CLC_FUNCTION(double2, double2) __asm(__CLC_INTRINSIC ".v2f64");
_CLC_OVERLOAD double3 __CLC_FUNCTION(double3, double3) __asm(__CLC_INTRINSIC ".v3f64");
_CLC_OVERLOAD double4 __CLC_FUNCTION(double4, double4) __asm(__CLC_INTRINSIC ".v4f64");
_CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8) __asm(__CLC_INTRINSIC ".v8f64");
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
#endif
/* Both parameter macros are consumed here, so the including header does
 * not have to undefine them. */
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
libclc-0~git20140101/generic/include/clc/math/ceil.h 0000664 0000000 0000000 00000000220 12260750563 0021774 0 ustar 00root root 0000000 0000000 #undef ceil
#define ceil __clc_ceil
#define __CLC_FUNCTION __clc_ceil
#define __CLC_INTRINSIC "llvm.ceil"
#include
libclc-0~git20140101/generic/include/clc/math/clc_nextafter.h 0000664 0000000 0000000 00000000367 12260750563 0023715 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#define __CLC_FUNCTION nextafter
#include
#undef __CLC_FUNCTION
#define __CLC_FUNCTION __clc_nextafter
#include
#undef __CLC_FUNCTION
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/math/cos.h 0000664 0000000 0000000 00000000213 12260750563 0021646 0 ustar 00root root 0000000 0000000 #undef cos
#define cos __clc_cos
#define __CLC_FUNCTION __clc_cos
#define __CLC_INTRINSIC "llvm.cos"
#include
libclc-0~git20140101/generic/include/clc/math/exp.h 0000664 0000000 0000000 00000000134 12260750563 0021660 0 ustar 00root root 0000000 0000000 #undef exp
// exp(x) = exp2(x * log2(e))
// The argument is parenthesized before scaling, so compound expressions are
// scaled as a whole. The scale factor is a float literal.
// NOTE(review): with a double argument the constant still carries only float
// precision - confirm this is acceptable for the fp64 path.
#define exp(val) (__clc_exp2((val) * 1.44269504f))
libclc-0~git20140101/generic/include/clc/math/exp2.h 0000664 0000000 0000000 00000000220 12260750563 0021736 0 ustar 00root root 0000000 0000000 #undef exp2
#define exp2 __clc_exp2
#define __CLC_FUNCTION __clc_exp2
#define __CLC_INTRINSIC "llvm.exp2"
#include
libclc-0~git20140101/generic/include/clc/math/fabs.h 0000664 0000000 0000000 00000000220 12260750563 0021773 0 ustar 00root root 0000000 0000000 #undef fabs
#define fabs __clc_fabs
#define __CLC_FUNCTION __clc_fabs
#define __CLC_INTRINSIC "llvm.fabs"
#include
libclc-0~git20140101/generic/include/clc/math/floor.h 0000664 0000000 0000000 00000000225 12260750563 0022206 0 ustar 00root root 0000000 0000000 #undef floor
#define floor __clc_floor
#define __CLC_FUNCTION __clc_floor
#define __CLC_INTRINSIC "llvm.floor"
#include
libclc-0~git20140101/generic/include/clc/math/fma.h 0000664 0000000 0000000 00000000215 12260750563 0021627 0 ustar 00root root 0000000 0000000 #undef fma
#define fma __clc_fma
#define __CLC_FUNCTION __clc_fma
#define __CLC_INTRINSIC "llvm.fma"
#include
libclc-0~git20140101/generic/include/clc/math/fmax.h 0000664 0000000 0000000 00000000300 12260750563 0022012 0 ustar 00root root 0000000 0000000 #undef fmax
#define fmax __clc_fmax
#define __CLC_BODY
#define __CLC_FUNCTION __clc_fmax
#include
#undef __CLC_BODY
#undef __CLC_FUNCTION
libclc-0~git20140101/generic/include/clc/math/fmin.h 0000664 0000000 0000000 00000000300 12260750563 0022010 0 ustar 00root root 0000000 0000000 #undef fmin
#define fmin __clc_fmin
#define __CLC_BODY
#define __CLC_FUNCTION __clc_fmin
#include
#undef __CLC_BODY
#undef __CLC_FUNCTION
libclc-0~git20140101/generic/include/clc/math/gentype.inc 0000664 0000000 0000000 00000002133 12260750563 0023062 0 ustar 00root root 0000000 0000000 #define __CLC_SCALAR_GENTYPE float
/* X-macro driver for the math functions: __CLC_BODY is included once per
 * floating-point type with __CLC_GENTYPE set to that type.
 * __CLC_SCALAR_GENTYPE stays defined for a whole family, and __CLC_SCALAR
 * is defined (with no value) only during the scalar pass. */
#define __CLC_GENTYPE float
#define __CLC_SCALAR
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_SCALAR
/* float vectors: 2, 3, 4, 8 and 16 components */
#define __CLC_GENTYPE float2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float3
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE float16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_SCALAR_GENTYPE
/* The double family follows the same pattern, only when cl_khr_fp64 is
 * defined. */
#ifdef cl_khr_fp64
#define __CLC_SCALAR_GENTYPE double
#define __CLC_SCALAR
#define __CLC_GENTYPE double
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_SCALAR
#define __CLC_GENTYPE double2
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double3
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double4
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double8
#include __CLC_BODY
#undef __CLC_GENTYPE
#define __CLC_GENTYPE double16
#include __CLC_BODY
#undef __CLC_GENTYPE
#undef __CLC_SCALAR_GENTYPE
#endif
/* __CLC_BODY is consumed here so the next header can define it afresh. */
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/math/hypot.h 0000664 0000000 0000000 00000000110 12260750563 0022221 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/math/hypot.inc 0000664 0000000 0000000 00000000117 12260750563 0022552 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hypot(__CLC_GENTYPE x, __CLC_GENTYPE y);
libclc-0~git20140101/generic/include/clc/math/log.h 0000664 0000000 0000000 00000000140 12260750563 0021642 0 ustar 00root root 0000000 0000000 #undef log
// log(x) = log2(x) * (1/log2(e))
// Implemented as a function-like macro over __clc_log2; the scale factor is
// a float literal.
// NOTE(review): with a double argument the constant still carries only float
// precision - confirm this is acceptable for the fp64 path.
#define log(val) (__clc_log2(val) * 0.693147181f)
libclc-0~git20140101/generic/include/clc/math/log2.h 0000664 0000000 0000000 00000000220 12260750563 0021723 0 ustar 00root root 0000000 0000000 #undef log2
#define log2 __clc_log2
#define __CLC_FUNCTION __clc_log2
#define __CLC_INTRINSIC "llvm.log2"
#include
libclc-0~git20140101/generic/include/clc/math/mad.h 0000664 0000000 0000000 00000000106 12260750563 0021624 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/math/mad.inc 0000664 0000000 0000000 00000000136 12260750563 0022151 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c);
libclc-0~git20140101/generic/include/clc/math/native_cos.h 0000664 0000000 0000000 00000000027 12260750563 0023217 0 ustar 00root root 0000000 0000000 #define native_cos cos
libclc-0~git20140101/generic/include/clc/math/native_divide.h 0000664 0000000 0000000 00000000050 12260750563 0023673 0 ustar 00root root 0000000 0000000 #define native_divide(x, y) ((x) / (y))
libclc-0~git20140101/generic/include/clc/math/native_exp.h 0000664 0000000 0000000 00000000027 12260750563 0023227 0 ustar 00root root 0000000 0000000 #define native_exp exp
libclc-0~git20140101/generic/include/clc/math/native_exp2.h 0000664 0000000 0000000 00000000031 12260750563 0023304 0 ustar 00root root 0000000 0000000 #define native_exp2 exp2
libclc-0~git20140101/generic/include/clc/math/native_log.h 0000664 0000000 0000000 00000000027 12260750563 0023214 0 ustar 00root root 0000000 0000000 #define native_log log
libclc-0~git20140101/generic/include/clc/math/native_log2.h 0000664 0000000 0000000 00000000031 12260750563 0023271 0 ustar 00root root 0000000 0000000 #define native_log2 log2
libclc-0~git20140101/generic/include/clc/math/native_powr.h 0000664 0000000 0000000 00000000030 12260750563 0023414 0 ustar 00root root 0000000 0000000 #define native_powr pow
libclc-0~git20140101/generic/include/clc/math/native_sin.h 0000664 0000000 0000000 00000000027 12260750563 0023224 0 ustar 00root root 0000000 0000000 #define native_sin sin
libclc-0~git20140101/generic/include/clc/math/native_sqrt.h 0000664 0000000 0000000 00000000031 12260750563 0023417 0 ustar 00root root 0000000 0000000 #define native_sqrt sqrt
libclc-0~git20140101/generic/include/clc/math/nextafter.h 0000664 0000000 0000000 00000000227 12260750563 0023067 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#define __CLC_FUNCTION nextafter
#include
#undef __CLC_FUNCTION
#undef __CLC_BODY
libclc-0~git20140101/generic/include/clc/math/pow.h 0000664 0000000 0000000 00000000214 12260750563 0021670 0 ustar 00root root 0000000 0000000 #undef pow
#define pow __clc_pow
#define __CLC_FUNCTION __clc_pow
#define __CLC_INTRINSIC "llvm.pow"
#include
libclc-0~git20140101/generic/include/clc/math/rint.h 0000664 0000000 0000000 00000000220 12260750563 0022034 0 ustar 00root root 0000000 0000000 #undef rint
#define rint __clc_rint
#define __CLC_FUNCTION __clc_rint
#define __CLC_INTRINSIC "llvm.rint"
#include
libclc-0~git20140101/generic/include/clc/math/round.h 0000664 0000000 0000000 00000000303 12260750563 0022211 0 ustar 00root root 0000000 0000000 #undef round
#define round __clc_round
#define __CLC_FUNCTION __clc_round
#define __CLC_INTRINSIC "llvm.round"
#include
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
libclc-0~git20140101/generic/include/clc/math/rsqrt.h 0000664 0000000 0000000 00000000040 12260750563 0022233 0 ustar 00root root 0000000 0000000 #define rsqrt(x) (1.f/sqrt(x))
libclc-0~git20140101/generic/include/clc/math/sin.h 0000664 0000000 0000000 00000000213 12260750563 0021653 0 ustar 00root root 0000000 0000000 #undef sin
#define sin __clc_sin
#define __CLC_FUNCTION __clc_sin
#define __CLC_INTRINSIC "llvm.sin"
#include
libclc-0~git20140101/generic/include/clc/math/sqrt.h 0000664 0000000 0000000 00000000220 12260750563 0022051 0 ustar 00root root 0000000 0000000 #undef sqrt
#define sqrt __clc_sqrt
#define __CLC_FUNCTION __clc_sqrt
#define __CLC_INTRINSIC "llvm.sqrt"
#include
libclc-0~git20140101/generic/include/clc/math/ternary_intrin.inc 0000664 0000000 0000000 00000002273 12260750563 0024463 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD float __CLC_FUNCTION(float, float, float) __asm(__CLC_INTRINSIC ".f32");
_CLC_OVERLOAD float2 __CLC_FUNCTION(float2, float2, float2) __asm(__CLC_INTRINSIC ".v2f32");
_CLC_OVERLOAD float3 __CLC_FUNCTION(float3, float3, float3) __asm(__CLC_INTRINSIC ".v3f32");
_CLC_OVERLOAD float4 __CLC_FUNCTION(float4, float4, float4) __asm(__CLC_INTRINSIC ".v4f32");
_CLC_OVERLOAD float8 __CLC_FUNCTION(float8, float8, float8) __asm(__CLC_INTRINSIC ".v8f32");
_CLC_OVERLOAD float16 __CLC_FUNCTION(float16, float16, float16) __asm(__CLC_INTRINSIC ".v16f32");
#ifdef cl_khr_fp64
_CLC_OVERLOAD double __CLC_FUNCTION(double, double, double) __asm(__CLC_INTRINSIC ".f64");
_CLC_OVERLOAD double2 __CLC_FUNCTION(double2, double2, double2) __asm(__CLC_INTRINSIC ".v2f64");
_CLC_OVERLOAD double3 __CLC_FUNCTION(double3, double3, double3) __asm(__CLC_INTRINSIC ".v3f64");
_CLC_OVERLOAD double4 __CLC_FUNCTION(double4, double4, double4) __asm(__CLC_INTRINSIC ".v4f64");
_CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8, double8) __asm(__CLC_INTRINSIC ".v8f64");
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
#endif
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
libclc-0~git20140101/generic/include/clc/math/trunc.h 0000664 0000000 0000000 00000000303 12260750563 0022215 0 ustar 00root root 0000000 0000000 #undef trunc
#define trunc __clc_trunc
#define __CLC_FUNCTION __clc_trunc
#define __CLC_INTRINSIC "llvm.trunc"
#include
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
libclc-0~git20140101/generic/include/clc/math/unary_decl.inc 0000664 0000000 0000000 00000000107 12260750563 0023533 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x);
libclc-0~git20140101/generic/include/clc/math/unary_intrin.inc 0000664 0000000 0000000 00000002007 12260750563 0024130 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD float __CLC_FUNCTION(float f) __asm(__CLC_INTRINSIC ".f32");
_CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
_CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
_CLC_OVERLOAD float4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32");
_CLC_OVERLOAD float8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32");
_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32");
#ifdef cl_khr_fp64
_CLC_OVERLOAD double __CLC_FUNCTION(double d) __asm(__CLC_INTRINSIC ".f64");
_CLC_OVERLOAD double2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64");
_CLC_OVERLOAD double3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64");
_CLC_OVERLOAD double4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64");
_CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
#endif
#undef __CLC_FUNCTION
#undef __CLC_INTRINSIC
libclc-0~git20140101/generic/include/clc/relational/ 0000775 0000000 0000000 00000000000 12260750563 0022116 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/relational/any.h 0000664 0000000 0000000 00000000603 12260750563 0023055 0 ustar 00root root 0000000 0000000
/* Declares the `any` overload that takes one value of type T. */
#define _CLC_ANY_DECL(T) _CLC_OVERLOAD _CLC_DECL int any(T v);

/* Declares `any` for T itself and for T2, T3, T4, T8 and T16. */
#define _CLC_VECTOR_ANY_DECL(T) \
  _CLC_ANY_DECL(T)              \
  _CLC_ANY_DECL(T##2)           \
  _CLC_ANY_DECL(T##3)           \
  _CLC_ANY_DECL(T##4)           \
  _CLC_ANY_DECL(T##8)           \
  _CLC_ANY_DECL(T##16)
/* Declare `any` for char, short, int and long, each in scalar form and in
 * every vector width that _CLC_VECTOR_ANY_DECL enumerates. */
_CLC_VECTOR_ANY_DECL(char)
_CLC_VECTOR_ANY_DECL(short)
_CLC_VECTOR_ANY_DECL(int)
_CLC_VECTOR_ANY_DECL(long)
libclc-0~git20140101/generic/include/clc/relational/bitselect.h 0000664 0000000 0000000 00000000067 12260750563 0024250 0 ustar 00root root 0000000 0000000 #define bitselect(x, y, z) ((x) ^ ((z) & ((y) ^ (x))))
libclc-0~git20140101/generic/include/clc/relational/isnan.h 0000664 0000000 0000000 00000001122 12260750563 0023373 0 ustar 00root root 0000000 0000000
/* Declares one `isnan` overload: takes ARG, returns RET. */
#define _CLC_ISNAN_DECL(RET, ARG) _CLC_OVERLOAD _CLC_DECL RET isnan(ARG);

/* Declares the 2-, 3-, 4-, 8- and 16-wide `isnan` overloads; the same width
 * suffix is appended to both the return type and the argument type. */
#define _CLC_VECTOR_ISNAN_DECL(RET, ARG) \
  _CLC_ISNAN_DECL(RET##2, ARG##2)        \
  _CLC_ISNAN_DECL(RET##3, ARG##3)        \
  _CLC_ISNAN_DECL(RET##4, ARG##4)        \
  _CLC_ISNAN_DECL(RET##8, ARG##8)        \
  _CLC_ISNAN_DECL(RET##16, ARG##16)
/* float: the scalar overload returns int and the vector overloads return the
 * int vector of matching width. */
_CLC_ISNAN_DECL(int, float)
_CLC_VECTOR_ISNAN_DECL(int, float)
#ifdef cl_khr_fp64
/* double (only when cl_khr_fp64 is defined): the scalar overload returns int,
 * the vector overloads return the long vector of matching width. */
_CLC_ISNAN_DECL(int, double)
_CLC_VECTOR_ISNAN_DECL(long, double)
#endif
/* Drop the helper macros once the declarations have been emitted. */
#undef _CLC_ISNAN_DECL
#undef _CLC_VECTOR_ISNAN_DECL
libclc-0~git20140101/generic/include/clc/relational/select.h 0000664 0000000 0000000 00000000052 12260750563 0023543 0 ustar 00root root 0000000 0000000 #define select(a, b, c) ((c) ? (b) : (a))
libclc-0~git20140101/generic/include/clc/shared/ 0000775 0000000 0000000 00000000000 12260750563 0021232 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/shared/clamp.h 0000664 0000000 0000000 00000000230 12260750563 0022472 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/shared/clamp.inc 0000664 0000000 0000000 00000000353 12260750563 0023022 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z);
#endif
libclc-0~git20140101/generic/include/clc/shared/max.h 0000664 0000000 0000000 00000000224 12260750563 0022166 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/shared/max.inc 0000664 0000000 0000000 00000000276 12260750563 0022517 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b);
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b);
#endif
libclc-0~git20140101/generic/include/clc/shared/min.h 0000664 0000000 0000000 00000000224 12260750563 0022164 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/include/clc/shared/min.inc 0000664 0000000 0000000 00000000276 12260750563 0022515 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b);
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b);
#endif
libclc-0~git20140101/generic/include/clc/shared/vload.h 0000664 0000000 0000000 00000002467 12260750563 0022521 0 ustar 00root root 0000000 0000000 #define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
_CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
/* Declares vload2, vload3, vload4, vload8 and vload16 for element type T,
 * reading through a pointer qualified with address space AS. */
#define _CLC_VECTOR_VLOAD_DECL(T, AS) \
  _CLC_VLOAD_DECL(T, T##2, 2, AS)     \
  _CLC_VLOAD_DECL(T, T##3, 3, AS)     \
  _CLC_VLOAD_DECL(T, T##4, 4, AS)     \
  _CLC_VLOAD_DECL(T, T##8, 8, AS)     \
  _CLC_VLOAD_DECL(T, T##16, 16, AS)
/* Declares every vector vload overload of PRIM_TYPE once per address space a
 * load can read from: __private, __local, __constant and __global.
 * The last line carries no continuation backslash, so the definition ends
 * here no matter what line follows it. */
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global)

/* Declares the vload overloads for the element types listed below (double is
 * not in this list).  As above, the final line is not continued. */
#define _CLC_VECTOR_VLOAD_PRIM() \
  _CLC_VECTOR_VLOAD_PRIM1(char) \
  _CLC_VECTOR_VLOAD_PRIM1(uchar) \
  _CLC_VECTOR_VLOAD_PRIM1(short) \
  _CLC_VECTOR_VLOAD_PRIM1(ushort) \
  _CLC_VECTOR_VLOAD_PRIM1(int) \
  _CLC_VECTOR_VLOAD_PRIM1(uint) \
  _CLC_VECTOR_VLOAD_PRIM1(long) \
  _CLC_VECTOR_VLOAD_PRIM1(ulong) \
  _CLC_VECTOR_VLOAD_PRIM1(float)
/* Emit the vload declarations for every element type in
 * _CLC_VECTOR_VLOAD_PRIM(); double is added in front of them only when
 * cl_khr_fp64 is defined. */
#ifndef cl_khr_fp64
#define _CLC_VECTOR_VLOAD() _CLC_VECTOR_VLOAD_PRIM()
#else
#define _CLC_VECTOR_VLOAD()       \
  _CLC_VECTOR_VLOAD_PRIM1(double) \
  _CLC_VECTOR_VLOAD_PRIM()
#endif

_CLC_VECTOR_VLOAD()
libclc-0~git20140101/generic/include/clc/shared/vstore.h 0000664 0000000 0000000 00000002447 12260750563 0022734 0 ustar 00root root 0000000 0000000 #define _CLC_VSTORE_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
_CLC_OVERLOAD _CLC_DECL void vstore##WIDTH(VEC_TYPE vec, size_t offset, ADDR_SPACE PRIM_TYPE *out);
/* Declares vstore2, vstore3, vstore4, vstore8 and vstore16 for element type
 * T, writing through a pointer qualified with address space AS. */
#define _CLC_VECTOR_VSTORE_DECL(T, AS) \
  _CLC_VSTORE_DECL(T, T##2, 2, AS)     \
  _CLC_VSTORE_DECL(T, T##3, 3, AS)     \
  _CLC_VSTORE_DECL(T, T##4, 4, AS)     \
  _CLC_VSTORE_DECL(T, T##8, 8, AS)     \
  _CLC_VSTORE_DECL(T, T##16, 16, AS)
/* Declares every vector vstore overload of PRIM_TYPE once per address space
 * listed here: __private, __local and __global.
 * The last line carries no continuation backslash, so the definition ends
 * here no matter what line follows it. */
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
  _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __private) \
  _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __local) \
  _CLC_VECTOR_VSTORE_DECL(PRIM_TYPE, __global)

/* Declares the vstore overloads for the element types listed below (double
 * is not in this list).  As above, the final line is not continued. */
#define _CLC_VECTOR_VSTORE_PRIM() \
  _CLC_VECTOR_VSTORE_PRIM1(char) \
  _CLC_VECTOR_VSTORE_PRIM1(uchar) \
  _CLC_VECTOR_VSTORE_PRIM1(short) \
  _CLC_VECTOR_VSTORE_PRIM1(ushort) \
  _CLC_VECTOR_VSTORE_PRIM1(int) \
  _CLC_VECTOR_VSTORE_PRIM1(uint) \
  _CLC_VECTOR_VSTORE_PRIM1(long) \
  _CLC_VECTOR_VSTORE_PRIM1(ulong) \
  _CLC_VECTOR_VSTORE_PRIM1(float)
/* Emit the vstore declarations for every element type in
 * _CLC_VECTOR_VSTORE_PRIM(); double is added in front of them only when
 * cl_khr_fp64 is defined. */
#ifndef cl_khr_fp64
#define _CLC_VECTOR_VSTORE() _CLC_VECTOR_VSTORE_PRIM()
#else
#define _CLC_VECTOR_VSTORE()       \
  _CLC_VECTOR_VSTORE_PRIM1(double) \
  _CLC_VECTOR_VSTORE_PRIM()
#endif

_CLC_VECTOR_VSTORE()
libclc-0~git20140101/generic/include/clc/synchronization/ 0000775 0000000 0000000 00000000000 12260750563 0023225 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/synchronization/barrier.h 0000664 0000000 0000000 00000000062 12260750563 0025022 0 ustar 00root root 0000000 0000000 _CLC_DECL void barrier(cl_mem_fence_flags flags);
libclc-0~git20140101/generic/include/clc/synchronization/cl_mem_fence_flags.h 0000664 0000000 0000000 00000000137 12260750563 0027147 0 ustar 00root root 0000000 0000000 typedef uint cl_mem_fence_flags;
#define CLK_LOCAL_MEM_FENCE 1
#define CLK_GLOBAL_MEM_FENCE 2
libclc-0~git20140101/generic/include/clc/workitem/ 0000775 0000000 0000000 00000000000 12260750563 0021625 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/clc/workitem/get_global_id.h 0000664 0000000 0000000 00000000052 12260750563 0024546 0 ustar 00root root 0000000 0000000 _CLC_DECL size_t get_global_id(uint dim);
libclc-0~git20140101/generic/include/clc/workitem/get_global_size.h 0000664 0000000 0000000 00000000054 12260750563 0025126 0 ustar 00root root 0000000 0000000 _CLC_DECL size_t get_global_size(uint dim);
libclc-0~git20140101/generic/include/clc/workitem/get_group_id.h 0000664 0000000 0000000 00000000051 12260750563 0024441 0 ustar 00root root 0000000 0000000 _CLC_DECL size_t get_group_id(uint dim);
libclc-0~git20140101/generic/include/clc/workitem/get_local_id.h 0000664 0000000 0000000 00000000051 12260750563 0024377 0 ustar 00root root 0000000 0000000 _CLC_DECL size_t get_local_id(uint dim);
libclc-0~git20140101/generic/include/clc/workitem/get_local_size.h 0000664 0000000 0000000 00000000053 12260750563 0024757 0 ustar 00root root 0000000 0000000 _CLC_DECL size_t get_local_size(uint dim);
libclc-0~git20140101/generic/include/clc/workitem/get_num_groups.h 0000664 0000000 0000000 00000000053 12260750563 0025031 0 ustar 00root root 0000000 0000000 _CLC_DECL size_t get_num_groups(uint dim);
libclc-0~git20140101/generic/include/math/ 0000775 0000000 0000000 00000000000 12260750563 0020154 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/include/math/clc_nextafter.h 0000664 0000000 0000000 00000000237 12260750563 0023150 0 ustar 00root root 0000000 0000000 #define __CLC_BODY
#define __CLC_FUNCTION __clc_nextafter
#include
#undef __CLC_BODY
#undef __CLC_FUNCTION
libclc-0~git20140101/generic/lib/ 0000775 0000000 0000000 00000000000 12260750563 0016346 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/SOURCES 0000664 0000000 0000000 00000001670 12260750563 0017420 0 ustar 00root root 0000000 0000000 atomic/atomic_impl.ll
cl_khr_global_int32_base_atomics/atom_add.cl
cl_khr_global_int32_base_atomics/atom_dec.cl
cl_khr_global_int32_base_atomics/atom_inc.cl
cl_khr_global_int32_base_atomics/atom_sub.cl
convert.cl
common/sign.cl
geometric/cross.cl
geometric/dot.cl
geometric/length.cl
geometric/normalize.cl
integer/abs.cl
integer/abs_diff.cl
integer/add_sat.cl
integer/add_sat_if.ll
integer/add_sat_impl.ll
integer/clz.cl
integer/clz_if.ll
integer/clz_impl.ll
integer/hadd.cl
integer/mad24.cl
integer/mul24.cl
integer/mul_hi.cl
integer/rhadd.cl
integer/rotate.cl
integer/sub_sat.cl
integer/sub_sat_if.ll
integer/sub_sat_impl.ll
integer/upsample.cl
math/fmax.cl
math/fmin.cl
math/hypot.cl
math/mad.cl
math/clc_nextafter.cl
math/nextafter.cl
relational/any.cl
relational/isnan.cl
shared/clamp.cl
shared/max.cl
shared/min.cl
shared/vload.cl
shared/vload_impl.ll
shared/vstore.cl
shared/vstore_impl.ll
workitem/get_global_id.cl
workitem/get_global_size.cl
libclc-0~git20140101/generic/lib/atomic/ 0000775 0000000 0000000 00000000000 12260750563 0017622 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/atomic/atomic_impl.ll 0000664 0000000 0000000 00000001443 12260750563 0022452 0 ustar 00root root 0000000 0000000 define i32 @__clc_atomic_add_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
entry:
%0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %value seq_cst
ret i32 %0
}
; Atomic add on an i32 in address space 3: adds %value to the word at %ptr
; with seq_cst ordering and returns the result of the atomicrmw, i.e. the
; value that was stored at %ptr before the update.
define i32 @__clc_atomic_add_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
entry:
  %old = atomicrmw volatile add i32 addrspace(3)* %ptr, i32 %value seq_cst
  ret i32 %old
}
; Atomic subtract on an i32 in address space 1: subtracts %value from the
; word at %ptr with seq_cst ordering and returns the result of the
; atomicrmw, i.e. the value that was stored at %ptr before the update.
define i32 @__clc_atomic_sub_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
entry:
  %old = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %value seq_cst
  ret i32 %old
}
; Atomic subtract on an i32 in address space 3: subtracts %value from the
; word at %ptr with seq_cst ordering and returns the result of the
; atomicrmw, i.e. the value that was stored at %ptr before the update.
define i32 @__clc_atomic_sub_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
entry:
  %old = atomicrmw volatile sub i32 addrspace(3)* %ptr, i32 %value seq_cst
  ret i32 %old
}
libclc-0~git20140101/generic/lib/cl_khr_global_int32_base_atomics/ 0000775 0000000 0000000 00000000000 12260750563 0024660 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl 0000664 0000000 0000000 00000000255 12260750563 0026752 0 ustar 00root root 0000000 0000000 #include
/* atom_add on a global pointer: forwards the pointer and operand to
 * atomic_add and returns its result.  Instantiated for int and unsigned int. */
#define IMPL(T)                                           \
  _CLC_OVERLOAD _CLC_DEF T atom_add(global T *p, T val) { \
    return atomic_add(p, val);                            \
  }

IMPL(int)
IMPL(unsigned int)
libclc-0~git20140101/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl 0000664 0000000 0000000 00000000237 12260750563 0026755 0 ustar 00root root 0000000 0000000 #include
/* atom_dec on a global pointer: implemented as atom_sub(p, 1) and returns
 * whatever that call returns.  Instantiated for int and unsigned int. */
#define IMPL(T)                                    \
  _CLC_OVERLOAD _CLC_DEF T atom_dec(global T *p) { \
    return atom_sub(p, 1);                         \
  }

IMPL(int)
IMPL(unsigned int)
libclc-0~git20140101/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl 0000664 0000000 0000000 00000000237 12260750563 0026773 0 ustar 00root root 0000000 0000000 #include
/* atom_inc on a global pointer: implemented as atom_add(p, 1) and returns
 * whatever that call returns.  Instantiated for int and unsigned int. */
#define IMPL(T)                                    \
  _CLC_OVERLOAD _CLC_DEF T atom_inc(global T *p) { \
    return atom_add(p, 1);                         \
  }

IMPL(int)
IMPL(unsigned int)
libclc-0~git20140101/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl 0000664 0000000 0000000 00000000255 12260750563 0027013 0 ustar 00root root 0000000 0000000 #include
/* atom_sub on a global pointer: forwards the pointer and operand to
 * atomic_sub and returns its result.  Instantiated for int and unsigned int. */
#define IMPL(T)                                           \
  _CLC_OVERLOAD _CLC_DEF T atom_sub(global T *p, T val) { \
    return atomic_sub(p, val);                            \
  }

IMPL(int)
IMPL(unsigned int)
libclc-0~git20140101/generic/lib/common/ 0000775 0000000 0000000 00000000000 12260750563 0017636 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/common/sign.cl 0000664 0000000 0000000 00000001043 12260750563 0021114 0 ustar 00root root 0000000 0000000 #include
/* Defines sign() for the scalar type TYPE:
 *   NaN   -> 0
 *   x > 0 -> 1
 *   x < 0 -> -1
 *   otherwise x itself, so -0.0 and +0.0 come back unchanged.
 * The F argument is accepted but not used by the expansion. */
#define SIGN(TYPE, F)                        \
  _CLC_DEF _CLC_OVERLOAD TYPE sign(TYPE x) { \
    if (isnan(x))                            \
      return 0.0F;                           \
    if (x > 0.0F)                            \
      return 1.0F;                           \
    return (x < 0.0F) ? -1.0F : x;           \
  }
/* Scalar float sign(), followed by a _CLC_UNARY_VECTORIZE invocation for it
 * (presumably the vector overloads; that macro is defined elsewhere). */
SIGN(float, f)
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sign, float)
#ifdef cl_khr_fp64
/* The double variants are built only when cl_khr_fp64 is defined; the pragma
 * enables the extension before the type is used. */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
SIGN(double, )
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sign, double)
#endif
libclc-0~git20140101/generic/lib/gen_convert.py 0000664 0000000 0000000 00000032263 12260750563 0021237 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python3
# OpenCL built-in library: type conversion functions
#
# Copyright (c) 2013 Victor Oliveira
# Copyright (c) 2013 Jesse Towner
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# This script generates the file convert_type.cl, which contains all of the
# OpenCL functions in the form:
#
# convert_<_sat><_roundingMode>()
types = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong', 'float', 'double']
int_types = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong']
unsigned_types = ['uchar', 'ushort', 'uint', 'ulong']
float_types = ['float', 'double']
int64_types = ['long', 'ulong']
float64_types = ['double']
vector_sizes = ['', '2', '3', '4', '8', '16']
half_sizes = [('2',''), ('4','2'), ('8','4'), ('16','8')]
saturation = ['','_sat']
rounding_modes = ['_rtz','_rte','_rtp','_rtn']
float_prefix = {'float':'FLT_', 'double':'DBL_'}
float_suffix = {'float':'f', 'double':''}
bool_type = {'char' : 'char',
'uchar' : 'char',
'short' : 'short',
'ushort': 'short',
'int' : 'int',
'uint' : 'int',
'long' : 'long',
'ulong' : 'long',
'float' : 'int',
'double' : 'long'}
unsigned_type = {'char' : 'uchar',
'uchar' : 'uchar',
'short' : 'ushort',
'ushort': 'ushort',
'int' : 'uint',
'uint' : 'uint',
'long' : 'ulong',
'ulong' : 'ulong'}
sizeof_type = {'char' : 1, 'uchar' : 1,
'short' : 2, 'ushort' : 2,
'int' : 4, 'uint' : 4,
'long' : 8, 'ulong' : 8,
'float' : 4, 'double' : 8}
limit_max = {'char' : 'CHAR_MAX',
'uchar' : 'UCHAR_MAX',
'short' : 'SHRT_MAX',
'ushort': 'USHRT_MAX',
'int' : 'INT_MAX',
'uint' : 'UINT_MAX',
'long' : 'LONG_MAX',
'ulong' : 'ULONG_MAX'}
limit_min = {'char' : 'CHAR_MIN',
'uchar' : '0',
'short' : 'SHRT_MIN',
'ushort': '0',
'int' : 'INT_MIN',
'uint' : '0',
'long' : 'LONG_MIN',
'ulong' : '0'}
def conditional_guard(src, dst):
    """Open the preprocessor guard a src -> dst conversion must sit behind.

    A guard line is printed when either type name is listed in int64_types
    (guarded by cles_khr_int64) or in float64_types (guarded by cl_khr_fp64);
    when both kinds are involved the two conditions are combined.

    Returns True when a guard line was printed, in which case the caller has
    to close it with "#endif", and False when nothing was printed.
    """
    def is_int64(name):
        return name in int64_types

    def is_float64(name):
        # A name is classified as 64-bit float only if it is not already
        # classified as a 64-bit integer.
        return not is_int64(name) and name in float64_types

    operands = (src, dst)
    uses_int64 = any(is_int64(name) for name in operands)
    uses_float64 = any(is_float64(name) for name in operands)

    if uses_float64 and uses_int64:
        guard = "#if defined(cl_khr_fp64) && defined(cles_khr_int64)"
    elif uses_float64:
        guard = "#ifdef cl_khr_fp64"
    elif uses_int64:
        guard = "#ifdef cles_khr_int64"
    else:
        return False

    print(guard)
    return True
print("""/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!
DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
$ ./generate-conversion-type-cl.sh
OpenCL type conversion functions
Copyright (c) 2013 Victor Oliveira
Copyright (c) 2013 Jesse Towner
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define INFINITY 1.0f / 0.0f
""")
#
# Default Conversions
#
# All conversions are in accordance with the OpenCL specification,
# which cites the C99 conversion rules.
#
# Casting from floating point to integer results in conversions
# with truncation, so it should be suitable for the default convert
# functions.
#
# Conversions from integer to floating-point, and from floating-point
# to floating-point, through casting are done with the default rounding
# mode. While C99 allows dynamically changing the rounding mode
# during runtime, it is not a supported feature in OpenCL according
# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
#
# Therefore, we can assume for optimization purposes that the
# rounding mode is fixed to round-to-nearest-even. Platform target
# authors should ensure that the rounding-control registers remain
# in this state, and that this invariant holds.
#
# Also note, even though the OpenCL specification isn't entirely
# clear on this matter, we implement all rounding mode combinations
# even for integer-to-integer conversions. When such a conversion
# is used, the rounding mode is ignored.
#
def generate_default_conversion(src, dst, mode):
    """Emit the unsaturated convert_<dst><mode> overloads for one type pair.

    Printed in order: the scalar overload (a plain cast), one overload per
    entry of half_sizes (each converts its .lo and .hi halves with the
    unsuffixed conversion of half the width), and the 3-wide overload
    (.s01 plus .s2).  The group is wrapped in whatever guard
    conditional_guard() opens for the pair.
    """
    needs_endif = conditional_guard(src, dst)

    overload = """_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)
{{
return ({DST}{N}){VALUE};
}}"""

    def render(width, value):
        return overload.format(SRC=src, DST=dst, M=mode, N=width, VALUE=value)

    # Scalar overload; the extra newline leaves a blank line after it.
    print(render('', 'x') + "\n")

    # Vector overloads, decomposed into two half-width conversions.
    for size, half_size in half_sizes:
        halves = "(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi))".format(
            DST=dst, H=half_size)
        print(render(size, halves) + "\n")

    # 3-component overload: a 2-wide conversion plus a scalar one.
    thirds = "(convert_{DST}2(x.s01), convert_{DST}(x.s2))".format(DST=dst)
    print(render('3', thirds))

    if needs_endif:
        print("#endif")
# Unsuffixed conversions for every ordered pair of types.
for src, dst in ((s, d) for s in types for d in types):
    generate_default_conversion(src, dst, '')

# Integer-to-integer conversions once more per rounding-mode suffix.
for src, dst, mode in ((s, d, m)
                       for s in int_types
                       for d in int_types
                       for m in rounding_modes):
    generate_default_conversion(src, dst, mode)
#
# Saturated Conversions To Integers
#
# These functions are dependent on the unsaturated conversion functions
# generated above, and use clamp, max, min, and select to eliminate
# branching and vectorize the conversions.
#
# Again, as above, we allow all rounding modes for integer-to-integer
# conversions with saturation.
#
def generate_saturated_conversion(src, dst, size):
    """Emit convert_<dst><size>_sat(<src><size> x) for one type pair and width.

    The emitted body depends on the pair:
      * same type: returns x unchanged;
      * floating-point source: converts, then uses select() to replace
        results whose input lies below/above the destination limits;
      * integer source: clamps x into the destination range with min, max or
        clamp (when needed) and then converts.
    The function is wrapped in whatever guard conditional_guard() opens.
    """
    needs_endif = conditional_guard(src, dst)

    # Function header.
    print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
{{""".format(DST=dst, SRC=src, N=size))

    # FIXME: This is a work around for lack of select function with
    # signed third argument when the first two arguments are unsigned types.
    # We cast to the signed type for sign-extension, then do a bitcast to
    # the unsigned type.
    mask_convert = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size)
    if dst in unsigned_types:
        bool_prefix = "as_{DST}{N}(".format(DST=dst, N=size) + mask_convert
        bool_suffix = ")"
    else:
        bool_prefix, bool_suffix = mask_convert, ""

    src_unsigned = src in unsigned_types

    if src == dst:
        # Conversion between same types
        print(" return x;")
    elif src in float_types:
        # Conversion from float to int
        print(""" {DST}{N} y = convert_{DST}{N}(x);
y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
return y;""".format(SRC=src, DST=dst, N=size,
                    DST_MIN=limit_min[dst], DST_MAX=limit_max[dst],
                    BP=bool_prefix, BS=bool_suffix))
    else:
        # Integer to integer conversion: pick the statement, if any, that
        # brings x into the destination range before converting.
        src_bytes, dst_bytes = sizeof_type[src], sizeof_type[dst]
        clamp_stmt = None
        if src_bytes >= dst_bytes:
            if src_unsigned:
                # Only the upper bound can be exceeded.
                clamp_stmt = " x = min(x, ({SRC}){DST_MAX});"
            elif src_bytes == dst_bytes:
                # Signed source of the same size: raise negatives to zero.
                clamp_stmt = " x = max(x, ({SRC})0);"
            else:
                # Wider signed source: both bounds apply.
                clamp_stmt = " x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});"
        elif not src_unsigned and dst in unsigned_types:
            # Narrower signed source to unsigned destination.
            clamp_stmt = " x = max(x, ({SRC})0);"

        if clamp_stmt is not None:
            print(clamp_stmt.format(SRC=src, DST_MIN=limit_min[dst],
                                    DST_MAX=limit_max[dst]))
        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))

    # Footer
    print("}")
    if needs_endif:
        print("#endif")
# Saturated conversions exist only for integer destinations.
for src, dst, size in ((s, d, n)
                       for s in types
                       for d in int_types
                       for n in vector_sizes):
    generate_saturated_conversion(src, dst, size)
def generate_saturated_conversion_with_rounding(src, dst, size, mode):
    """Emit convert_<dst><size>_sat<mode>, a wrapper around the _sat form.

    The emitted function simply returns convert_<dst><size>_sat(x); the
    rounding-mode suffix only affects its name.  The function is wrapped in
    whatever guard conditional_guard() opens for the pair.
    """
    needs_endif = conditional_guard(src, dst)

    wrapper = """_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
{{
return convert_{DST}{N}_sat(x);
}}
"""
    print(wrapper.format(DST=dst, SRC=src, N=size, M=mode))

    if needs_endif:
        print("#endif")
# Integer-to-integer saturated conversions, one per rounding-mode suffix.
for src, dst, size, mode in ((s, d, n, m)
                             for s in int_types
                             for d in int_types
                             for n in vector_sizes
                             for m in rounding_modes):
    generate_saturated_conversion_with_rounding(src, dst, size, mode)
#
# Conversions To/From Floating-Point With Rounding
#
# Note that we assume as above that casts from floating-point to
# integer are done with truncation, and that the default rounding
# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
# rounding rules.
#
# These functions rely on the use of abs, ceil, fabs, floor,
# nextafter, sign, rint and the above generated conversion functions.
#
# Only conversions to integers can have saturation.
#
def generate_float_conversion(src, dst, size, mode, sat):
    """Emit convert_<dst><size><sat><mode>(<src><size> x) with explicit rounding.

    src, dst -- scalar type names; at least one of them is floating-point.
    size     -- vector width suffix ('' for scalar).
    mode     -- rounding suffix: '_rtz', '_rte', '_rtp' or '_rtn'.
    sat      -- '' or '_sat'; only used when dst is an integer type.

    Integer destination: x is first rounded with rint/ceil/floor (nothing
    for '_rtz', since the cast truncates) and then converted.
    Floating-point destination: '_rte' is the plain conversion.  For the
    other modes the emitted code converts x to r, converts r back to the
    source type as y, and compares y with x to decide whether r has to be
    moved one step with nextafter.
    The function is wrapped in whatever guard conditional_guard() opens.
    """
    # Header
    close_conditional = conditional_guard(src, dst)
    print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
{{""".format(SRC=src, DST=dst, N=size, M=mode, S=sat))

    # Perform conversion
    if dst in int_types:
        if mode == '_rte':
            print(" x = rint(x);")
        elif mode == '_rtp':
            print(" x = ceil(x);")
        elif mode == '_rtn':
            print(" x = floor(x);")
        print(" return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
    elif mode == '_rte':
        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))
    else:
        print(" {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
        # y is r converted back to the source type.  It has to be built from
        # r: initialising it from itself would read an uninitialised value.
        print(" {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
        if mode == '_rtz':
            if src in int_types:
                print(" {USRC}{N} abs_x = abs(x);".format(USRC=unsigned_type[src], N=size))
                print(" {USRC}{N} abs_y = abs(y);".format(USRC=unsigned_type[src], N=size))
            else:
                print(" {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
                print(" {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
            print(" return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));"
                  .format(DST=dst, N=size, BOOL=bool_type[dst]))
        if mode == '_rtp':
            print(" return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));"
                  .format(DST=dst, N=size, BOOL=bool_type[dst]))
        if mode == '_rtn':
            print(" return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));"
                  .format(DST=dst, N=size, BOOL=bool_type[dst]))

    # Footer
    print("}")
    if close_conditional:
        print("#endif")
# Floating-point to integer: every rounding mode, with and without saturation.
for src, dst, size, mode, sat in ((s, d, n, m, t)
                                  for s in float_types
                                  for d in int_types
                                  for n in vector_sizes
                                  for m in rounding_modes
                                  for t in saturation):
    generate_float_conversion(src, dst, size, mode, sat)

# Anything to floating-point: every rounding mode, never saturated.
for src, dst, size, mode in ((s, d, n, m)
                             for s in types
                             for d in float_types
                             for n in vector_sizes
                             for m in rounding_modes):
    generate_float_conversion(src, dst, size, mode, '')
libclc-0~git20140101/generic/lib/geometric/ 0000775 0000000 0000000 00000000000 12260750563 0020324 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/geometric/cross.cl 0000664 0000000 0000000 00000000554 12260750563 0022001 0 ustar 00root root 0000000 0000000 #include
/* Cross product of two 3-component vectors. */
_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) {
  const float cx = p0.y*p1.z - p0.z*p1.y;
  const float cy = p0.z*p1.x - p0.x*p1.z;
  const float cz = p0.x*p1.y - p0.y*p1.x;
  return (float3)(cx, cy, cz);
}
/* Cross product of the x, y, z components of two 4-component vectors; the
 * w components of the inputs are not read and the result's w is 0. */
_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) {
  const float cx = p0.y*p1.z - p0.z*p1.y;
  const float cy = p0.z*p1.x - p0.x*p1.z;
  const float cz = p0.x*p1.y - p0.y*p1.x;
  return (float4)(cx, cy, cz, 0.f);
}
libclc-0~git20140101/generic/lib/geometric/dot.cl 0000664 0000000 0000000 00000001560 12260750563 0021434 0 ustar 00root root 0000000 0000000 #include
/* Dot product for float operands of width 1 to 4: the component-wise
 * products are summed left to right. */
_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
  return p0*p1;
}

_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
  return p0.s0*p1.s0 + p0.s1*p1.s1;
}

_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
  return p0.s0*p1.s0 + p0.s1*p1.s1 + p0.s2*p1.s2;
}

_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
  return p0.s0*p1.s0 + p0.s1*p1.s1 + p0.s2*p1.s2 + p0.s3*p1.s3;
}
// Double-precision dot overloads. They are compiled only when the
// cl_khr_fp64 extension macro is defined; the pragma enables the extension
// for the definitions below.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// Scalar case: the dot product is just the product.
_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
return p0*p1;
}
// 2 components: sum of products in x, y order.
_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
return p0.x*p1.x + p0.y*p1.y;
}
// 3 components: sum of products in x, y, z order.
_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
}
// 4 components: sum of products in x, y, z, w order.
_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
}
#endif
libclc-0~git20140101/generic/lib/geometric/length.cl 0000664 0000000 0000000 00000000243 12260750563 0022124 0 ustar 00root root 0000000 0000000 #include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/geometric/length.inc 0000664 0000000 0000000 00000000137 12260750563 0022301 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_FLOAT length(__CLC_FLOATN p) {
return native_sqrt(dot(p, p));
}
libclc-0~git20140101/generic/lib/geometric/normalize.cl 0000664 0000000 0000000 00000000246 12260750563 0022646 0 ustar 00root root 0000000 0000000 #include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/geometric/normalize.inc 0000664 0000000 0000000 00000000130 12260750563 0023011 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_FLOATN normalize(__CLC_FLOATN p) {
return p/length(p);
}
libclc-0~git20140101/generic/lib/integer/ 0000775 0000000 0000000 00000000000 12260750563 0020003 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/integer/abs.cl 0000664 0000000 0000000 00000000126 12260750563 0021067 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/integer/abs.inc 0000664 0000000 0000000 00000000236 12260750563 0021244 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs(__CLC_GENTYPE x) {
return __builtin_astype((__CLC_GENTYPE)(x > (__CLC_GENTYPE)(0) ? x : -x), __CLC_U_GENTYPE);
}
libclc-0~git20140101/generic/lib/integer/abs_diff.cl 0000664 0000000 0000000 00000000133 12260750563 0022055 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/integer/abs_diff.inc 0000664 0000000 0000000 00000000246 12260750563 0022235 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_U_GENTYPE abs_diff(__CLC_GENTYPE x, __CLC_GENTYPE y) {
return __builtin_astype((__CLC_GENTYPE)(x > y ? x-y : y-x), __CLC_U_GENTYPE);
}
libclc-0~git20140101/generic/lib/integer/add_sat.cl 0000664 0000000 0000000 00000003346 12260750563 0021730 0 ustar 00root root 0000000 0000000 #include
// From add_sat.ll
//
// Table of the scalar integer types add_sat is provided for. TYPE is the
// OpenCL C type; SUFFIX selects the matching __clc_add_sat_<SUFFIX> helper.
#define __CLC_ADD_SAT_TYPES(ENTRY) \
    ENTRY(char, s8) \
    ENTRY(uchar, u8) \
    ENTRY(short, s16) \
    ENTRY(ushort, u16) \
    ENTRY(int, s32) \
    ENTRY(uint, u32) \
    ENTRY(long, s64) \
    ENTRY(ulong, u64)

// Prototype of the per-type helper.
#define __CLC_ADD_SAT_DECLARE(TYPE, SUFFIX) \
    _CLC_DECL TYPE __clc_add_sat_##SUFFIX(TYPE, TYPE);

// Scalar add_sat overload: forwards both operands to the helper.
#define __CLC_ADD_SAT_DEFINE(TYPE, SUFFIX) \
    _CLC_OVERLOAD _CLC_DEF TYPE add_sat(TYPE x, TYPE y) { \
        return __clc_add_sat_##SUFFIX(x, y); \
    }

// Vector add_sat overloads, produced by _CLC_BINARY_VECTORIZE.
#define __CLC_ADD_SAT_VECTORIZE(TYPE, SUFFIX) \
    _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, TYPE, add_sat, TYPE, TYPE)

// Same emission order as before: all prototypes, then all scalar
// definitions, then all vector definitions.
__CLC_ADD_SAT_TYPES(__CLC_ADD_SAT_DECLARE)
__CLC_ADD_SAT_TYPES(__CLC_ADD_SAT_DEFINE)
__CLC_ADD_SAT_TYPES(__CLC_ADD_SAT_VECTORIZE)

#undef __CLC_ADD_SAT_VECTORIZE
#undef __CLC_ADD_SAT_DEFINE
#undef __CLC_ADD_SAT_DECLARE
#undef __CLC_ADD_SAT_TYPES
libclc-0~git20140101/generic/lib/integer/add_sat_if.ll 0000664 0000000 0000000 00000003165 12260750563 0022416 0 ustar 00root root 0000000 0000000 declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
; i8 entry points for add_sat. Each one only forwards its two operands to the
; matching __clc_add_sat_impl_* function and returns that result unchanged.
define i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  %sat = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
  ret i8 %sat
}

declare i8 @__clc_add_sat_impl_u8(i8, i8)

define i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  %sat = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
  ret i8 %sat
}
; i16 entry points for add_sat: thin forwarders to the __clc_add_sat_impl_*
; functions declared here.
declare i16 @__clc_add_sat_impl_s16(i16, i16)

define i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  %sat = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
  ret i16 %sat
}

declare i16 @__clc_add_sat_impl_u16(i16, i16)

define i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  %sat = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
  ret i16 %sat
}
; i32 entry points for add_sat: thin forwarders to the __clc_add_sat_impl_*
; functions declared here.
declare i32 @__clc_add_sat_impl_s32(i32, i32)

define i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  %sat = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
  ret i32 %sat
}

declare i32 @__clc_add_sat_impl_u32(i32, i32)

define i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  %sat = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
  ret i32 %sat
}
; i64 entry points for add_sat: thin forwarders to the __clc_add_sat_impl_*
; functions declared here.
declare i64 @__clc_add_sat_impl_s64(i64, i64)

define i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  %sat = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
  ret i64 %sat
}

declare i64 @__clc_add_sat_impl_u64(i64, i64)

define i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  %sat = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
  ret i64 %sat
}
libclc-0~git20140101/generic/lib/integer/add_sat_impl.ll 0000664 0000000 0000000 00000006027 12260750563 0022761 0 ustar 00root root 0000000 0000000 declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)

; Signed i8 saturating add. When the checked addition reports overflow the
; result is replaced by a bound derived from the sign of %x.
define i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i8 %x, 7
  ; 127 when %x is non-negative, -128 when %x is negative.
  %bound = xor i8 %sign.fill, 127
  %pair = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
  %sum = extractvalue {i8, i1} %pair, 0
  %overflowed = extractvalue {i8, i1} %pair, 1
  %result = select i1 %overflowed, i8 %bound, i8 %sum
  ret i8 %result
}

; Unsigned i8 saturating add: on overflow the result is all ones (i8 -1).
define i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  %pair = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
  %sum = extractvalue {i8, i1} %pair, 0
  %overflowed = extractvalue {i8, i1} %pair, 1
  %result = select i1 %overflowed, i8 -1, i8 %sum
  ret i8 %result
}
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)

; Signed i16 saturating add. When the checked addition reports overflow the
; result is replaced by a bound derived from the sign of %x.
define i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i16 %x, 15
  ; 32767 when %x is non-negative, -32768 when %x is negative.
  %bound = xor i16 %sign.fill, 32767
  %pair = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y)
  %sum = extractvalue {i16, i1} %pair, 0
  %overflowed = extractvalue {i16, i1} %pair, 1
  %result = select i1 %overflowed, i16 %bound, i16 %sum
  ret i16 %result
}

; Unsigned i16 saturating add: on overflow the result is all ones (i16 -1).
define i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  %pair = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
  %sum = extractvalue {i16, i1} %pair, 0
  %overflowed = extractvalue {i16, i1} %pair, 1
  %result = select i1 %overflowed, i16 -1, i16 %sum
  ret i16 %result
}
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)

; Signed i32 saturating add. When the checked addition reports overflow the
; result is replaced by a bound derived from the sign of %x.
define i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i32 %x, 31
  ; 2147483647 when %x is non-negative, -2147483648 when %x is negative.
  %bound = xor i32 %sign.fill, 2147483647
  %pair = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
  %sum = extractvalue {i32, i1} %pair, 0
  %overflowed = extractvalue {i32, i1} %pair, 1
  %result = select i1 %overflowed, i32 %bound, i32 %sum
  ret i32 %result
}

; Unsigned i32 saturating add: on overflow the result is all ones (i32 -1).
define i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  %pair = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  %sum = extractvalue {i32, i1} %pair, 0
  %overflowed = extractvalue {i32, i1} %pair, 1
  %result = select i1 %overflowed, i32 -1, i32 %sum
  ret i32 %result
}
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)

; Signed i64 saturating add. When the checked addition reports overflow the
; result is replaced by a bound derived from the sign of %x.
define i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i64 %x, 63
  ; Largest i64 when %x is non-negative, smallest i64 when %x is negative.
  %bound = xor i64 %sign.fill, 9223372036854775807
  %pair = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
  %sum = extractvalue {i64, i1} %pair, 0
  %overflowed = extractvalue {i64, i1} %pair, 1
  %result = select i1 %overflowed, i64 %bound, i64 %sum
  ret i64 %result
}

; Unsigned i64 saturating add: on overflow the result is all ones (i64 -1).
define i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
  %sum = extractvalue {i64, i1} %pair, 0
  %overflowed = extractvalue {i64, i1} %pair, 1
  %result = select i1 %overflowed, i64 -1, i64 %sum
  ret i64 %result
}
libclc-0~git20140101/generic/lib/integer/clz.cl 0000664 0000000 0000000 00000002626 12260750563 0021121 0 ustar 00root root 0000000 0000000 #include
// From clz.ll
//
// Table of the scalar integer types clz is provided for. TYPE is the OpenCL C
// type; SUFFIX selects the matching __clc_clz_<SUFFIX> helper.
#define __CLC_CLZ_TYPES(ENTRY) \
    ENTRY(char, s8) \
    ENTRY(uchar, u8) \
    ENTRY(short, s16) \
    ENTRY(ushort, u16) \
    ENTRY(int, s32) \
    ENTRY(uint, u32) \
    ENTRY(long, s64) \
    ENTRY(ulong, u64)

// Prototype of the per-type helper.
#define __CLC_CLZ_DECLARE(TYPE, SUFFIX) \
    _CLC_DECL TYPE __clc_clz_##SUFFIX(TYPE);

// Scalar clz overload: forwards the operand to the helper.
#define __CLC_CLZ_DEFINE(TYPE, SUFFIX) \
    _CLC_OVERLOAD _CLC_DEF TYPE clz(TYPE x) { \
        return __clc_clz_##SUFFIX(x); \
    }

// Vector clz overloads, produced by _CLC_UNARY_VECTORIZE.
#define __CLC_CLZ_VECTORIZE(TYPE, SUFFIX) \
    _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, TYPE, clz, TYPE)

// Same emission order as before: all prototypes, then all scalar
// definitions, then all vector definitions.
__CLC_CLZ_TYPES(__CLC_CLZ_DECLARE)
__CLC_CLZ_TYPES(__CLC_CLZ_DEFINE)
__CLC_CLZ_TYPES(__CLC_CLZ_VECTORIZE)

#undef __CLC_CLZ_VECTORIZE
#undef __CLC_CLZ_DEFINE
#undef __CLC_CLZ_DECLARE
#undef __CLC_CLZ_TYPES
libclc-0~git20140101/generic/lib/integer/clz_if.ll 0000664 0000000 0000000 00000002533 12260750563 0021605 0 ustar 00root root 0000000 0000000 declare i8 @__clc_clz_impl_s8(i8 %x)
; i8 entry points for clz. Each one only forwards its operand to the matching
; __clc_clz_impl_* function and returns that result unchanged.
define i8 @__clc_clz_s8(i8 %x) nounwind readnone alwaysinline {
  %count = call i8 @__clc_clz_impl_s8(i8 %x)
  ret i8 %count
}

declare i8 @__clc_clz_impl_u8(i8)

define i8 @__clc_clz_u8(i8 %x) nounwind readnone alwaysinline {
  %count = call i8 @__clc_clz_impl_u8(i8 %x)
  ret i8 %count
}
; i16 entry points for clz: thin forwarders to the __clc_clz_impl_* functions
; declared here.
declare i16 @__clc_clz_impl_s16(i16)

define i16 @__clc_clz_s16(i16 %x) nounwind readnone alwaysinline {
  %count = call i16 @__clc_clz_impl_s16(i16 %x)
  ret i16 %count
}

declare i16 @__clc_clz_impl_u16(i16)

define i16 @__clc_clz_u16(i16 %x) nounwind readnone alwaysinline {
  %count = call i16 @__clc_clz_impl_u16(i16 %x)
  ret i16 %count
}
; i32 entry points for clz: thin forwarders to the __clc_clz_impl_* functions
; declared here.
declare i32 @__clc_clz_impl_s32(i32)

define i32 @__clc_clz_s32(i32 %x) nounwind readnone alwaysinline {
  %count = call i32 @__clc_clz_impl_s32(i32 %x)
  ret i32 %count
}

declare i32 @__clc_clz_impl_u32(i32)

define i32 @__clc_clz_u32(i32 %x) nounwind readnone alwaysinline {
  %count = call i32 @__clc_clz_impl_u32(i32 %x)
  ret i32 %count
}
; i64 entry points for clz: thin forwarders to the __clc_clz_impl_* functions
; declared here.
declare i64 @__clc_clz_impl_s64(i64)

define i64 @__clc_clz_s64(i64 %x) nounwind readnone alwaysinline {
  %count = call i64 @__clc_clz_impl_s64(i64 %x)
  ret i64 %count
}

declare i64 @__clc_clz_impl_u64(i64)

define i64 @__clc_clz_u64(i64 %x) nounwind readnone alwaysinline {
  %count = call i64 @__clc_clz_impl_u64(i64 %x)
  ret i64 %count
}
libclc-0~git20140101/generic/lib/integer/clz_impl.ll 0000664 0000000 0000000 00000002327 12260750563 0022151 0 ustar 00root root 0000000 0000000 declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)

; i8 count-leading-zeros. The signed and unsigned variants are identical:
; both call llvm.ctlz.i8 with its second (i1) argument set to false.
define i8 @__clc_clz_impl_s8(i8 %x) nounwind readnone alwaysinline {
  %count = call i8 @llvm.ctlz.i8(i8 %x, i1 false)
  ret i8 %count
}

define i8 @__clc_clz_impl_u8(i8 %x) nounwind readnone alwaysinline {
  %count = call i8 @llvm.ctlz.i8(i8 %x, i1 false)
  ret i8 %count
}
; i16 count-leading-zeros. The signed and unsigned variants are identical:
; both call llvm.ctlz.i16 with its second (i1) argument set to false.
define i16 @__clc_clz_impl_s16(i16 %x) nounwind readnone alwaysinline {
  %count = call i16 @llvm.ctlz.i16(i16 %x, i1 false)
  ret i16 %count
}

define i16 @__clc_clz_impl_u16(i16 %x) nounwind readnone alwaysinline {
  %count = call i16 @llvm.ctlz.i16(i16 %x, i1 false)
  ret i16 %count
}
; i32 count-leading-zeros. The signed and unsigned variants are identical:
; both call llvm.ctlz.i32 with its second (i1) argument set to false.
define i32 @__clc_clz_impl_s32(i32 %x) nounwind readnone alwaysinline {
  %count = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 %count
}

define i32 @__clc_clz_impl_u32(i32 %x) nounwind readnone alwaysinline {
  %count = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  ret i32 %count
}
; i64 count-leading-zeros. The signed and unsigned variants are identical:
; both call llvm.ctlz.i64 with its second (i1) argument set to false.
define i64 @__clc_clz_impl_s64(i64 %x) nounwind readnone alwaysinline {
  %count = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
  ret i64 %count
}

define i64 @__clc_clz_impl_u64(i64 %x) nounwind readnone alwaysinline {
  %count = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
  ret i64 %count
}
libclc-0~git20140101/generic/lib/integer/hadd.cl 0000664 0000000 0000000 00000000127 12260750563 0021223 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/integer/hadd.inc 0000664 0000000 0000000 00000000514 12260750563 0021376 0 ustar 00root root 0000000 0000000 //hadd = (x+y)>>1
//Each operand is halved before the addition, so the sum itself cannot
//overflow. The two shifts each drop the lowest bit; 1 is added back exactly
//when both x and y had that bit set.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
    const __CLC_GENTYPE one = (__CLC_GENTYPE)1;
    const __CLC_GENTYPE carry = x & y & one;
    return (x >> one) + (y >> one) + carry;
}
libclc-0~git20140101/generic/lib/integer/mad24.cl 0000664 0000000 0000000 00000000140 12260750563 0021225 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/integer/mad24.inc 0000664 0000000 0000000 00000000173 12260750563 0021406 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad24(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z){
return mul24(x, y) + z;
}
libclc-0~git20140101/generic/lib/integer/mul24.cl 0000664 0000000 0000000 00000000140 12260750563 0021261 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/integer/mul24.inc 0000664 0000000 0000000 00000000565 12260750563 0021447 0 ustar 00root root 0000000 0000000
// Reduce an operand to its low 24 bits by shifting left and then right by 8.
// Shifts are used rather than a mask in order to maintain the sign bit for
// signed integers. The compiler should optimize this to (v & 0x00FFFFFF) for
// unsigned integers.
#define CONVERT_TO_24BIT(v) (((v) << 8) >> 8)
// Multiplies the 24-bit reductions of x and y.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mul24(__CLC_GENTYPE x, __CLC_GENTYPE y){
    const __CLC_GENTYPE x24 = CONVERT_TO_24BIT(x);
    const __CLC_GENTYPE y24 = CONVERT_TO_24BIT(y);
    return x24 * y24;
}
#undef CONVERT_TO_24BIT
libclc-0~git20140101/generic/lib/integer/mul_hi.cl 0000664 0000000 0000000 00000007605 12260750563 0021610 0 ustar 00root root 0000000 0000000 #include
//For all types EXCEPT long, which is implemented separately
//
// Defines scalar mul_hi for GENTYPE: both operands are converted to the
// wider type BGENTYPE, multiplied there, and the product is shifted right by
// GENSIZE bits before being converted back to GENTYPE.
//
// The definition ends on the closing brace without a trailing line
// continuation, so it does not depend on the next source line being blank.
#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
    _CLC_OVERLOAD _CLC_DEF GENTYPE mul_hi(GENTYPE x, GENTYPE y){ \
        return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \
    }
//FOIL-based long mul_hi
//
// Summary: Treat mul_hi(long x, long y) as:
// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively
// and b and d are the low-order parts of x and y.
// Thinking back to algebra, we use FOIL to do the work.
//
// Returns the upper 64 bits of the 128-bit signed product x * y.
_CLC_OVERLOAD _CLC_DEF long mul_hi(long x, long y){
    long f, o, i;
    ulong l;

    //Move the high/low halves of x/y into the lower 32-bits of variables so
    //that we can multiply them without worrying about overflow.
    //The high halves keep their sign (arithmetic shift); the low halves are
    //non-negative values below 2^32.
    long x_hi = x >> 32;
    long x_lo = x & UINT_MAX;
    long y_hi = y >> 32;
    long y_lo = y & UINT_MAX;

    //Multiply all of the components according to FOIL method
    f = x_hi * y_hi;
    o = x_hi * y_lo;
    i = x_lo * y_hi;
    //Both low halves can be as large as 2^32-1, so their product can exceed
    //the largest signed 64-bit value. Multiply as unsigned so the operation
    //cannot overflow a signed type.
    l = (ulong)x_lo * (ulong)y_lo;

    //Now add the components back together in the following steps:
    //F: doesn't need to be modified
    //O/I: Need to be added together.
    //L: Shift right by 32-bits, then add into the sum of O and I
    //Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
    //
    //We use hadd to give us a bit of extra precision for the intermediate sums
    //but as a result, we shift by 31 bits instead of 32
    return (long)(f + (hadd(o, (i + (long)(l>>32))) >> 31));
}
// Unsigned 64-bit mul_hi: returns the upper 64 bits of the 128-bit product
// x * y, assembled from four 32x32-bit partial products (FOIL).
_CLC_OVERLOAD _CLC_DEF ulong mul_hi(ulong x, ulong y){
    //Split each operand into 32-bit halves held in 64-bit variables, so that
    //every partial product below fits in 64 bits.
    const ulong x_hi = x >> 32;
    const ulong x_lo = x & UINT_MAX;
    const ulong y_hi = y >> 32;
    const ulong y_lo = y & UINT_MAX;

    //The four FOIL partial products.
    const ulong first = x_hi * y_hi;
    const ulong outer = x_hi * y_lo;
    const ulong inner = x_lo * y_hi;
    const ulong last  = x_lo * y_lo;

    //Only the upper half of the last product contributes; it is folded into
    //the inner product. The outer product is then combined using hadd, which
    //yields half of the sum, so the remaining shift is by 31 bits rather
    //than 32.
    const ulong half_middle = hadd(outer, (inner + (last >> 32)));

    //The first product is added in unmodified.
    return (first + (half_middle >> 31));
}
// Defines the vector mul_hi overloads for GENTYPE in terms of narrower
// overloads: the 2- and 3-component versions call scalar mul_hi per
// component, and the 4-, 8- and 16-component versions call mul_hi on the .lo
// and .hi halves.
//
// The definition ends on the last closing brace without a trailing line
// continuation; a continuation there would splice whatever line follows
// (here another #define) into this macro's body.
#define __CLC_MUL_HI_VEC(GENTYPE) \
    _CLC_OVERLOAD _CLC_DEF GENTYPE##2 mul_hi(GENTYPE##2 x, GENTYPE##2 y){ \
        return (GENTYPE##2){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF GENTYPE##3 mul_hi(GENTYPE##3 x, GENTYPE##3 y){ \
        return (GENTYPE##3){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1), mul_hi(x.s2, y.s2)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF GENTYPE##4 mul_hi(GENTYPE##4 x, GENTYPE##4 y){ \
        return (GENTYPE##4){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF GENTYPE##8 mul_hi(GENTYPE##8 x, GENTYPE##8 y){ \
        return (GENTYPE##8){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF GENTYPE##16 mul_hi(GENTYPE##16 x, GENTYPE##16 y){ \
        return (GENTYPE##16){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
    }
// Emits the scalar mul_hi for TYPE (computed in the wider BTYPE, keeping the
// bits above BITS) followed by the vector overloads for TYPE.
#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \
__CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \
__CLC_MUL_HI_VEC(TYPE)
// Instantiation table. The 8-, 16- and 32-bit types get both scalar and
// vector definitions; long and ulong only get the vector overloads here
// because their scalar mul_hi is written out by hand in this file.
#define __CLC_MUL_HI_TYPES() \
__CLC_MUL_HI_DEC_IMPL(short, char, 8) \
__CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \
__CLC_MUL_HI_DEC_IMPL(int, short, 16) \
__CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \
__CLC_MUL_HI_DEC_IMPL(long, int, 32) \
__CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \
__CLC_MUL_HI_VEC(long) \
__CLC_MUL_HI_VEC(ulong)
// Expand the table once, then remove the helper macros.
__CLC_MUL_HI_TYPES()
#undef __CLC_MUL_HI_TYPES
#undef __CLC_MUL_HI_DEC_IMPL
#undef __CLC_MUL_HI_IMPL
#undef __CLC_MUL_HI_VEC
// NOTE(review): __CLC_B32 is not defined anywhere in this file; this #undef
// looks like a leftover — confirm no included header relies on it.
#undef __CLC_B32
libclc-0~git20140101/generic/lib/integer/rhadd.cl 0000664 0000000 0000000 00000000130 12260750563 0021377 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/integer/rhadd.inc 0000664 0000000 0000000 00000000547 12260750563 0021566 0 ustar 00root root 0000000 0000000 //rhadd = (x+y+1)>>1
//Each operand is halved before the addition, so the sum itself cannot
//overflow. The two shifts each drop the lowest bit; 1 is added back when
//either x or y had that bit set, which rounds the average up.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rhadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
    const __CLC_GENTYPE one = (__CLC_GENTYPE)1;
    const __CLC_GENTYPE round_up = (x | y) & one;
    return (x >> one) + (y >> one) + round_up;
}
libclc-0~git20140101/generic/lib/integer/rotate.cl 0000664 0000000 0000000 00000000131 12260750563 0021614 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/integer/rotate.inc 0000664 0000000 0000000 00000003220 12260750563 0021771 0 ustar 00root root 0000000 0000000 /**
* Not necessarily optimal... but it produces correct results (at least for int)
* If we're lucky, LLVM will recognize the pattern and produce rotate
* instructions:
* http://llvm.1065342.n5.nabble.com/rotate-td47679.html
*
* Eventually, someone should feel free to implement an llvm-specific version
*/
// Rotates the bits of x left by n positions. A negative n (possible for
// signed types) rotates right by -n instead. The scalar and vector cases are
// implemented separately below.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE n){
//Try to avoid extra work if someone's spinning the value through multiple
//full rotations
n = n % (__CLC_GENTYPE)__CLC_GENSIZE;
#ifdef __CLC_SCALAR
// Scalar case: branch on the sign of the reduced rotation count. The
// right-shifted half always goes through the unsigned type so that no sign
// bits are shifted in.
if (n > 0){
return (x << n) | (((__CLC_U_GENTYPE)x) >> (__CLC_GENSIZE - n));
} else if (n == 0){
// Nothing to rotate; also avoids shifting by the full width.
return x;
} else {
// Negative count: rotate right by -n.
return ( (((__CLC_U_GENTYPE)x) >> -n) | (x << (__CLC_GENSIZE + n)) );
}
#else
// Vector case: no per-component branching. Two rotations are applied in
// sequence and, per component, one of them uses an amount of 0:
//   1. rotate right by -n where n < 0 (amount 0 elsewhere);
//   2. rotate left by n where n >= 0 (amount 0 elsewhere).
// NOTE(review): with an amount of 0 the complementary shift count is
// __CLC_GENSIZE; this presumably relies on OpenCL C reducing shift counts
// modulo the element width — confirm.
//XXX: There's a lot of __builtin_astype calls to cast everything to
// unsigned ... This should be improved so that if __CLC_GENTYPE==__CLC_U_GENTYPE, no
// casts are required.
__CLC_U_GENTYPE x_1 = __builtin_astype(x, __CLC_U_GENTYPE);
//XXX: Is (__CLC_U_GENTYPE >> S__CLC_GENTYPE) | (__CLC_U_GENTYPE << S__CLC_GENTYPE) legal?
// If so, then combine the amt and shifts into a single set of statements
__CLC_U_GENTYPE amt;
amt = (n < (__CLC_GENTYPE)0 ? __builtin_astype((__CLC_GENTYPE)0-n, __CLC_U_GENTYPE) : (__CLC_U_GENTYPE)0);
x_1 = (x_1 >> amt) | (x_1 << ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt));
amt = (n < (__CLC_GENTYPE)0 ? (__CLC_U_GENTYPE)0 : __builtin_astype(n, __CLC_U_GENTYPE));
x_1 = (x_1 << amt) | (x_1 >> ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt));
return __builtin_astype(x_1, __CLC_GENTYPE);
#endif
}
libclc-0~git20140101/generic/lib/integer/sub_sat.cl 0000664 0000000 0000000 00000003346 12260750563 0021771 0 ustar 00root root 0000000 0000000 #include
// From sub_sat.ll
//
// Table of the scalar integer types sub_sat is provided for. TYPE is the
// OpenCL C type; SUFFIX selects the matching __clc_sub_sat_<SUFFIX> helper.
#define __CLC_SUB_SAT_TYPES(ENTRY) \
    ENTRY(char, s8) \
    ENTRY(uchar, u8) \
    ENTRY(short, s16) \
    ENTRY(ushort, u16) \
    ENTRY(int, s32) \
    ENTRY(uint, u32) \
    ENTRY(long, s64) \
    ENTRY(ulong, u64)

// Prototype of the per-type helper.
#define __CLC_SUB_SAT_DECLARE(TYPE, SUFFIX) \
    _CLC_DECL TYPE __clc_sub_sat_##SUFFIX(TYPE, TYPE);

// Scalar sub_sat overload: forwards both operands to the helper.
#define __CLC_SUB_SAT_DEFINE(TYPE, SUFFIX) \
    _CLC_OVERLOAD _CLC_DEF TYPE sub_sat(TYPE x, TYPE y) { \
        return __clc_sub_sat_##SUFFIX(x, y); \
    }

// Vector sub_sat overloads, produced by _CLC_BINARY_VECTORIZE.
#define __CLC_SUB_SAT_VECTORIZE(TYPE, SUFFIX) \
    _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, TYPE, sub_sat, TYPE, TYPE)

// Same emission order as before: all prototypes, then all scalar
// definitions, then all vector definitions.
__CLC_SUB_SAT_TYPES(__CLC_SUB_SAT_DECLARE)
__CLC_SUB_SAT_TYPES(__CLC_SUB_SAT_DEFINE)
__CLC_SUB_SAT_TYPES(__CLC_SUB_SAT_VECTORIZE)

#undef __CLC_SUB_SAT_VECTORIZE
#undef __CLC_SUB_SAT_DEFINE
#undef __CLC_SUB_SAT_DECLARE
#undef __CLC_SUB_SAT_TYPES
libclc-0~git20140101/generic/lib/integer/sub_sat_if.ll 0000664 0000000 0000000 00000003165 12260750563 0022457 0 ustar 00root root 0000000 0000000 declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
; i8 entry points for sub_sat. Each one only forwards its two operands to the
; matching __clc_sub_sat_impl_* function and returns that result unchanged.
define i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  %sat = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
  ret i8 %sat
}

declare i8 @__clc_sub_sat_impl_u8(i8, i8)

define i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  %sat = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
  ret i8 %sat
}
; i16 entry points for sub_sat: thin forwarders to the __clc_sub_sat_impl_*
; functions declared here.
declare i16 @__clc_sub_sat_impl_s16(i16, i16)

define i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  %sat = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
  ret i16 %sat
}

declare i16 @__clc_sub_sat_impl_u16(i16, i16)

define i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  %sat = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
  ret i16 %sat
}
; i32 entry points for sub_sat: thin forwarders to the __clc_sub_sat_impl_*
; functions declared here.
declare i32 @__clc_sub_sat_impl_s32(i32, i32)

define i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  %sat = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
  ret i32 %sat
}

declare i32 @__clc_sub_sat_impl_u32(i32, i32)

define i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  %sat = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
  ret i32 %sat
}
; i64 entry points for sub_sat: thin forwarders to the __clc_sub_sat_impl_*
; functions declared here.
declare i64 @__clc_sub_sat_impl_s64(i64, i64)

define i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  %sat = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
  ret i64 %sat
}

declare i64 @__clc_sub_sat_impl_u64(i64, i64)

define i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  %sat = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
  ret i64 %sat
}
libclc-0~git20140101/generic/lib/integer/sub_sat_impl.ll 0000664 0000000 0000000 00000006023 12260750563 0023016 0 ustar 00root root 0000000 0000000 declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)

; Signed i8 saturating subtract. When the checked subtraction reports
; overflow the result is replaced by a bound derived from the sign of %x.
define i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i8 %x, 7
  ; 127 when %x is non-negative, -128 when %x is negative.
  %bound = xor i8 %sign.fill, 127
  %pair = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
  %diff = extractvalue {i8, i1} %pair, 0
  %overflowed = extractvalue {i8, i1} %pair, 1
  %result = select i1 %overflowed, i8 %bound, i8 %diff
  ret i8 %result
}

; Unsigned i8 saturating subtract: on overflow (borrow) the result is 0.
define i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
  %pair = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %x, i8 %y)
  %diff = extractvalue {i8, i1} %pair, 0
  %overflowed = extractvalue {i8, i1} %pair, 1
  %result = select i1 %overflowed, i8 0, i8 %diff
  ret i8 %result
}
declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16)
declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16)

; Signed i16 saturating subtract. When the checked subtraction reports
; overflow the result is replaced by a bound derived from the sign of %x.
define i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i16 %x, 15
  ; 32767 when %x is non-negative, -32768 when %x is negative.
  %bound = xor i16 %sign.fill, 32767
  %pair = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %x, i16 %y)
  %diff = extractvalue {i16, i1} %pair, 0
  %overflowed = extractvalue {i16, i1} %pair, 1
  %result = select i1 %overflowed, i16 %bound, i16 %diff
  ret i16 %result
}

; Unsigned i16 saturating subtract: on overflow (borrow) the result is 0.
define i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
  %pair = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %x, i16 %y)
  %diff = extractvalue {i16, i1} %pair, 0
  %overflowed = extractvalue {i16, i1} %pair, 1
  %result = select i1 %overflowed, i16 0, i16 %diff
  ret i16 %result
}
declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)

; Signed i32 saturating subtract. When the checked subtraction reports
; overflow the result is replaced by a bound derived from the sign of %x.
define i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i32 %x, 31
  ; 2147483647 when %x is non-negative, -2147483648 when %x is negative.
  %bound = xor i32 %sign.fill, 2147483647
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
  %diff = extractvalue {i32, i1} %pair, 0
  %overflowed = extractvalue {i32, i1} %pair, 1
  %result = select i1 %overflowed, i32 %bound, i32 %diff
  ret i32 %result
}

; Unsigned i32 saturating subtract: on overflow (borrow) the result is 0.
define i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
  %pair = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
  %diff = extractvalue {i32, i1} %pair, 0
  %overflowed = extractvalue {i32, i1} %pair, 1
  %result = select i1 %overflowed, i32 0, i32 %diff
  ret i32 %result
}
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64)
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)

; Signed i64 saturating subtract. When the checked subtraction reports
; overflow the result is replaced by a bound derived from the sign of %x.
define i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  ; 0 when %x is non-negative, -1 when %x is negative.
  %sign.fill = ashr i64 %x, 63
  ; Largest i64 when %x is non-negative, smallest i64 when %x is negative.
  %bound = xor i64 %sign.fill, 9223372036854775807
  %pair = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %x, i64 %y)
  %diff = extractvalue {i64, i1} %pair, 0
  %overflowed = extractvalue {i64, i1} %pair, 1
  %result = select i1 %overflowed, i64 %bound, i64 %diff
  ret i64 %result
}

; Unsigned i64 saturating subtract: on overflow (borrow) the result is 0.
define i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
  %pair = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
  %diff = extractvalue {i64, i1} %pair, 0
  %overflowed = extractvalue {i64, i1} %pair, 1
  %result = select i1 %overflowed, i64 0, i64 %diff
  ret i64 %result
}
libclc-0~git20140101/generic/lib/integer/upsample.cl 0000664 0000000 0000000 00000002746 12260750563 0022162 0 ustar 00root root 0000000 0000000 #include
// Defines upsample for one (result, hi, lo) type combination.
//   BGENTYPE  result type
//   GENTYPE   type of the hi operand
//   UGENTYPE  type of the lo operand
//   GENSIZE   number of bits hi is shifted left by
// The scalar overload converts hi to BGENTYPE, shifts it left by GENSIZE and
// ORs in lo. The 2- and 3-component overloads call the scalar one per
// component; the 4-, 8- and 16-component overloads recurse on the .lo and .hi
// halves.
//
// The definition ends on the last closing brace without a trailing line
// continuation; a continuation there would splice whatever line follows
// (here another #define) into this macro's body.
#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
    _CLC_OVERLOAD _CLC_DEF BGENTYPE upsample(GENTYPE hi, UGENTYPE lo){ \
        return ((BGENTYPE)hi << GENSIZE) | lo; \
    } \
    _CLC_OVERLOAD _CLC_DEF BGENTYPE##2 upsample(GENTYPE##2 hi, UGENTYPE##2 lo){ \
        return (BGENTYPE##2){upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF BGENTYPE##3 upsample(GENTYPE##3 hi, UGENTYPE##3 lo){ \
        return (BGENTYPE##3){upsample(hi.s0, lo.s0), upsample(hi.s1, lo.s1), upsample(hi.s2, lo.s2)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF BGENTYPE##4 upsample(GENTYPE##4 hi, UGENTYPE##4 lo){ \
        return (BGENTYPE##4){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF BGENTYPE##8 upsample(GENTYPE##8 hi, UGENTYPE##8 lo){ \
        return (BGENTYPE##8){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \
    } \
    _CLC_OVERLOAD _CLC_DEF BGENTYPE##16 upsample(GENTYPE##16 hi, UGENTYPE##16 lo){ \
        return (BGENTYPE##16){upsample(hi.lo, lo.lo), upsample(hi.hi, lo.hi)}; \
    }
// Instantiation table for upsample: one entry per (result, hi, lo, shift)
// combination. The lo operand is always the unsigned type of the same width
// as hi.
//
// The last entry carries no trailing line continuation, so the invocation
// below can never be absorbed into the macro body.
#define __CLC_UPSAMPLE_TYPES() \
    __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \
    __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \
    __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \
    __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \
    __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \
    __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32)

// Expand the table once, then remove the helper macros.
__CLC_UPSAMPLE_TYPES()

#undef __CLC_UPSAMPLE_TYPES
#undef __CLC_UPSAMPLE_IMPL
libclc-0~git20140101/generic/lib/math/ 0000775 0000000 0000000 00000000000 12260750563 0017277 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/math/binary_impl.inc 0000664 0000000 0000000 00000000712 12260750563 0022277 0 ustar 00root root 0000000 0000000
// Overload whose two operands both have type __CLC_GENTYPE. It is emitted
// only when __CLC_SCALAR is not defined; the scalar-second-operand overloads
// in this file presumably cover the scalar types.
#ifndef __CLC_SCALAR
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, __CLC_GENTYPE y) {
    const __CLC_GENTYPE result = FUNCTION_IMPL(x, y);
    return result;
}
#endif
// Overload taking a scalar double as the second operand: y is first converted
// to __CLC_GENTYPE and the result is then passed to FUNCTION_IMPL.
// NOTE(review): this overload names `double` unconditionally — confirm that
// every file including this body has cl_khr_fp64 enabled.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, double y) {
    const __CLC_GENTYPE converted_y = (__CLC_GENTYPE) (y);
    return FUNCTION_IMPL(x, converted_y);
}
// Overload taking a scalar float as the second operand: y is first converted
// to __CLC_GENTYPE and the result is then passed to FUNCTION_IMPL.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE x, float y) {
    const __CLC_GENTYPE converted_y = (__CLC_GENTYPE) (y);
    return FUNCTION_IMPL(x, converted_y);
}
libclc-0~git20140101/generic/lib/math/clc_nextafter.cl 0000664 0000000 0000000 00000002530 12260750563 0022440 0 ustar 00root root 0000000 0000000 #include
// This file provides OpenCL C implementations of nextafter for targets that
// don't support the clang builtin.

// Parenthesised so the expression stays intact wherever it is substituted.
#define FLT_NAN (0.0f/0.0f)

// Defines scalar __clc_nextafter(x, y) for one floating-point type.
//   FLOAT_TYPE      floating-point type of the operands and result
//   UINT_TYPE       unsigned integer type used to step the bit pattern of x
//   NAN             value returned when either operand is a NaN
//   ZERO            zero literal of FLOAT_TYPE
//   NEXTAFTER_ZERO  bit pattern of the value just below zero
//
// Behaviour:
//   - a NaN operand yields NAN; x == y yields y;
//   - x equal to zero (either sign, since -0 == +0 compares true) steps to
//     bit pattern 1 when y is greater, or to NEXTAFTER_ZERO when y is less;
//   - otherwise the bit pattern of x is incremented when the step moves away
//     from zero and decremented when it moves toward zero. Incrementing the
//     pattern grows the magnitude, so for negative x the direction must be
//     the opposite of the positive case.
#define NEXTAFTER(FLOAT_TYPE, UINT_TYPE, NAN, ZERO, NEXTAFTER_ZERO) \
_CLC_OVERLOAD _CLC_DEF FLOAT_TYPE __clc_nextafter(FLOAT_TYPE x, FLOAT_TYPE y) { \
  union { \
    FLOAT_TYPE f; \
    UINT_TYPE i; \
  } next; \
  if (isnan(x) || isnan(y)) { \
    return NAN; \
  } \
  if (x == y) { \
    return y; \
  } \
  next.f = x; \
  if (x == ZERO) { \
    next.i = (x < y) ? (UINT_TYPE)1 : (UINT_TYPE)NEXTAFTER_ZERO; \
  } else if ((x < y) == (x > ZERO)) { \
    next.i++; \
  } else { \
    next.i--; \
  } \
  return next.f; \
}
// Single-precision instantiation: steps through a uint view of the float;
// 0x80000001 is the bit pattern used when stepping below zero.
NEXTAFTER(float, uint, FLT_NAN, 0.0f, 0x80000001)
// Vector overloads of __clc_nextafter, produced by _CLC_BINARY_VECTORIZE
// (macro defined outside this file).
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, float)
// Double-precision instantiation, compiled only when the cl_khr_fp64
// extension macro is defined.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define DBL_NAN 0.0/0.0
// Steps through a ulong view of the double; 0x8000000000000001 is the bit
// pattern used when stepping below zero.
NEXTAFTER(double, ulong, DBL_NAN, 0.0, 0x8000000000000001)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double)
#endif
libclc-0~git20140101/generic/lib/math/fmax.cl 0000664 0000000 0000000 00000000365 12260750563 0020556 0 ustar 00root root 0000000 0000000 #include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define FUNCTION __clc_fmax
// Element-wise maximum with OpenCL NaN semantics: when exactly one operand is
// NaN the other operand is returned. `(x) != (x)` is true only for NaN; it is
// combined with the ordering test using bitwise OR so the expression is valid
// both for scalars and for the integer masks produced by vector comparisons.
//   x is NaN -> y   (y itself is NaN only when both operands are NaN)
//   x <  y   -> y
//   otherwise x     (this also covers "y is NaN")
#define FUNCTION_IMPL(x, y) ((((x) != (x)) | ((x) < (y))) ? (y) : (x))
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/math/fmin.cl 0000664 0000000 0000000 00000000365 12260750563 0020554 0 ustar 00root root 0000000 0000000 #include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define FUNCTION __clc_fmin
// Element-wise minimum with OpenCL NaN semantics: when exactly one operand is
// NaN the other operand is returned. `(x) != (x)` is true only for NaN; it is
// combined with the ordering test using bitwise OR so the expression is valid
// both for scalars and for the integer masks produced by vector comparisons.
//   x is NaN -> y   (y itself is NaN only when both operands are NaN)
//   y <  x   -> y
//   otherwise x     (this also covers "y is NaN")
#define FUNCTION_IMPL(x, y) ((((x) != (x)) | ((y) < (x))) ? (y) : (x))
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/math/hypot.cl 0000664 0000000 0000000 00000000236 12260750563 0020763 0 ustar 00root root 0000000 0000000 #include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/math/hypot.inc 0000664 0000000 0000000 00000000153 12260750563 0021134 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hypot(__CLC_GENTYPE x, __CLC_GENTYPE y) {
return sqrt(x*x + y*y);
}
libclc-0~git20140101/generic/lib/math/mad.cl 0000664 0000000 0000000 00000000234 12260750563 0020357 0 ustar 00root root 0000000 0000000 #include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/math/mad.inc 0000664 0000000 0000000 00000000164 12260750563 0020534 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE mad(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) {
return a * b + c;
}
libclc-0~git20140101/generic/lib/math/nextafter.cl 0000664 0000000 0000000 00000000406 12260750563 0021617 0 ustar 00root root 0000000 0000000 #include
// Generic nextafter: the float overloads are bound to the clang builtin
// __builtin_nextafterf.
_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __builtin_nextafterf, float, float)
// The double overloads use __builtin_nextafter and are only built when the
// target advertises cl_khr_fp64.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __builtin_nextafter, double, double)
#endif
libclc-0~git20140101/generic/lib/relational/ 0000775 0000000 0000000 00000000000 12260750563 0020500 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/relational/any.cl 0000664 0000000 0000000 00000002213 12260750563 0021605 0 ustar 00root root 0000000 0000000 #include
// _CLC_ANY(v) extracts the most significant bit of the integer v as 0 or 1.
// The _CLC_ANY<N> forms OR that bit together over every component of an
// N-component vector; widths 4, 8 and 16 are reduced by halves (.lo / .hi).
#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1))
#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2))
#define _CLC_ANY4(v) (_CLC_ANY2((v).lo) | _CLC_ANY2((v).hi))
#define _CLC_ANY8(v) (_CLC_ANY4((v).lo) | _CLC_ANY4((v).hi))
#define _CLC_ANY16(v) (_CLC_ANY8((v).lo) | _CLC_ANY8((v).hi))
// Signature shared by every any() overload.
#define ANY_ID(TYPE) \
  _CLC_OVERLOAD _CLC_DEF int any(TYPE v)

// One complete any() overload: ARG_TYPE is the parameter type and REDUCE the
// _CLC_ANY* macro matching its width.
#define ANY_OVERLOAD(ARG_TYPE, REDUCE) \
  ANY_ID(ARG_TYPE) { return REDUCE(v); }

// any() for TYPE and for its 2-, 3-, 4-, 8- and 16-component vector forms.
#define ANY_VECTORIZE(TYPE) \
  ANY_OVERLOAD(TYPE, _CLC_ANY) \
  ANY_OVERLOAD(TYPE##2, _CLC_ANY2) \
  ANY_OVERLOAD(TYPE##3, _CLC_ANY3) \
  ANY_OVERLOAD(TYPE##4, _CLC_ANY4) \
  ANY_OVERLOAD(TYPE##8, _CLC_ANY8) \
  ANY_OVERLOAD(TYPE##16, _CLC_ANY16)

// Instantiate for each signed integer element type.
ANY_VECTORIZE(char)
ANY_VECTORIZE(short)
ANY_VECTORIZE(int)
ANY_VECTORIZE(long)
libclc-0~git20140101/generic/lib/relational/isnan.cl 0000664 0000000 0000000 00000000616 12260750563 0022133 0 ustar 00root root 0000000 0000000 #include
// float: isnan is bound to __builtin_isnan with an int result type.
_CLC_DEFINE_UNARY_BUILTIN(int, isnan, __builtin_isnan, float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// The scalar version of isnan(double) returns an int, but the vector versions
// return long.
// That mismatch is why the scalar overload is written out by hand below and
// only the vector overloads are generated with a `long` result type.
_CLC_DEF _CLC_OVERLOAD int isnan(double x) {
return __builtin_isnan(x);
}
// NOTE(review): presumably builds the doubleN overloads from the scalar one;
// confirm against _CLC_UNARY_VECTORIZE that each component holds the value
// OpenCL requires for a true vector relational result (-1).
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, isnan, double)
#endif
libclc-0~git20140101/generic/lib/shared/ 0000775 0000000 0000000 00000000000 12260750563 0017614 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/shared/clamp.cl 0000664 0000000 0000000 00000000341 12260750563 0021226 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/shared/clamp.inc 0000664 0000000 0000000 00000000573 12260750563 0021410 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) {
return (x > z ? z : (x < y ? y : x));
}
#if !defined(__CLC_SCALAR)
// clamp() with scalar bounds: y (lower bound) and z (upper bound) are
// converted to the generic type once, then x is limited to [y, z] with the
// same comparison order as the all-gentype overload (upper bound first).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) {
  const __CLC_GENTYPE lower = (__CLC_GENTYPE)y;
  const __CLC_GENTYPE upper = (__CLC_GENTYPE)z;
  return (x > upper ? upper : (x < lower ? lower : x));
}
#endif // !defined(__CLC_SCALAR)
libclc-0~git20140101/generic/lib/shared/max.cl 0000664 0000000 0000000 00000000335 12260750563 0020722 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/shared/max.inc 0000664 0000000 0000000 00000000424 12260750563 0021074 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_GENTYPE b) {
return (a > b ? a : b);
}
#if !defined(__CLC_SCALAR)
// max() with a scalar second operand: b is converted to the generic type once
// and the larger of a and the converted value is returned.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
  const __CLC_GENTYPE rhs = (__CLC_GENTYPE)b;
  return (a > rhs ? a : rhs);
}
#endif // !defined(__CLC_SCALAR)
libclc-0~git20140101/generic/lib/shared/min.cl 0000664 0000000 0000000 00000000335 12260750563 0020720 0 ustar 00root root 0000000 0000000 #include
#define __CLC_BODY
#include
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
#define __CLC_BODY
#include
libclc-0~git20140101/generic/lib/shared/min.inc 0000664 0000000 0000000 00000000424 12260750563 0021072 0 ustar 00root root 0000000 0000000 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_GENTYPE b) {
return (a < b ? a : b);
}
#if !defined(__CLC_SCALAR)
// min() with a scalar second operand: b is converted to the generic type once
// and the smaller of a and the converted value is returned.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) {
  const __CLC_GENTYPE rhs = (__CLC_GENTYPE)b;
  return (a < rhs ? a : rhs);
}
#endif // !defined(__CLC_SCALAR)
libclc-0~git20140101/generic/lib/shared/vload.cl 0000664 0000000 0000000 00000003164 12260750563 0021245 0 ustar 00root root 0000000 0000000 #include
// VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) defines vload2/3/4/8/16 for pointers
// to PRIM_TYPE in ADDR_SPACE. `offset` is counted in whole vectors: vloadN
// reads the N consecutive elements starting at x[N*offset]. The 8- and 16-wide
// versions are assembled from two loads of half the width.
// The last line of the definition deliberately carries no line continuation,
// so the macro ends here regardless of what follows it in the file.
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##2)(x[2*offset] , x[2*offset+1]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##3)(x[3*offset] , x[3*offset+1], x[3*offset+2]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##4)(x[4*offset], x[4*offset+1], x[4*offset+2], x[4*offset+3]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##8)(vload4(0, &x[8*offset]), vload4(1, &x[8*offset])); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##16)(vload8(0, &x[16*offset]), vload8(1, &x[16*offset])); \
  }
// VLOAD_ADDR_SPACES(SCALAR_GENTYPE) instantiates the vload family for one
// element type in each of the four address spaces.
// The parameter is named SCALAR_GENTYPE so it cannot be confused with the
// library's own __CLC_SCALAR_GENTYPE macro, and the last line carries no line
// continuation so the definition ends here regardless of what follows it.
#define VLOAD_ADDR_SPACES(SCALAR_GENTYPE) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __private) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __local) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __constant) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __global)
// VLOAD_TYPES() instantiates the vload family for every element type that is
// always available.
// The list must not end in a line continuation: otherwise the VLOAD_TYPES()
// invocation below is absorbed into the macro body and nothing is generated.
#define VLOAD_TYPES() \
  VLOAD_ADDR_SPACES(char) \
  VLOAD_ADDR_SPACES(uchar) \
  VLOAD_ADDR_SPACES(short) \
  VLOAD_ADDR_SPACES(ushort) \
  VLOAD_ADDR_SPACES(int) \
  VLOAD_ADDR_SPACES(uint) \
  VLOAD_ADDR_SPACES(long) \
  VLOAD_ADDR_SPACES(ulong) \
  VLOAD_ADDR_SPACES(float)

VLOAD_TYPES()

// double is only available when the target advertises cl_khr_fp64.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
  VLOAD_ADDR_SPACES(double)
#endif
libclc-0~git20140101/generic/lib/shared/vload_impl.ll 0000664 0000000 0000000 00000012724 12260750563 0022301 0 ustar 00root root 0000000 0000000 ; This provides optimized implementations of vload2/3/4/8/16 for 32-bit int/uint
; The address spaces get mapped to data types in target-specific usages
; Address space 1 variants of vload2/3/4/8/16 for i32.
; Each function reinterprets the scalar i32 pointer as a pointer to an
; <N x i32> vector in the same address space and performs one vector load.
; The load is only assumed to be 4-byte (element) aligned and carries the
; "int" TBAA tag (!3).
define <2 x i32> @__clc_vload2_i32__addr1(i32 addrspace(1)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <2 x i32> addrspace(1)*
%2 = load <2 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret <2 x i32> %2
}
define <3 x i32> @__clc_vload3_i32__addr1(i32 addrspace(1)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <3 x i32> addrspace(1)*
%2 = load <3 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret <3 x i32> %2
}
define <4 x i32> @__clc_vload4_i32__addr1(i32 addrspace(1)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <4 x i32> addrspace(1)*
%2 = load <4 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret <4 x i32> %2
}
define <8 x i32> @__clc_vload8_i32__addr1(i32 addrspace(1)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <8 x i32> addrspace(1)*
%2 = load <8 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret <8 x i32> %2
}
define <16 x i32> @__clc_vload16_i32__addr1(i32 addrspace(1)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <16 x i32> addrspace(1)*
%2 = load <16 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret <16 x i32> %2
}
; Address space 2 variants of vload2/3/4/8/16 for i32.
; Same pattern as the other address spaces: bitcast the i32 pointer to a
; pointer to <N x i32> and issue one 4-byte-aligned vector load tagged "int".
define <2 x i32> @__clc_vload2_i32__addr2(i32 addrspace(2)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(2)* %addr to <2 x i32> addrspace(2)*
%2 = load <2 x i32> addrspace(2)* %1, align 4, !tbaa !3
ret <2 x i32> %2
}
define <3 x i32> @__clc_vload3_i32__addr2(i32 addrspace(2)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(2)* %addr to <3 x i32> addrspace(2)*
%2 = load <3 x i32> addrspace(2)* %1, align 4, !tbaa !3
ret <3 x i32> %2
}
define <4 x i32> @__clc_vload4_i32__addr2(i32 addrspace(2)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(2)* %addr to <4 x i32> addrspace(2)*
%2 = load <4 x i32> addrspace(2)* %1, align 4, !tbaa !3
ret <4 x i32> %2
}
define <8 x i32> @__clc_vload8_i32__addr2(i32 addrspace(2)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(2)* %addr to <8 x i32> addrspace(2)*
%2 = load <8 x i32> addrspace(2)* %1, align 4, !tbaa !3
ret <8 x i32> %2
}
define <16 x i32> @__clc_vload16_i32__addr2(i32 addrspace(2)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(2)* %addr to <16 x i32> addrspace(2)*
%2 = load <16 x i32> addrspace(2)* %1, align 4, !tbaa !3
ret <16 x i32> %2
}
; Address space 3 variants of vload2/3/4/8/16 for i32.
; Same pattern as the other address spaces: bitcast the i32 pointer to a
; pointer to <N x i32> and issue one 4-byte-aligned vector load tagged "int".
define <2 x i32> @__clc_vload2_i32__addr3(i32 addrspace(3)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(3)* %addr to <2 x i32> addrspace(3)*
%2 = load <2 x i32> addrspace(3)* %1, align 4, !tbaa !3
ret <2 x i32> %2
}
define <3 x i32> @__clc_vload3_i32__addr3(i32 addrspace(3)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(3)* %addr to <3 x i32> addrspace(3)*
%2 = load <3 x i32> addrspace(3)* %1, align 4, !tbaa !3
ret <3 x i32> %2
}
define <4 x i32> @__clc_vload4_i32__addr3(i32 addrspace(3)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(3)* %addr to <4 x i32> addrspace(3)*
%2 = load <4 x i32> addrspace(3)* %1, align 4, !tbaa !3
ret <4 x i32> %2
}
define <8 x i32> @__clc_vload8_i32__addr3(i32 addrspace(3)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(3)* %addr to <8 x i32> addrspace(3)*
%2 = load <8 x i32> addrspace(3)* %1, align 4, !tbaa !3
ret <8 x i32> %2
}
define <16 x i32> @__clc_vload16_i32__addr3(i32 addrspace(3)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(3)* %addr to <16 x i32> addrspace(3)*
%2 = load <16 x i32> addrspace(3)* %1, align 4, !tbaa !3
ret <16 x i32> %2
}
; Address space 4 variants of vload2/3/4/8/16 for i32.
; Same pattern as the other address spaces: bitcast the i32 pointer to a
; pointer to <N x i32> and issue one 4-byte-aligned vector load tagged "int".
define <2 x i32> @__clc_vload2_i32__addr4(i32 addrspace(4)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(4)* %addr to <2 x i32> addrspace(4)*
%2 = load <2 x i32> addrspace(4)* %1, align 4, !tbaa !3
ret <2 x i32> %2
}
define <3 x i32> @__clc_vload3_i32__addr4(i32 addrspace(4)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(4)* %addr to <3 x i32> addrspace(4)*
%2 = load <3 x i32> addrspace(4)* %1, align 4, !tbaa !3
ret <3 x i32> %2
}
define <4 x i32> @__clc_vload4_i32__addr4(i32 addrspace(4)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(4)* %addr to <4 x i32> addrspace(4)*
%2 = load <4 x i32> addrspace(4)* %1, align 4, !tbaa !3
ret <4 x i32> %2
}
define <8 x i32> @__clc_vload8_i32__addr4(i32 addrspace(4)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(4)* %addr to <8 x i32> addrspace(4)*
%2 = load <8 x i32> addrspace(4)* %1, align 4, !tbaa !3
ret <8 x i32> %2
}
define <16 x i32> @__clc_vload16_i32__addr4(i32 addrspace(4)* nocapture %addr) nounwind readonly alwaysinline {
%1 = bitcast i32 addrspace(4)* %addr to <16 x i32> addrspace(4)*
%2 = load <16 x i32> addrspace(4)* %1, align 4, !tbaa !3
ret <16 x i32> %2
}
; TBAA type descriptors. Each node pairs a type name with its parent node:
; the four scalar types hang off "omnipotent char" (!5), whose parent is the
; root "Simple C/C++ TBAA" (!6). The loads in this file reference only !3.
!1 = metadata !{metadata !"char", metadata !5}
!2 = metadata !{metadata !"short", metadata !5}
!3 = metadata !{metadata !"int", metadata !5}
!4 = metadata !{metadata !"long", metadata !5}
!5 = metadata !{metadata !"omnipotent char", metadata !6}
!6 = metadata !{metadata !"Simple C/C++ TBAA"}
libclc-0~git20140101/generic/lib/shared/vstore.cl 0000664 0000000 0000000 00000003350 12260750563 0021457 0 ustar 00root root 0000000 0000000 #include
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
// VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) defines vstore2/3/4/8/16 for
// pointers to PRIM_TYPE in ADDR_SPACE. `offset` is counted in whole vectors:
// vstoreN writes the N components of vec to mem[N*offset] onwards. Widths 4,
// 8 and 16 are written as two stores of half the width (.lo then .hi).
// The last line of the definition deliberately carries no line continuation,
// so the macro ends here regardless of what follows it in the file.
#define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
  _CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    mem[2*offset] = vec.s0; \
    mem[2*offset+1] = vec.s1; \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    mem[3*offset] = vec.s0; \
    mem[3*offset+1] = vec.s1; \
    mem[3*offset+2] = vec.s2; \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    vstore2(vec.lo, 0, &mem[offset*4]); \
    vstore2(vec.hi, 1, &mem[offset*4]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    vstore4(vec.lo, 0, &mem[offset*8]); \
    vstore4(vec.hi, 1, &mem[offset*8]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    vstore8(vec.lo, 0, &mem[offset*16]); \
    vstore8(vec.hi, 1, &mem[offset*16]); \
  }
// VSTORE_ADDR_SPACES(SCALAR_GENTYPE) instantiates the vstore family for one
// element type in every writable address space (there is no __constant
// variant because constant memory cannot be stored to).
// The last line carries no line continuation so the definition ends here
// regardless of what follows it in the file.
#define VSTORE_ADDR_SPACES(SCALAR_GENTYPE) \
  VSTORE_VECTORIZE(SCALAR_GENTYPE, __private) \
  VSTORE_VECTORIZE(SCALAR_GENTYPE, __local) \
  VSTORE_VECTORIZE(SCALAR_GENTYPE, __global)
// VSTORE_TYPES() instantiates the vstore family for every element type that
// is always available.
// The list must not end in a line continuation: otherwise the VSTORE_TYPES()
// invocation below is absorbed into the macro body and nothing is generated.
#define VSTORE_TYPES() \
  VSTORE_ADDR_SPACES(char) \
  VSTORE_ADDR_SPACES(uchar) \
  VSTORE_ADDR_SPACES(short) \
  VSTORE_ADDR_SPACES(ushort) \
  VSTORE_ADDR_SPACES(int) \
  VSTORE_ADDR_SPACES(uint) \
  VSTORE_ADDR_SPACES(long) \
  VSTORE_ADDR_SPACES(ulong) \
  VSTORE_ADDR_SPACES(float)

VSTORE_TYPES()

// double is only available when the target advertises cl_khr_fp64.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
  VSTORE_ADDR_SPACES(double)
#endif
libclc-0~git20140101/generic/lib/shared/vstore_impl.ll 0000664 0000000 0000000 00000003351 12260750563 0022512 0 ustar 00root root 0000000 0000000 ; This provides optimized implementations of vstore2/3/4/8/16 for 32-bit int/uint
; The address spaces get mapped to data types in target-specific usages
; Address space 1 variants of vstore2/3/4/8/16 for i32.
; Each function reinterprets the scalar i32 pointer as a pointer to an
; <N x i32> vector in the same address space and writes %vec with one vector
; store. The store is only assumed to be 4-byte (element) aligned and carries
; the "int" TBAA tag (!3).
define void @__clc_vstore2_i32__addr1(<2 x i32> %vec, i32 addrspace(1)* nocapture %addr) nounwind alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <2 x i32> addrspace(1)*
store <2 x i32> %vec, <2 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret void
}
define void @__clc_vstore3_i32__addr1(<3 x i32> %vec, i32 addrspace(1)* nocapture %addr) nounwind alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <3 x i32> addrspace(1)*
store <3 x i32> %vec, <3 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret void
}
define void @__clc_vstore4_i32__addr1(<4 x i32> %vec, i32 addrspace(1)* nocapture %addr) nounwind alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <4 x i32> addrspace(1)*
store <4 x i32> %vec, <4 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret void
}
define void @__clc_vstore8_i32__addr1(<8 x i32> %vec, i32 addrspace(1)* nocapture %addr) nounwind alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <8 x i32> addrspace(1)*
store <8 x i32> %vec, <8 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret void
}
define void @__clc_vstore16_i32__addr1(<16 x i32> %vec, i32 addrspace(1)* nocapture %addr) nounwind alwaysinline {
%1 = bitcast i32 addrspace(1)* %addr to <16 x i32> addrspace(1)*
store <16 x i32> %vec, <16 x i32> addrspace(1)* %1, align 4, !tbaa !3
ret void
}
; TBAA type descriptors. Each node pairs a type name with its parent node:
; the four scalar types hang off "omnipotent char" (!5), whose parent is the
; root "Simple C/C++ TBAA" (!6). The stores in this file reference only !3.
!1 = metadata !{metadata !"char", metadata !5}
!2 = metadata !{metadata !"short", metadata !5}
!3 = metadata !{metadata !"int", metadata !5}
!4 = metadata !{metadata !"long", metadata !5}
!5 = metadata !{metadata !"omnipotent char", metadata !6}
!6 = metadata !{metadata !"Simple C/C++ TBAA"}
libclc-0~git20140101/generic/lib/workitem/ 0000775 0000000 0000000 00000000000 12260750563 0020207 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/generic/lib/workitem/get_global_id.cl 0000664 0000000 0000000 00000000206 12260750563 0023300 0 ustar 00root root 0000000 0000000 #include
// Global work-item index along `dim`, derived from the target's group and
// local queries: first item of the enclosing work-group plus the position
// inside that group.
// NOTE(review): no global work offset is added here - confirm that is intended.
_CLC_DEF size_t get_global_id(uint dim) {
  const size_t group_base = get_group_id(dim) * get_local_size(dim);
  return group_base + get_local_id(dim);
}
libclc-0~git20140101/generic/lib/workitem/get_global_size.cl 0000664 0000000 0000000 00000000166 12260750563 0023663 0 ustar 00root root 0000000 0000000 #include
// Total number of work-items along `dim`: number of work-groups multiplied by
// the number of work-items per group.
_CLC_DEF size_t get_global_size(uint dim) {
  const size_t groups = get_num_groups(dim);
  const size_t items_per_group = get_local_size(dim);
  return groups * items_per_group;
}
libclc-0~git20140101/ptx-nvidiacl/ 0000775 0000000 0000000 00000000000 12260750563 0016566 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/ptx-nvidiacl/lib/ 0000775 0000000 0000000 00000000000 12260750563 0017334 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/ptx-nvidiacl/lib/SOURCES 0000664 0000000 0000000 00000000203 12260750563 0020375 0 ustar 00root root 0000000 0000000 synchronization/barrier.cl
workitem/get_group_id.cl
workitem/get_local_id.cl
workitem/get_local_size.cl
workitem/get_num_groups.cl
libclc-0~git20140101/ptx-nvidiacl/lib/synchronization/ 0000775 0000000 0000000 00000000000 12260750563 0022575 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/ptx-nvidiacl/lib/synchronization/barrier.cl 0000664 0000000 0000000 00000000223 12260750563 0024540 0 ustar 00root root 0000000 0000000 #include
// Work-group barrier for the NVIDIA PTX target.
//
// OpenCL requires every work-item of the group to wait at barrier() no matter
// which fence flags are passed; the flags only select which memory must be
// made consistent. The synchronization is therefore issued unconditionally:
// gating it on CLK_LOCAL_MEM_FENCE would turn barrier(CLK_GLOBAL_MEM_FENCE)
// into a no-op.
// NOTE(review): this relies on bar.sync also ordering the prior memory
// accesses of the participating threads - confirm against the PTX ISA.
_CLC_DEF void barrier(cl_mem_fence_flags flags) {
  (void)flags; // both fence kinds are served by the same instruction
  __builtin_ptx_bar_sync(0);
}
libclc-0~git20140101/ptx-nvidiacl/lib/workitem/ 0000775 0000000 0000000 00000000000 12260750563 0021175 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/ptx-nvidiacl/lib/workitem/get_group_id.cl 0000664 0000000 0000000 00000000373 12260750563 0024167 0 ustar 00root root 0000000 0000000 #include
// Work-group index along `dim`, read from the PTX ctaid register for that
// axis. Dimensions other than 0, 1 and 2 yield 0.
_CLC_DEF size_t get_group_id(uint dim) {
  if (dim == 0)
    return __builtin_ptx_read_ctaid_x();
  if (dim == 1)
    return __builtin_ptx_read_ctaid_y();
  if (dim == 2)
    return __builtin_ptx_read_ctaid_z();
  return 0;
}
libclc-0~git20140101/ptx-nvidiacl/lib/workitem/get_local_id.cl 0000664 0000000 0000000 00000000365 12260750563 0024126 0 ustar 00root root 0000000 0000000 #include
// Work-item index inside its work-group along `dim`, read from the PTX tid
// register for that axis. Dimensions other than 0, 1 and 2 yield 0.
_CLC_DEF size_t get_local_id(uint dim) {
  if (dim == 0)
    return __builtin_ptx_read_tid_x();
  if (dim == 1)
    return __builtin_ptx_read_tid_y();
  if (dim == 2)
    return __builtin_ptx_read_tid_z();
  return 0;
}
libclc-0~git20140101/ptx-nvidiacl/lib/workitem/get_local_size.cl 0000664 0000000 0000000 00000000372 12260750563 0024502 0 ustar 00root root 0000000 0000000 #include
// Number of work-items per work-group along `dim`, read from the PTX ntid
// register for that axis.
// For a dimension outside 0..2 the result is 1, as OpenCL specifies for
// get_local_size(): an unused dimension has extent one, never zero, so
// products such as num_groups * local_size stay meaningful.
_CLC_DEF size_t get_local_size(uint dim) {
  switch (dim) {
  case 0:  return __builtin_ptx_read_ntid_x();
  case 1:  return __builtin_ptx_read_ntid_y();
  case 2:  return __builtin_ptx_read_ntid_z();
  default: return 1;
  }
}
libclc-0~git20140101/ptx-nvidiacl/lib/workitem/get_num_groups.cl 0000664 0000000 0000000 00000000400 12260750563 0024544 0 ustar 00root root 0000000 0000000 #include
// Number of work-groups along `dim`, read from the PTX nctaid register for
// that axis.
// For a dimension outside 0..2 the result is 1, as OpenCL specifies for
// get_num_groups(): an unused dimension has extent one, never zero.
_CLC_DEF size_t get_num_groups(uint dim) {
  switch (dim) {
  case 0:  return __builtin_ptx_read_nctaid_x();
  case 1:  return __builtin_ptx_read_nctaid_y();
  case 2:  return __builtin_ptx_read_nctaid_z();
  default: return 1;
  }
}
libclc-0~git20140101/ptx/ 0000775 0000000 0000000 00000000000 12260750563 0014777 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/ptx/lib/ 0000775 0000000 0000000 00000000000 12260750563 0015545 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/ptx/lib/OVERRIDES 0000664 0000000 0000000 00000000054 12260750563 0017031 0 ustar 00root root 0000000 0000000 integer/add_sat_if.ll
integer/sub_sat_if.ll
libclc-0~git20140101/ptx/lib/SOURCES 0000664 0000000 0000000 00000000045 12260750563 0016612 0 ustar 00root root 0000000 0000000 integer/add_sat.ll
integer/sub_sat.ll libclc-0~git20140101/ptx/lib/integer/ 0000775 0000000 0000000 00000000000 12260750563 0017202 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/ptx/lib/integer/add_sat.ll 0000664 0000000 0000000 00000003315 12260750563 0021134 0 ustar 00root root 0000000 0000000 declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
; PTX entry points for saturating addition. Every @__clc_add_sat_<sign><bits>
; below is a thin wrapper with the ptx_device calling convention that forwards
; both operands to the matching @__clc_add_sat_impl_<sign><bits> declaration
; and returns its result unchanged.
define ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
ret i8 %call
}
; unsigned 8-bit
declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
define ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
ret i8 %call
}
; signed 16-bit
declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
define ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
ret i16 %call
}
; unsigned 16-bit
declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
define ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
ret i16 %call
}
; signed 32-bit
declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
define ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
ret i32 %call
}
; unsigned 32-bit
declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
define ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
ret i32 %call
}
; signed 64-bit
declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
define ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
ret i64 %call
}
; unsigned 64-bit
declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
define ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
ret i64 %call
}
libclc-0~git20140101/ptx/lib/integer/sub_sat.ll 0000664 0000000 0000000 00000003315 12260750563 0021175 0 ustar 00root root 0000000 0000000 declare i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
; PTX entry points for saturating subtraction. Every
; @__clc_sub_sat_<sign><bits> below is a thin wrapper with the ptx_device
; calling convention that forwards both operands to the matching
; @__clc_sub_sat_impl_<sign><bits> declaration and returns its result
; unchanged.
define ptx_device i8 @__clc_sub_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_sub_sat_impl_s8(i8 %x, i8 %y)
ret i8 %call
}
; unsigned 8-bit
declare i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
define ptx_device i8 @__clc_sub_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_sub_sat_impl_u8(i8 %x, i8 %y)
ret i8 %call
}
; signed 16-bit
declare i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
define ptx_device i16 @__clc_sub_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_sub_sat_impl_s16(i16 %x, i16 %y)
ret i16 %call
}
; unsigned 16-bit
declare i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
define ptx_device i16 @__clc_sub_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_sub_sat_impl_u16(i16 %x, i16 %y)
ret i16 %call
}
; signed 32-bit
declare i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
define ptx_device i32 @__clc_sub_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_sub_sat_impl_s32(i32 %x, i32 %y)
ret i32 %call
}
; unsigned 32-bit
declare i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
define ptx_device i32 @__clc_sub_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_sub_sat_impl_u32(i32 %x, i32 %y)
ret i32 %call
}
; signed 64-bit
declare i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
define ptx_device i64 @__clc_sub_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_sub_sat_impl_s64(i64 %x, i64 %y)
ret i64 %call
}
; unsigned 64-bit
declare i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
define ptx_device i64 @__clc_sub_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_sub_sat_impl_u64(i64 %x, i64 %y)
ret i64 %call
}
libclc-0~git20140101/r600/ 0000775 0000000 0000000 00000000000 12260750563 0014653 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/r600/lib/ 0000775 0000000 0000000 00000000000 12260750563 0015421 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/r600/lib/OVERRIDES 0000664 0000000 0000000 00000000065 12260750563 0016707 0 ustar 00root root 0000000 0000000 workitem/get_group_id.cl
workitem/get_global_size.cl
libclc-0~git20140101/r600/lib/SOURCES 0000664 0000000 0000000 00000000362 12260750563 0016470 0 ustar 00root root 0000000 0000000 atomic/atomic.cl
math/nextafter.cl
workitem/get_num_groups.ll
workitem/get_group_id.ll
workitem/get_local_size.ll
workitem/get_local_id.ll
workitem/get_global_size.ll
synchronization/barrier.cl
synchronization/barrier_impl.ll
shared/vload.cl
libclc-0~git20140101/r600/lib/atomic/ 0000775 0000000 0000000 00000000000 12260750563 0016675 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/r600/lib/atomic/atomic.cl 0000664 0000000 0000000 00000001625 12260750563 0020475 0 ustar 00root root 0000000 0000000 #include
// One public overload of an atomic function. Signed and unsigned overloads
// both forward to the same signed helper __clc_<name>_addr<id>; the pointer
// and the operand are cast to the signed type on the way in and the result is
// cast back to the overload's own type on the way out.
#define ATOMIC_FUNC_TYPE(SIGNEDNESS, BASE_TYPE, NAME, CL_AS, AS_ID) \
  _CLC_OVERLOAD _CLC_DEF SIGNEDNESS BASE_TYPE NAME (volatile CL_AS SIGNEDNESS BASE_TYPE *p, SIGNEDNESS BASE_TYPE val) { \
    return (SIGNEDNESS BASE_TYPE)__clc_##NAME##_addr##AS_ID((volatile CL_AS signed BASE_TYPE*)p, (signed BASE_TYPE)val); \
  }

// Declares the signed helper for one address space, then defines the signed
// and the unsigned public overloads on top of it.
#define ATOMIC_FUNC_SIGN(BASE_TYPE, NAME, CL_AS, AS_ID) \
  _CLC_DECL signed BASE_TYPE __clc_##NAME##_addr##AS_ID(volatile CL_AS signed BASE_TYPE*, signed BASE_TYPE); \
  ATOMIC_FUNC_TYPE(signed, BASE_TYPE, NAME, CL_AS, AS_ID) \
  ATOMIC_FUNC_TYPE(unsigned, BASE_TYPE, NAME, CL_AS, AS_ID)

// Covers both supported address spaces: global is paired with helper id 1 and
// local with helper id 3.
#define ATOMIC_FUNC_ADDRSPACE(BASE_TYPE, NAME) \
  ATOMIC_FUNC_SIGN(BASE_TYPE, NAME, global, 1) \
  ATOMIC_FUNC_SIGN(BASE_TYPE, NAME, local, 3)

// Only the int element type is generated.
#define ATOMIC_FUNC(NAME) \
  ATOMIC_FUNC_ADDRSPACE(int, NAME)

ATOMIC_FUNC(atomic_add)
ATOMIC_FUNC(atomic_sub)
libclc-0~git20140101/r600/lib/math/ 0000775 0000000 0000000 00000000000 12260750563 0016352 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/r600/lib/math/nextafter.cl 0000664 0000000 0000000 00000000142 12260750563 0020667 0 ustar 00root root 0000000 0000000 #include
// r600 binds float nextafter to the software __clc_nextafter implementation
// rather than to a compiler builtin. Only the float variant is bound here.
_CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
libclc-0~git20140101/r600/lib/shared/ 0000775 0000000 0000000 00000000000 12260750563 0016667 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/r600/lib/shared/vload.cl 0000664 0000000 0000000 00000007602 12260750563 0020321 0 ustar 00root root 0000000 0000000 #include
// VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) defines element-by-element
// vload2/3/4/8/16 for pointers to PRIM_TYPE in ADDR_SPACE. `offset` is
// counted in whole vectors: vloadN reads the N consecutive elements starting
// at x[N*offset]. The 8- and 16-wide versions are assembled from two loads of
// half the width.
// The last line of the definition deliberately carries no line continuation,
// so the macro ends here regardless of what follows it in the file.
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##2)(x[2*offset] , x[2*offset+1]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##3)(x[3*offset] , x[3*offset+1], x[3*offset+2]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##4)(x[4*offset], x[4*offset+1], x[4*offset+2], x[4*offset+3]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##8)(vload4(0, &x[8*offset]), vload4(1, &x[8*offset])); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return (PRIM_TYPE##16)(vload8(0, &x[16*offset]), vload8(1, &x[16*offset])); \
  }
// VLOAD_ADDR_SPACES(SCALAR_GENTYPE) instantiates the element-by-element vload
// family for one element type in each of the four address spaces.
// The last line carries no line continuation so the definition ends here
// regardless of what follows it in the file.
#define VLOAD_ADDR_SPACES(SCALAR_GENTYPE) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __private) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __local) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __constant) \
  VLOAD_VECTORIZE(SCALAR_GENTYPE, __global)
//int/uint are special... see below
// VLOAD_TYPES() instantiates the element-by-element vload family for every
// always-available element type except int and uint.
// The list must not end in a line continuation: otherwise the VLOAD_TYPES()
// invocation below is absorbed into the macro body and nothing is generated.
#define VLOAD_TYPES() \
  VLOAD_ADDR_SPACES(char) \
  VLOAD_ADDR_SPACES(uchar) \
  VLOAD_ADDR_SPACES(short) \
  VLOAD_ADDR_SPACES(ushort) \
  VLOAD_ADDR_SPACES(long) \
  VLOAD_ADDR_SPACES(ulong) \
  VLOAD_ADDR_SPACES(float)

VLOAD_TYPES()

// double is only available when the target advertises cl_khr_fp64.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
  VLOAD_ADDR_SPACES(double)
#endif
//Assembly overrides start here
// int and uint keep the element-by-element implementation only for the
// __private and __local address spaces; the __global and __constant overloads
// are generated further down from the __clc_vload* helpers.
VLOAD_VECTORIZE(int, __private)
VLOAD_VECTORIZE(int, __local)
VLOAD_VECTORIZE(uint, __private)
VLOAD_VECTORIZE(uint, __local)
//We only define functions for typeN vloadN(), and then just bitcast the result for unsigned types
// _CLC_VLOAD_ASM_DECL declares the five helpers
// __clc_vload<N>_<LLVM_SCALAR_TYPE>__addr<ADDR_SPACE_ID> (N = 2, 3, 4, 8, 16)
// for PRIM_TYPE pointers in ADDR_SPACE.
// The last declaration carries no line continuation so the macro ends here
// regardless of what follows it in the file.
#define _CLC_VLOAD_ASM_DECL(PRIM_TYPE,LLVM_SCALAR_TYPE,ADDR_SPACE,ADDR_SPACE_ID) \
_CLC_DECL PRIM_TYPE##2 __clc_vload2_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (const ADDR_SPACE PRIM_TYPE *); \
_CLC_DECL PRIM_TYPE##3 __clc_vload3_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (const ADDR_SPACE PRIM_TYPE *); \
_CLC_DECL PRIM_TYPE##4 __clc_vload4_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (const ADDR_SPACE PRIM_TYPE *); \
_CLC_DECL PRIM_TYPE##8 __clc_vload8_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (const ADDR_SPACE PRIM_TYPE *); \
_CLC_DECL PRIM_TYPE##16 __clc_vload16_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (const ADDR_SPACE PRIM_TYPE *);
// _CLC_VLOAD_ASM_DEFINE defines one vload<VEC_WIDTH> overload on top of the
// matching __clc_vload helper.
//   PRIM_TYPE   - element type of the public overload (int or uint)
//   S_PRIM_TYPE - element type the helper was declared with; the pointer is
//                 cast to it and the loaded vector is reinterpreted back to
//                 PRIM_TYPE with __builtin_astype
// The helper receives the address of element VEC_WIDTH * offset.
// The closing brace carries no line continuation so the macro ends here
// regardless of what follows it in the file.
#define _CLC_VLOAD_ASM_DEFINE(PRIM_TYPE,S_PRIM_TYPE, LLVM_SCALAR_TYPE,VEC_WIDTH,ADDR_SPACE,ADDR_SPACE_ID) \
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##VEC_WIDTH vload##VEC_WIDTH (size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
    return __builtin_astype(__clc_vload##VEC_WIDTH##_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID ((const ADDR_SPACE S_PRIM_TYPE *)&x[VEC_WIDTH * offset]), PRIM_TYPE##VEC_WIDTH); \
  }
// _CLC_VLOAD_ASM_OVERLOAD_SIZES defines the helper-backed vload overloads of
// every width (2, 3, 4, 8, 16) for one element type and one address space.
// The last line carries no line continuation so the macro ends here
// regardless of what follows it in the file.
#define _CLC_VLOAD_ASM_OVERLOAD_SIZES(PRIM_TYPE,S_PRIM_TYPE,LLVM_TYPE,ADDR_SPACE,ADDR_SPACE_ID) \
  _CLC_VLOAD_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 2, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_VLOAD_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 3, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_VLOAD_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 4, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_VLOAD_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 8, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_VLOAD_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 16, ADDR_SPACE, ADDR_SPACE_ID)
// _CLC_VLOAD_ASM_OVERLOAD_ADDR_SPACES defines the helper-backed vload
// overloads for the two address spaces that have helpers: global (helper id 1)
// and constant (helper id 2).
// The last line carries no line continuation so the macro ends here
// regardless of what follows it in the file.
#define _CLC_VLOAD_ASM_OVERLOAD_ADDR_SPACES(PRIM_TYPE,S_PRIM_TYPE,LLVM_TYPE) \
  _CLC_VLOAD_ASM_OVERLOAD_SIZES(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, global, 1) \
  _CLC_VLOAD_ASM_OVERLOAD_SIZES(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, constant, 2)
// _CLC_VLOAD_ASM_OVERLOADS() declares the i32 helpers for __global and
// __constant once (with int element type) and then defines the int and uint
// vload overloads on top of them; uint reuses the int helpers.
// The last line must not carry a line continuation: otherwise the macro's own
// invocation on the following line is absorbed into the definition.
#define _CLC_VLOAD_ASM_OVERLOADS() \
  _CLC_VLOAD_ASM_DECL(int,i32,__global,1) \
  _CLC_VLOAD_ASM_DECL(int,i32,__constant,2) \
  _CLC_VLOAD_ASM_OVERLOAD_ADDR_SPACES(int,int,i32) \
  _CLC_VLOAD_ASM_OVERLOAD_ADDR_SPACES(uint,int,i32)
_CLC_VLOAD_ASM_OVERLOADS() libclc-0~git20140101/r600/lib/shared/vstore.cl 0000664 0000000 0000000 00000010526 12260750563 0020535 0 ustar 00root root 0000000 0000000 #include
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
// VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) defines element-by-element
// vstore2/3/4/8/16 for pointers to PRIM_TYPE in ADDR_SPACE. `offset` is
// counted in whole vectors: vstoreN writes the N components of vec to
// mem[N*offset] onwards. Widths 4, 8 and 16 are written as two stores of half
// the width (.lo then .hi).
// The last line of the definition deliberately carries no line continuation,
// so the macro ends here regardless of what follows it in the file.
#define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
  _CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    mem[2*offset] = vec.s0; \
    mem[2*offset+1] = vec.s1; \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    mem[3*offset] = vec.s0; \
    mem[3*offset+1] = vec.s1; \
    mem[3*offset+2] = vec.s2; \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    vstore2(vec.lo, 0, &mem[offset*4]); \
    vstore2(vec.hi, 1, &mem[offset*4]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    vstore4(vec.lo, 0, &mem[offset*8]); \
    vstore4(vec.hi, 1, &mem[offset*8]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
    vstore8(vec.lo, 0, &mem[offset*16]); \
    vstore8(vec.hi, 1, &mem[offset*16]); \
  }
/* Instantiates the generic vstoreN overloads of SCALAR_GENTYPE for the
 * __private, __local and __global address spaces. */
#define VSTORE_ADDR_SPACES(SCALAR_GENTYPE)      \
    VSTORE_VECTORIZE(SCALAR_GENTYPE, __private) \
    VSTORE_VECTORIZE(SCALAR_GENTYPE, __local)   \
    VSTORE_VECTORIZE(SCALAR_GENTYPE, __global)
//int/uint are special... see below
#define VSTORE_TYPES() \
VSTORE_ADDR_SPACES(char) \
VSTORE_ADDR_SPACES(uchar) \
VSTORE_ADDR_SPACES(short) \
VSTORE_ADDR_SPACES(ushort) \
VSTORE_ADDR_SPACES(long) \
VSTORE_ADDR_SPACES(ulong) \
VSTORE_ADDR_SPACES(float) \
VSTORE_TYPES()
/* The double overloads are generated only when cl_khr_fp64 is defined. */
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
VSTORE_ADDR_SPACES(double)
#endif
/* int/uint use the generic element-wise expansion only for the __private and
 * __local address spaces; no __global expansion is requested for them here. */
VSTORE_VECTORIZE(int, __private)
VSTORE_VECTORIZE(int, __local)
VSTORE_VECTORIZE(uint, __private)
VSTORE_VECTORIZE(uint, __local)
/* Element-wise vstore3 for int in the global address space: writes the three
 * components of vec to mem[3*offset], mem[3*offset+1] and mem[3*offset+2]. */
_CLC_OVERLOAD _CLC_DEF void vstore3(int3 vec, size_t offset, global int *mem) {
  global int *dst = mem + 3 * offset;
  dst[0] = vec.x;
  dst[1] = vec.y;
  dst[2] = vec.z;
}
/* Element-wise vstore3 for uint in the global address space: writes the three
 * components of vec to mem[3*offset], mem[3*offset+1] and mem[3*offset+2]. */
_CLC_OVERLOAD _CLC_DEF void vstore3(uint3 vec, size_t offset, global uint *mem) {
  global uint *dst = mem + 3 * offset;
  dst[0] = vec.x;
  dst[1] = vec.y;
  dst[2] = vec.z;
}
/*Note: R600 doesn't support store <3 x ?>... so
* those functions aren't actually overridden here... lowest-common-denominator
*/
//We only define functions for signed_type vstoreN(), and then just cast the pointers/vectors for unsigned types
/*
 * Declares the out-of-line store helpers
 *   __clc_vstore{2,4,8,16}_<LLVM_SCALAR_TYPE>__addr<ADDR_SPACE_ID>
 * each taking a PRIM_TYPE vector and an ADDR_SPACE-qualified PRIM_TYPE pointer.
 * There is no 3-wide helper.
 *
 * The last declaration carries no trailing backslash, so the following
 * #define starts a fresh directive instead of being spliced into this body.
 */
#define _CLC_VSTORE_ASM_DECL(PRIM_TYPE,LLVM_SCALAR_TYPE,ADDR_SPACE,ADDR_SPACE_ID) \
  _CLC_DECL void __clc_vstore2_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (PRIM_TYPE##2, ADDR_SPACE PRIM_TYPE *); \
  _CLC_DECL void __clc_vstore4_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (PRIM_TYPE##4, ADDR_SPACE PRIM_TYPE *); \
  _CLC_DECL void __clc_vstore8_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (PRIM_TYPE##8, ADDR_SPACE PRIM_TYPE *); \
  _CLC_DECL void __clc_vstore16_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (PRIM_TYPE##16, ADDR_SPACE PRIM_TYPE *);
/* Defines vstore<VEC_WIDTH> for PRIM_TYPE in ADDR_SPACE as a thin wrapper:
 * the vector is reinterpreted as the S_PRIM_TYPE vector of the same width,
 * the address of element VEC_WIDTH * offset is cast to an S_PRIM_TYPE
 * pointer, and both are handed to the matching __clc_vstore helper. */
#define _CLC_VSTORE_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_SCALAR_TYPE, VEC_WIDTH, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_OVERLOAD _CLC_DEF void vstore##VEC_WIDTH(PRIM_TYPE##VEC_WIDTH vec, size_t offset, ADDR_SPACE PRIM_TYPE *x) { \
    ADDR_SPACE S_PRIM_TYPE *dst = (ADDR_SPACE S_PRIM_TYPE *)&x[VEC_WIDTH * offset]; \
    __clc_vstore##VEC_WIDTH##_##LLVM_SCALAR_TYPE##__addr##ADDR_SPACE_ID (__builtin_astype(vec, S_PRIM_TYPE##VEC_WIDTH), dst); \
  }
/* Note: the R600 back-end doesn't support store <3 x ?>, so the 3-wide
 * functions aren't actually overridden here. When the back-end supports
 * that, add the 3-wide case here and remove the vstore3 definitions from above.
 */
/*
 * Defines the helper-backed vstore overloads for widths 2, 4, 8 and 16
 * (width 3 is not generated by this macro).
 *
 * No trailing backslash after the last entry, so the following #define is
 * parsed as its own directive.
 */
#define _CLC_VSTORE_ASM_OVERLOAD_SIZES(PRIM_TYPE,S_PRIM_TYPE,LLVM_TYPE,ADDR_SPACE,ADDR_SPACE_ID) \
  _CLC_VSTORE_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 2, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_VSTORE_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 4, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_VSTORE_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 8, ADDR_SPACE, ADDR_SPACE_ID) \
  _CLC_VSTORE_ASM_DEFINE(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, 16, ADDR_SPACE, ADDR_SPACE_ID)
/*
 * Expands _CLC_VSTORE_ASM_OVERLOAD_SIZES for the only address space that gets
 * helper-backed stores here: global (address-space id 1).
 *
 * The body ends without a trailing backslash so the next #define is not
 * absorbed into it.
 */
#define _CLC_VSTORE_ASM_OVERLOAD_ADDR_SPACES(PRIM_TYPE,S_PRIM_TYPE,LLVM_TYPE) \
  _CLC_VSTORE_ASM_OVERLOAD_SIZES(PRIM_TYPE, S_PRIM_TYPE, LLVM_TYPE, global, 1)
/*
 * Declares the i32 store helpers for the __global address space (id 1) and
 * defines the int/uint vstore overloads on top of them. uint reuses the int
 * helpers (S_PRIM_TYPE is int for both expansions).
 *
 * No trailing backslash on the last body line: otherwise the invocation that
 * follows the definition would become part of the macro body and never be
 * expanded.
 */
#define _CLC_VSTORE_ASM_OVERLOADS() \
  _CLC_VSTORE_ASM_DECL(int,i32,__global,1) \
  _CLC_VSTORE_ASM_OVERLOAD_ADDR_SPACES(int,int,i32) \
  _CLC_VSTORE_ASM_OVERLOAD_ADDR_SPACES(uint,int,i32)
_CLC_VSTORE_ASM_OVERLOADS() libclc-0~git20140101/r600/lib/synchronization/ 0000775 0000000 0000000 00000000000 12260750563 0020662 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/r600/lib/synchronization/barrier.cl 0000664 0000000 0000000 00000000260 12260750563 0022626 0 ustar 00root root 0000000 0000000
#include
/* Returns the value of CLK_LOCAL_MEM_FENCE as an int so that code which
 * cannot see the macro can obtain it through a function call. */
_CLC_DEF int __clc_clk_local_mem_fence() {
  const int local_fence_flag = CLK_LOCAL_MEM_FENCE;
  return local_fence_flag;
}
/* Returns the value of CLK_GLOBAL_MEM_FENCE as an int so that code which
 * cannot see the macro can obtain it through a function call. */
_CLC_DEF int __clc_clk_global_mem_fence() {
  const int global_fence_flag = CLK_GLOBAL_MEM_FENCE;
  return global_fence_flag;
}
libclc-0~git20140101/r600/lib/synchronization/barrier_impl.ll 0000664 0000000 0000000 00000001666 12260750563 0023673 0 ustar 00root root 0000000 0000000 declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
; barrier(flags): tests %flags against the local and global fence bits in turn
; and calls the matching AMDGPU barrier intrinsic for each bit that is set.
; The bit values are obtained from the __clc_clk_*_mem_fence helper functions.
define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
check_local:
  %local_bit = call i32 @__clc_clk_local_mem_fence()
  %local_masked = and i32 %flags, %local_bit
  %want_local = icmp ne i32 %local_masked, 0
  br i1 %want_local, label %do_local, label %check_global

do_local:
  call void @llvm.AMDGPU.barrier.local() noduplicate
  br label %check_global

check_global:
  %global_bit = call i32 @__clc_clk_global_mem_fence()
  %global_masked = and i32 %flags, %global_bit
  %want_global = icmp ne i32 %global_masked, 0
  br i1 %want_global, label %do_global, label %exit

do_global:
  call void @llvm.AMDGPU.barrier.global() noduplicate
  br label %exit

exit:
  ret void
}
libclc-0~git20140101/r600/lib/workitem/ 0000775 0000000 0000000 00000000000 12260750563 0017262 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/r600/lib/workitem/get_global_size.ll 0000664 0000000 0000000 00000001173 12260750563 0022746 0 ustar 00root root 0000000 0000000 declare i32 @llvm.r600.read.global.size.x() nounwind readnone
declare i32 @llvm.r600.read.global.size.y() nounwind readnone
declare i32 @llvm.r600.read.global.size.z() nounwind readnone
; get_global_size(dim): returns the value of the r600 global-size intrinsic for
; dimension 0 (x), 1 (y) or 2 (z).
; For any other %dim the result is 1: OpenCL specifies that get_global_size()
; returns 1 for an out-of-range dimension index (a size of 0 would be wrong
; for callers that multiply or divide by the size).
define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline {
  switch i32 %dim, label %default [
    i32 0, label %x_dim
    i32 1, label %y_dim
    i32 2, label %z_dim
  ]
x_dim:
  %x = call i32 @llvm.r600.read.global.size.x() nounwind readnone
  ret i32 %x
y_dim:
  %y = call i32 @llvm.r600.read.global.size.y() nounwind readnone
  ret i32 %y
z_dim:
  %z = call i32 @llvm.r600.read.global.size.z() nounwind readnone
  ret i32 %z
default:
  ; Out-of-range dimension index: the size is defined to be 1.
  ret i32 1
}
libclc-0~git20140101/r600/lib/workitem/get_group_id.ll 0000664 0000000 0000000 00000001116 12260750563 0022261 0 ustar 00root root 0000000 0000000 declare i32 @llvm.r600.read.tgid.x() nounwind readnone
declare i32 @llvm.r600.read.tgid.y() nounwind readnone
declare i32 @llvm.r600.read.tgid.z() nounwind readnone
; get_group_id(dim): returns the value of the r600 tgid intrinsic for
; dimension 0 (x), 1 (y) or 2 (z); any other %dim yields 0.
define i32 @get_group_id(i32 %dim) nounwind readnone alwaysinline {
entry:
  switch i32 %dim, label %out_of_range [
    i32 0, label %dim_x
    i32 1, label %dim_y
    i32 2, label %dim_z
  ]

dim_x:
  %tgid_x = call i32 @llvm.r600.read.tgid.x() nounwind readnone
  ret i32 %tgid_x

dim_y:
  %tgid_y = call i32 @llvm.r600.read.tgid.y() nounwind readnone
  ret i32 %tgid_y

dim_z:
  %tgid_z = call i32 @llvm.r600.read.tgid.z() nounwind readnone
  ret i32 %tgid_z

out_of_range:
  ret i32 0
}
libclc-0~git20140101/r600/lib/workitem/get_local_id.ll 0000664 0000000 0000000 00000001124 12260750563 0022216 0 ustar 00root root 0000000 0000000 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare i32 @llvm.r600.read.tidig.y() nounwind readnone
declare i32 @llvm.r600.read.tidig.z() nounwind readnone
; get_local_id(dim): returns the value of the r600 tidig intrinsic for
; dimension 0 (x), 1 (y) or 2 (z); any other %dim yields 0.
define i32 @get_local_id(i32 %dim) nounwind readnone alwaysinline {
entry:
  switch i32 %dim, label %out_of_range [
    i32 0, label %dim_x
    i32 1, label %dim_y
    i32 2, label %dim_z
  ]

dim_x:
  %tid_x = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  ret i32 %tid_x

dim_y:
  %tid_y = call i32 @llvm.r600.read.tidig.y() nounwind readnone
  ret i32 %tid_y

dim_z:
  %tid_z = call i32 @llvm.r600.read.tidig.z() nounwind readnone
  ret i32 %tid_z

out_of_range:
  ret i32 0
}
libclc-0~git20140101/r600/lib/workitem/get_local_size.ll 0000664 0000000 0000000 00000001164 12260750563 0022600 0 ustar 00root root 0000000 0000000 declare i32 @llvm.r600.read.local.size.x() nounwind readnone
declare i32 @llvm.r600.read.local.size.y() nounwind readnone
declare i32 @llvm.r600.read.local.size.z() nounwind readnone
; get_local_size(dim): returns the value of the r600 local-size intrinsic for
; dimension 0 (x), 1 (y) or 2 (z).
; For any other %dim the result is 1: OpenCL specifies that get_local_size()
; returns 1 for an out-of-range dimension index.
define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline {
  switch i32 %dim, label %default [
    i32 0, label %x_dim
    i32 1, label %y_dim
    i32 2, label %z_dim
  ]
x_dim:
  %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone
  ret i32 %x
y_dim:
  %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone
  ret i32 %y
z_dim:
  %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone
  ret i32 %z
default:
  ; Out-of-range dimension index: the size is defined to be 1.
  ret i32 1
}
libclc-0~git20140101/r600/lib/workitem/get_num_groups.ll 0000664 0000000 0000000 00000001142 12260750563 0022646 0 ustar 00root root 0000000 0000000 declare i32 @llvm.r600.read.ngroups.x() nounwind readnone
declare i32 @llvm.r600.read.ngroups.y() nounwind readnone
declare i32 @llvm.r600.read.ngroups.z() nounwind readnone
; get_num_groups(dim): returns the value of the r600 ngroups intrinsic for
; dimension 0 (x), 1 (y) or 2 (z).
; For any other %dim the result is 1: OpenCL specifies that get_num_groups()
; returns 1 for an out-of-range dimension index.
define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline {
  switch i32 %dim, label %default [
    i32 0, label %x_dim
    i32 1, label %y_dim
    i32 2, label %z_dim
  ]
x_dim:
  %x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone
  ret i32 %x
y_dim:
  %y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone
  ret i32 %y
z_dim:
  %z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone
  ret i32 %z
default:
  ; Out-of-range dimension index: the group count is defined to be 1.
  ret i32 1
}
libclc-0~git20140101/test/ 0000775 0000000 0000000 00000000000 12260750563 0015143 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/test/add_sat.cl 0000664 0000000 0000000 00000000144 12260750563 0017061 0 ustar 00root root 0000000 0000000 __kernel void foo(__global char *a, __global char *b, __global char *c) {
*a = add_sat(*b, *c);
}
libclc-0~git20140101/test/as_type.cl 0000664 0000000 0000000 00000000076 12260750563 0017132 0 ustar 00root root 0000000 0000000 __kernel void foo(int4 *x, float4 *y) {
*x = as_int4(*y);
}
libclc-0~git20140101/test/convert.cl 0000664 0000000 0000000 00000000103 12260750563 0017135 0 ustar 00root root 0000000 0000000 __kernel void foo(int4 *x, float4 *y) {
*x = convert_int4(*y);
}
libclc-0~git20140101/test/cos.cl 0000664 0000000 0000000 00000000061 12260750563 0016244 0 ustar 00root root 0000000 0000000 __kernel void foo(float4 *f) {
*f = cos(*f);
}
libclc-0~git20140101/test/cross.cl 0000664 0000000 0000000 00000000073 12260750563 0016614 0 ustar 00root root 0000000 0000000 __kernel void foo(float4 *f) {
*f = cross(f[0], f[1]);
}
libclc-0~git20140101/test/fabs.cl 0000664 0000000 0000000 00000000061 12260750563 0016373 0 ustar 00root root 0000000 0000000 __kernel void foo(float *f) {
*f = fabs(*f);
}
libclc-0~git20140101/test/get_group_id.cl 0000664 0000000 0000000 00000000070 12260750563 0020127 0 ustar 00root root 0000000 0000000 __kernel void foo(int *i) {
i[get_group_id(0)] = 1;
}
libclc-0~git20140101/test/rsqrt.cl 0000664 0000000 0000000 00000000210 12260750563 0016627 0 ustar 00root root 0000000 0000000 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
__kernel void foo(float4 *x, double4 *y) {
x[1] = rsqrt(x[0]);
y[1] = rsqrt(y[0]);
}
libclc-0~git20140101/test/subsat.cl 0000664 0000000 0000000 00000000572 12260750563 0016770 0 ustar 00root root 0000000 0000000 __kernel void test_subsat_char(char *a, char x, char y) {
*a = sub_sat(x, y);
return;
}
/* Test kernel: writes sub_sat(x, y) for uchar operands to *a. */
__kernel void test_subsat_uchar(uchar *a, uchar x, uchar y) {
  const uchar difference = sub_sat(x, y);
  a[0] = difference;
}
/* Test kernel: writes sub_sat(x, y) for long operands to *a. */
__kernel void test_subsat_long(long *a, long x, long y) {
  const long difference = sub_sat(x, y);
  a[0] = difference;
}
__kernel void test_subsat_ulong(ulong *a, ulong x, ulong y) {
*a = sub_sat(x, y);
return;
} libclc-0~git20140101/utils/ 0000775 0000000 0000000 00000000000 12260750563 0015324 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/utils/prepare-builtins.cpp 0000664 0000000 0000000 00000004717 12260750563 0021326 0 ustar 00root root 0000000 0000000 #include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Config/config.h"
using namespace llvm;
// Positional command-line argument naming the input; defaults to "-".
// cl::opt is a class template and needs its value type spelled out; both
// options are used as strings by main() (passed as a file name, .empty(),
// .c_str()).
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc(" "), cl::init("-"));

// -o <filename>: where the processed bitcode is written. main() rejects an
// empty value.
static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"),
               cl::value_desc("filename"));
int main(int argc, char **argv) {
LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "libclc builtin preparation tool\n");
std::string ErrorMessage;
std::auto_ptr M;
{
OwningPtr BufferPtr;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr))
ErrorMessage = ec.message();
else
M.reset(ParseBitcodeFile(BufferPtr.get(), Context, &ErrorMessage));
}
if (M.get() == 0) {
errs() << argv[0] << ": ";
if (ErrorMessage.size())
errs() << ErrorMessage << "\n";
else
errs() << "bitcode didn't read correctly.\n";
return 1;
}
// Set linkage of every external definition to linkonce_odr.
for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
i->setLinkage(GlobalValue::LinkOnceODRLinkage);
}
for (Module::global_iterator i = M->global_begin(), e = M->global_end();
i != e; ++i) {
if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
i->setLinkage(GlobalValue::LinkOnceODRLinkage);
}
if (OutputFilename.empty()) {
errs() << "no output file\n";
return 1;
}
std::string ErrorInfo;
OwningPtr Out
(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 3)
sys::fs::F_Binary));
#else
raw_fd_ostream::F_Binary));
#endif
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << '\n';
exit(1);
}
WriteBitcodeToFile(M.get(), Out->os());
// Declare success.
Out->keep();
return 0;
}
libclc-0~git20140101/www/ 0000775 0000000 0000000 00000000000 12260750563 0015010 5 ustar 00root root 0000000 0000000 libclc-0~git20140101/www/index.html 0000664 0000000 0000000 00000003210 12260750563 0017001 0 ustar 00root root 0000000 0000000