pax_global_header00006660000000000000000000000064130104140670014506gustar00rootroot0000000000000052 comment=99ee83506d5fbf711df8b162e1bc238c9dffb3d1 cl-cl-1.2.3/000077500000000000000000000000001301041406700125035ustar00rootroot00000000000000cl-cl-1.2.3/.gitignore000066400000000000000000000005721301041406700144770ustar00rootroot00000000000000# git-ls-files --others --exclude-from=.git/info/exclude # Lines that start with '#' are comments. # For a project mostly in C, the following would be a good set of # exclude patterns (uncomment them if you want to use them): # *.[oa] *~ .DS_Store *.beam # Emacs Tag files TAGS # c_src /c_src/*.o /c_src/*.exp /c_src/*.lib /c_src/*.pdb # Derivates /_build/* /priv/* rebar.lockcl-cl-1.2.3/COPYRIGHT000066400000000000000000000024561301041406700140050ustar00rootroot00000000000000Copyright (C) 2007 - 2012, Rogvall Invest AB, Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
Except as contained in this notice, the name(s) of the above copyright holders shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization. cl-cl-1.2.3/README000066400000000000000000000031431301041406700133640ustar00rootroot00000000000000Welcome to the Erlang OpenCL binding To get started you need erlang, preferably R16B. You also need 'rebar3' and a 'C' compiler, i.e. GCC (or CL.EXE) and a machine with OpenCL installed. To build and test: rebar3 do compile, edoc, ct To build examples: Goto the examples directory and run make. Windows Users: The build look for the OpenCL files in /opt/local/ by default. You can also set the environment variable OPENCL_DIR to point to another location. This is an example of howto set up the building env: I'm assuming you got an mingw environment. Download a development kit from Nvidia, ATI or Intel: e.g. http://software.intel.com/en-us/vcsource/tools/opencl-sdk-2013 cp -R /c/Intel SDK/lib /opt/local/ cp -R /c/Intel SDK/include /opt/local/ Example building 64b from CMD: ------------------------------ Setup windows build environment c:\src\cl> "c:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" Setup the paths c:\src\cl> set PATH="c:\Program Files\erl5.10.1\bin";%PATH% c:\src\cl> set PATH="c:\tools\git\cmd";%PATH% c:\src\cl> set OPENCL_DIR="c:\Intel~1\" And build c:\src\cl> ..\rebar\rebar.cmd compile ======= If you want to build with mingw gcc use: CC=gcc rebar compile NOTE: That mingw64-gcc can not be linked with MSVC libs, see http://sourceforge.net/apps/trac/mingw-w64/wiki/Answer%2064%20bit%20MSVC-generated%20x64%20.lib Follow the steps there to make a libOpenCL.dll.a and it should work. NOTE: OpenCL with ATI drivers for CPU usage don't work when erlang is started within a mingw shell but does if you start it from a Windows CMD.exe shell. 
(This may depend on the AMD driver version) cl-cl-1.2.3/c_src/000077500000000000000000000000001301041406700135745ustar00rootroot00000000000000cl-cl-1.2.3/c_src/Makefile000066400000000000000000000104341301041406700152360ustar00rootroot00000000000000# # Copyright (C) 2016, Rogvall Invest AB, # # This software is licensed as described in the file COPYRIGHT, which # you should have received as part of this distribution. The terms # are also available at http://www.rogvall.se/docs/copyright.txt. # # You may opt to use, copy, modify, merge, publish, distribute and/or sell # copies of the Software, and permit persons to whom the Software is # furnished to do so, under the terms of the COPYRIGHT file. # # This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY # KIND, either express or implied. # OSNAME := $(shell uname -s) MACHINE := $(shell uname -m) OUT_C = -o OUT_L = -o OBJ = o ## us NOCL=true make to force build with gcc on windows WORDSIZE = $(shell erl -noshell -eval "io:format([126,119,126,110],[erlang:system_info(wordsize)*8])" -s erlang halt) ifeq ($(ERLANG_ROOT_DIR), ) ERLDIR := $(shell erl -noshell -eval "io:format([126,115,126,110],[code:root_dir()])" -s erlang halt) ERL_C_INCLUDE_DIR := "$(ERLDIR)/usr/include" else ERL_C_INCLUDE_DIR := "$(ERLANG_ROOT_DIR)/usr/include" endif OCL_DIR := $(shell cd ..; pwd) ifneq (, $(findstring MINGW,$(OSNAME))) MINGW = Yes endif MAC_OS_X = No WIN32_GCC = No WIN32_CL = No LINUX = No EXT = so PRIVDIR=../priv ifeq ($(TYPE), debug) CFLAGS += -Ddebug -DDEBUG -g -Wall -Wextra -Wswitch-default -Wswitch-enum -D_THREAD_SAFE CFLAGS += -D_REENTRANT -fno-common -I$(ERL_C_INCLUDE_DIR) WIN_DEBUG = -Ddebug -DDEBUG endif ifeq ($(TYPE), release) CFLAGS += -Wall -Wextra -Wswitch-default -Wswitch-enum -D_THREAD_SAFE -D_REENTRANT -fno-common CFLAGS += -Wno-deprecated-declarations -Wno-missing-field-initializers -I$(ERL_C_INCLUDE_DIR) endif ifeq ($(OSNAME), Linux) LINUX = Yes CFLAGS += -I/usr/include/nvidia-current CFLAGS += 
-I/opt/AMDAPP/include ifeq ($(WORDSIZE), 32) CFLAGS += -O3 -fPIC -m32 endif ifeq ($(WORDSIZE), 64) CFLAGS += -O3 -fPIC -m64 endif LD_SHARED := $(CC) -shared LDFLAGS += -lOpenCL endif ifeq ($(OSNAME), Darwin) MAC_OS_X = Yes ifeq ($(WORDSIZE), 32) CFLAGS += -O3 -fPIC -m32 -DDARWIN -no-cpp-precomp LD_SHARED := $(CC) -m32 -bundle -flat_namespace -undefined suppress endif ifeq ($(WORDSIZE), 64) CFLAGS += -O3 -fPIC -m64 -DDARWIN -no-cpp-precomp LD_SHARED := $(CC) -m64 -bundle -flat_namespace -undefined suppress endif LDFLAGS += -framework OpenCL endif ############### WINDOWS HACK ifeq ($(MINGW), Yes) EXT = dll CC_OR_NOCL=$(findstring which:,$(shell which cl.exe))$(NOCL) ifeq (, $(CC_OR_NOCL)) ## Use Microsoft CL WIN32_CL = Yes CC=cl.exe MS2C = MSYS2_ARG_CONV_EXCL=* OUT_C = /Fo ifeq ($(OPENCL_DIR), ) OPENCL_DIR = c:/msys64/opt/local/ endif CFLAGS = /FS /Zi /nologo /W1 -DWIN32 -D__WIN32__ CFLAGS += /I$(OPENCL_DIR)/include /I$(ERL_C_INCLUDE_DIR) LD_SHARED=link.exe /DLL OUT_L=/OUT: ifeq ($(WORDSIZE), 32) LDFLAGS += /NOLOGO $(OPENCL_DIR)/lib/x86/OpenCL.lib else CFLAGS += -DWIN_X64 LDFLAGS += /NOLOGO $(OPENCL_DIR)/lib/x64/OpenCL.lib endif else ############## Use mingw-gcc CC=gcc WIN32_GCC = Yes CFLAGS += -D__WIN32__ ifeq ($(OPENCL_DIR), ) OPENCL_DIR = /opt/local/ endif ifeq ($(WORDSIZE), 32) CFLAGS += -shared -I$(OPENCL_DIR)/include -m32 -DWIN32 LDFLAGS += -L$(OPENCL_DIR)/lib/x86 -lOpenCL endif ifeq ($(WORDSIZE), 64) CFLAGS += -shared -I$(OPENCL_DIR)/include -m64 -DWIN32 LDFLAGS += -L$(OPENCL_DIR)/lib/x64 -lOpenCL endif LD_SHARED := $(CC) -shared ## Optimizations is broken on mingw 4.4.0 (it crashes with it on) GCC_VERSION = $(shell gcc -dumpversion) ifneq ($(GCC_VERSION), 4.4.0) CFLAGS += -O3 endif endif endif ############### WINDOWS end CL_NIF = $(PRIVDIR)/cl_nif.$(EXT) CL_NIF_OBJS = \ cl_nif.$(OBJ) \ cl_hash.$(OBJ) CL_NIF_SRC = \ cl_nif.c \ cl_hash.c all: $(MAKE) nif TYPE=release debug: $(MAKE) nif TYPE=debug clean: rm -f $(CL_NIF_OBJS) rm -f $(CL_NIF) release: $(MAKE) nif 
TYPE=release nif: $(CL_NIF) cl_nif.$(OBJ): cl_hash.h clean_internal: -rm -f *.$(OBJ) -rm -f $(PRIVDIR)/*.$(EXT) ifeq ($(WIN32_CL),No) %.$(OBJ): %.c $(CC) -c $(OUT_C) $@ $(CFLAGS) $< $(CL_NIF): $(OCL_LIB) $(CL_NIF_OBJS) @mkdir -p $(PRIVDIR) $(LD_SHARED) $(OUT_L) $@ $(CL_NIF_OBJS) $(LDFLAGS) else %.$(OBJ): %.c $(MS2C) $(CC) -c $(OUT_C)$@ $(CFLAGS) $< $(CL_NIF): $(OCL_LIB) $(CL_NIF_OBJS) @mkdir -p $(PRIVDIR) $(MS2C) $(LD_SHARED) $(OUT_L)$@ $(CL_NIF_OBJS) $(LDFLAGS) endif cl-cl-1.2.3/c_src/cl_hash.c000066400000000000000000000202701301041406700153420ustar00rootroot00000000000000/****** BEGIN COPYRIGHT ******************************************************* * * Copyright (C) 2007 - 2012, Rogvall Invest AB, * * This software is licensed as described in the file COPYRIGHT, which * you should have received as part of this distribution. The terms * are also available at http://www.rogvall.se/docs/copyright.txt. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is * furnished to do so, under the terms of the COPYRIGHT file. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * ****** END COPYRIGHT ********************************************************/ /* ** Linear hash */ #include #include #include #include "cl_hash.h" #define LHASH_SZEXP 8 #define LHASH_SEGSZ (1 << LHASH_SZEXP) #define LHASH_SZMASK ((1 << LHASH_SZEXP)-1) #define LHASH_SEG(i) ((i)>>LHASH_SZEXP) #define LHASH_POS(i) ((i)&LHASH_SZMASK) #define LHASH_SEG_LEN 256 /* When growing init segs */ #define LHASH_SEG_INCREAMENT 128 /* Number of segments to grow */ #define LHASH_BUCKET(lh, i) (lh)->seg[LHASH_SEG(i)][LHASH_POS(i)] #define LHASH_IX(lh, hval) \ (((((hval) & (lh)->szm)) < (lh)->p) ? 
\ ((hval) & (((lh)->szm << 1) | 1)) : \ (((hval) & (lh)->szm))) #ifndef WIN32 #define INLINE inline #else #define INLINE #endif static lhash_bucket_t** lhash_alloc_seg(int seg_sz) { lhash_bucket_t** bp; int sz = sizeof(lhash_bucket_t*)*seg_sz; bp = (lhash_bucket_t**) malloc(sz); memset(bp, 0, sz); return bp; } INLINE static lhash_bucket_t** lhash_HLOOKUP(lhash_t* lh, lhash_value_t hval, void* key) { int ix = LHASH_IX(lh, hval); lhash_bucket_t** bpp = &LHASH_BUCKET(lh, ix); lhash_bucket_t* b = *bpp; while(b != (lhash_bucket_t*) 0) { if ((b->hvalue == hval) && (lh->func.cmp(key, (void*) b) == 0)) return bpp; bpp = &b->next; b = b->next; } return bpp; } /* scan bucket for key return bucket */ INLINE static lhash_bucket_t** lhash_LOOKUP(lhash_t* lh, void* key) { return lhash_HLOOKUP(lh, lh->func.hash(key), key); } lhash_t* lhash_init(lhash_t* lh, char* name, int thres, lhash_func_t* func) { lhash_bucket_t*** bp; if (!(bp = (lhash_bucket_t***) malloc(sizeof(lhash_bucket_t**)))) return 0; lh->func = *func; lh->is_allocated = 0; lh->name = name; lh->thres = thres; lh->szm = LHASH_SZMASK; lh->nactive = LHASH_SEGSZ; lh->nitems = 0; lh->p = 0; lh->nsegs = 1; lh->seg = bp; lh->seg[0] = lhash_alloc_seg(LHASH_SEGSZ); lh->nslots = LHASH_SEGSZ; lh->n_seg_alloc = 1; lh->n_seg_free = 0; lh->n_resize = 0; return lh; } static void lhash_grow(lhash_t* lh) { lhash_bucket_t** bp; lhash_bucket_t** bps; lhash_bucket_t* b; unsigned int ix; unsigned int nszm = (lh->szm << 1) | 1; if (lh->nactive >= lh->nslots) { /* Time to get a new array */ if (LHASH_POS(lh->nactive) == 0) { unsigned int six = LHASH_SEG(lh->nactive); if (six == lh->nsegs) { int i, sz; if (lh->nsegs == 1) sz = LHASH_SEG_LEN; else sz = lh->nsegs + LHASH_SEG_INCREAMENT; lh->seg = (lhash_bucket_t***) realloc(lh->seg, sizeof(lhash_bucket_t**)*sz); lh->nsegs = sz; lh->n_resize++; for (i = six+1; i < sz; i++) lh->seg[i] = 0; } lh->seg[six] = lhash_alloc_seg(LHASH_SEGSZ); lh->nslots += LHASH_SEGSZ; lh->n_seg_alloc++; } } ix = 
lh->p; bp = &LHASH_BUCKET(lh, ix); ix += (lh->szm+1); bps = &LHASH_BUCKET(lh, ix); b = *bp; while (b != 0) { ix = b->hvalue & nszm; if (ix == lh->p) bp = &b->next; /* object stay */ else { *bp = b->next; /* unlink */ b->next = *bps; /* link */ *bps = b; } b = *bp; } lh->nactive++; if (lh->p == lh->szm) { lh->p = 0; lh->szm = nszm; } else lh->p++; } /* ** Shrink the hash table ** Remove segments if they are empty ** but do not reallocate the segment index table !!! */ static void lhash_shrink(lhash_t* lh) { lhash_bucket_t** bp; if (lh->nactive == LHASH_SEGSZ) return; lh->nactive--; if (lh->p == 0) { lh->szm >>= 1; lh->p = lh->szm; } else lh->p--; bp = &LHASH_BUCKET(lh, lh->p); while(*bp != 0) bp = &(*bp)->next; *bp = LHASH_BUCKET(lh, lh->nactive); LHASH_BUCKET(lh, lh->nactive) = 0; if ((lh->nactive & LHASH_SZMASK) == LHASH_SZMASK) { int six = LHASH_SEG(lh->nactive)+1; free(lh->seg[six]); lh->seg[six] = 0; lh->nslots -= LHASH_SEGSZ; lh->n_seg_free++; } } lhash_t* lhash_new(char* name, int thres, lhash_func_t* func) { lhash_t* tp; if (!(tp = (lhash_t*) malloc(sizeof(lhash_t)))) return 0; if (!lhash_init(tp, name, thres, func)) { free(tp); return 0; } tp->is_allocated = 1; return tp; } void lhash_delete(lhash_t* lh) { lhash_bucket_t*** sp = lh->seg; int n = lh->nsegs; while(n--) { lhash_bucket_t** bp = *sp; if (bp != 0) { int m = LHASH_SEGSZ; while(m--) { lhash_bucket_t* p = *bp++; while(p != 0) { lhash_bucket_t* next = p->next; if (lh->func.release) lh->func.release((void*) p); p = next; } } free(*sp); } sp++; } free(lh->seg); if (lh->is_allocated) free(lh); } void* lhash_insert_new(lhash_t* lh, void* key, void* data) { lhash_value_t hval = lh->func.hash(key); lhash_bucket_t** bpp = lhash_HLOOKUP(lh, hval, key); lhash_bucket_t* b = *bpp; if (b) { /* release data if copy function is not defined */ if (!lh->func.copy) { if (lh->func.release) lh->func.release(data); } return 0; } b = (lhash_bucket_t*) (lh->func.copy ? 
lh->func.copy(data) : data); b->hvalue = hval; b->next = *bpp; *bpp = b; lh->nitems++; if ((lh->nitems / lh->nactive) >= lh->thres) lhash_grow(lh); return (void*) b; } void* lhash_Insert(lhash_t* lh, void* key, void* data) { lhash_value_t hval = lh->func.hash(key); lhash_bucket_t** bpp = lhash_HLOOKUP(lh, hval, key); lhash_bucket_t* b = *bpp; if (b) { lhash_bucket_t* b_next = b->next; if (lh->func.release) lh->func.release(b); b = (lhash_bucket_t*) (lh->func.copy ? lh->func.copy(data) : data); b->hvalue = hval; b->next = b_next; *bpp = b; } else { b = (lhash_bucket_t*) (lh->func.copy ? lh->func.copy(data) : data); b->hvalue = hval; b->next = 0; *bpp = b; lh->nitems++; if ((lh->nitems / lh->nactive) >= lh->thres) lhash_grow(lh); } return (void*) b; } void* lhash_lookup(lhash_t* lh, void* key) { lhash_bucket_t** bpp = lhash_LOOKUP(lh, key); return *bpp; } /* ** Erase an item */ void* lhash_erase(lhash_t* lh, void* key) { lhash_bucket_t** bpp = lhash_LOOKUP(lh, key); lhash_bucket_t* b = *bpp; if (b) { *bpp = b->next; /* unlink */ if (lh->func.release) lh->func.release((void*) b); lh->nitems--; if ((lh->nitems / lh->nactive) < lh->thres) lhash_shrink(lh); } return (void*)b; } void lhash_each(lhash_t* lh, void (elem)(lhash_t* lh, void* elem, void* arg), void* arg) { int i; int nslots = lh->nslots; for (i = 0; i < nslots; i++) { lhash_bucket_t* list = LHASH_BUCKET(lh, i); while(list) { lhash_bucket_t* next = list->next; elem(lh, (void*) list, arg); list = next; } } } void lhash_info(lhash_t* lh) { unsigned int i; int depth = 0; for (i = 0; i < lh->nslots; i++) { lhash_bucket_t* list = LHASH_BUCKET(lh, i); int d = 0; while(list) { list = list->next; d++; } if (d > depth) depth = d; } printf(" Name: %s\r\n", lh->name); printf(" Size: %d\r\n", lh->szm+1); printf("Active: %d\r\n", lh->nactive); printf(" Split: %d\r\n", lh->p); printf(" Items: %d\r\n", lh->nitems); printf(" Slots: %d\r\n", lh->nslots); printf(" Segs: %d\r\n", lh->nsegs); printf(" Thres: %d\r\n", lh->thres); 
printf(" Ratio: %e\r\n", (float) lh->nitems / (float) lh->nactive); printf(" Max: %d\r\n", depth); printf("Resize: %d\r\n", lh->n_resize); printf(" Alloc: %d\r\n", lh->n_seg_alloc); printf(" Free: %d\r\n", lh->n_seg_free); } cl-cl-1.2.3/c_src/cl_hash.h000066400000000000000000000050011301041406700153420ustar00rootroot00000000000000/****** BEGIN COPYRIGHT ******************************************************* * * Copyright (C) 2007 - 2012, Rogvall Invest AB, * * This software is licensed as described in the file COPYRIGHT, which * you should have received as part of this distribution. The terms * are also available at http://www.rogvall.se/docs/copyright.txt. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is * furnished to do so, under the terms of the COPYRIGHT file. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * ****** END COPYRIGHT ********************************************************/ #ifndef __ECL_HASH_H__ #define __ECL_HASH_H__ #include typedef uintptr_t lhash_value_t; typedef struct _lhash_bucket_t { struct _lhash_bucket_t* next; lhash_value_t hvalue; } lhash_bucket_t; typedef struct { lhash_value_t (*hash)(void*); // calculate hash int (*cmp)(void*, void*); // compare data items void (*release)(void*); // data release (free) void* (*copy)(void*); // copy (may be used with insert) } lhash_func_t; typedef struct { lhash_func_t func; // functions int is_allocated; char* name; unsigned int thres; // Medium bucket chain len, for grow unsigned int szm; // current size mask unsigned int nactive; // Number of "active" slots unsigned int nslots; // Total number of slots unsigned int nitems; // Total number of items unsigned int p; // Split position unsigned int nsegs; // Number of segments unsigned int n_resize; // Number of index realloc calls unsigned int n_seg_alloc; // Number of segment allocations 
unsigned int n_seg_free; // Number of segment destroy lhash_bucket_t*** seg; } lhash_t; extern lhash_t* lhash_new(char* name, int thres, lhash_func_t* func); extern lhash_t* lhash_init(lhash_t* lh, char* name, int thres, lhash_func_t* func); extern void lhash_delete(lhash_t* lh); extern void* lhash_lookup(lhash_t* lh, void* key); extern void* lhash_insert(lhash_t* lh, void* key, void* data); extern void* lhash_insert_new(lhash_t* lh, void* key, void* data); extern void* lhash_erase(lhash_t* lh, void* key); extern void lhash_each(lhash_t* lh, void (elem)(lhash_t* lh, void* elem, void* arg), void* arg); extern void lhash_Info(lhash_t* lh); #endif cl-cl-1.2.3/c_src/cl_nif.c000066400000000000000000006500221301041406700151770ustar00rootroot00000000000000/****** BEGIN COPYRIGHT ******************************************************* * * Copyright (C) 2007 - 2012, Rogvall Invest AB, * * This software is licensed as described in the file COPYRIGHT, which * you should have received as part of this distribution. The terms * are also available at http://www.rogvall.se/docs/copyright.txt. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is * furnished to do so, under the terms of the COPYRIGHT file. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. 
* ****** END COPYRIGHT ********************************************************/ // // NIF interface for OpenCL binding // #include #ifndef WIN32 #include #include #include #include #include #include #include #else #include #endif #define CL_USE_DEPRECATED_OPENCL_1_1_APIS 1 #ifdef DARWIN #include #else #include #include #endif // Old cl_platform doesn't have the CL_CALLBACK #ifndef CL_CALLBACK #define CL_CALLBACK #endif #ifdef WIN32 typedef cl_bool bool; #define true 1 #define false 0 #endif #ifdef WIN_X64 #define ecl_get_sizet(a1,a2,a3) enif_get_uint64(a1,a2,a3) #define ecl_make_sizet(a1,a2) enif_make_uint64(a1,a2) #else #define ecl_get_sizet(a1,a2,a3) enif_get_ulong(a1,a2,(unsigned long*)a3) #define ecl_make_sizet(a1,a2) enif_make_ulong(a1,a2) #endif #define UNUSED(a) ((void) a) #include "erl_nif.h" #include "cl_hash.h" #define sizeof_array(a) (sizeof(a) / sizeof(a[0])) // #define DEBUG #ifdef DEBUG #include static void ecl_emit_error(char* file, int line, ...); #define DBG(...) ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__) #else #define DBG(...) #endif #define CL_ERROR(...) 
ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__) // soft limits #define MAX_INFO_SIZE 1024 #define MAX_DEVICES 128 #define MAX_PLATFORMS 128 #define MAX_OPTION_LIST 1024 #define MAX_KERNEL_NAME 1024 #define MAX_KERNELS 1024 #define MAX_SOURCES 128 #define MAX_WAIT_LIST 128 #define MAX_WORK_SIZE 3 #define MAX_IMAGE_FORMATS 128 #define MAX_MEM_OBJECTS 128 // Atom macros #define ATOM(name) atm_##name #define DECL_ATOM(name) \ ERL_NIF_TERM atm_##name = 0 // require env in context (ugly) #define LOAD_ATOM(name) \ atm_##name = enif_make_atom(env,#name) #define LOAD_ATOM_STRING(name,string) \ atm_##name = enif_make_atom(env,string) #ifndef CL_VERSION_1_2 typedef struct _cl_image_desc { cl_mem_object_type image_type; size_t image_width; size_t image_height; size_t image_depth; size_t image_array_size; size_t image_row_pitch; size_t image_slice_pitch; cl_uint num_mip_levels; cl_uint num_samples; cl_mem buffer; } cl_image_desc; #endif // Wrapper to handle reource atom name etc. typedef struct { char* name; ERL_NIF_TERM type; // resource atom name ErlNifResourceType* res; // the resource type size_t size; // "real" object size } ecl_resource_t; struct _ecl_object_t; typedef struct _ecl_platform_t { struct _ecl_object_t* o_platform; cl_uint ndevices; struct _ecl_object_t** o_device; } ecl_platform_t; typedef struct _ecl_env_t { lhash_t ref; // cl -> ecl ErlNifRWLock* ref_lock; // lhash operation lock cl_uint nplatforms; ecl_platform_t* platform; cl_int icd_version; } ecl_env_t; typedef struct _ecl_object_t { lhash_bucket_t hbucket; // inheritance: map: cl->ecl ecl_env_t* env; cl_int version; struct _ecl_object_t* parent; // parent resource object union { cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_mem mem; cl_sampler sampler; cl_program program; cl_kernel kernel; cl_event event; void* opaque; }; } ecl_object_t; // "inherits" ecl_object_t and add special binary objects (read/write) typedef struct _ecl_event_t { ecl_object_t obj; // 
FIXED place for inhertiance bool rd; // Read binary operation bool rl; // Do not release if true ErlNifEnv* bin_env; // environment to hold binary term data ErlNifBinary* bin; // read/write data } ecl_event_t; #define KERNEL_ARG_OTHER 0 #define KERNEL_ARG_MEM 1 #define KERNEL_ARG_SAMPLER 2 // This is a special construct inorder to kee typedef struct { int type; // 0=other, 1=mem, 2=samper union { cl_mem mem; cl_sampler sampler; void* other; void* value; }; } ecl_kernel_arg_t; // "inherits" ecl_object_t and reference count kernel args typedef struct _ecl_kernel_t { ecl_object_t obj; // FIXED place for inhertiance cl_uint num_args; // number of arguments used by the kernel ecl_kernel_arg_t* arg; // array of current args } ecl_kernel_t; typedef enum { OCL_CHAR, // cl_char OCL_UCHAR, // cl_uchar OCL_SHORT, // cl_short OCL_USHORT, // cl_ushort OCL_INT, // cl_int OCL_UINT, // cl_uint OCL_LONG, // cl_long OCL_ULONG, // cl_ulong OCL_HALF, // cl_half OCL_FLOAT, // cl_float OCL_DOUBLE, // cl_double OCL_BOOL, // cl_bool OCL_STRING, // cl_char* OCL_BITFIELD, // cl_ulong OCL_ENUM, // cl_int OCL_POINTER, // void* OCL_SIZE, // size_t OCL_PLATFORM, // void* OCL_DEVICE, // void* OCL_CONTEXT, // void* OCL_PROGRAM, // void* OCL_COMMAND_QUEUE, // void* OCL_IMAGE_FORMAT, // cl_image_format #if CL_VERSION_1_2 == 1 OCL_DEVICE_PARTITION, // cl_device_partition_property #endif OCL_NUM_TYPES } ocl_type_t; #define OCL_DEVICE_TYPE OCL_BITFIELD #define OCL_DEVICE_FP_CONFIG OCL_BITFIELD #define OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE OCL_ENUM #define OCL_PLATFORM_INFO OCL_UINT #define OCL_DEVICE_INFO OCL_UINT #define OCL_DEVICE_EXEC_CAPABILITIES OCL_BITFIELD #define OCL_QUEUE_PROPERTIES OCL_BITFIELD #define OCL_DEVICE_LOCAL_MEM_TYPE OCL_ENUM #define OCL_MEM_OBJECT_TYPE OCL_ENUM #define OCL_MEM_FLAGS OCL_BITFIELD #define OCL_SAMPLER_ADDRESSING_MODE OCL_ENUM #define OCL_SAMPLER_FILTER_MODE OCL_ENUM #define OCL_BUILD_STATUS OCL_ENUM #define OCL_DEVICE_DOUBLE_FP_CONFIG OCL_BITFIELD #define 
OCL_PROGRAM_BINARY_TYPE OCL_ENUM typedef struct { ERL_NIF_TERM* key; ErlNifUInt64 value; } ecl_kv_t; typedef struct { ERL_NIF_TERM* info_key; // Atom cl_uint info_id; // Information bool is_array; // return type is a vector of data ocl_type_t info_type; // info data type void* extern_info; // Encode/Decode enum/bitfields } ecl_info_t; typedef enum { ECL_MESSAGE_STOP, // time to die ECL_MESSAGE_FLUSH, // call clFlush ECL_MESSAGE_FINISH, // call clFinish ECL_MESSAGE_WAIT_FOR_EVENT // call clWaitForEvents (only one event!) } ecl_message_type_t; struct _ecl_thread_t; typedef struct ecl_message_t { ecl_message_type_t type; ErlNifPid sender; // sender pid ErlNifEnv* env; // message environment (ref, bin's etc) ERL_NIF_TERM ref; // ref (in env!) union { ecl_object_t* queue; // ECL_MESSAGE_FLUSH/ECL_MESSAGE_FINISH ecl_event_t* event; // ECL_MESSAGE_WAIT_FOR_EVENT }; } ecl_message_t; typedef struct _ecl_qlink_t { struct _ecl_qlink_t* next; ecl_message_t mesg; } ecl_qlink_t; #define MAX_QLINK 8 // pre-allocated qlinks typedef struct { ErlNifMutex* mtx; ErlNifCond* cv; int len; ecl_qlink_t* front; // pick from front ecl_qlink_t* rear; // insert at rear ecl_qlink_t* free; // free list in ql ecl_qlink_t ql[MAX_QLINK]; // "pre" allocated qlinks } ecl_queue_t; typedef struct _ecl_thread_t { ErlNifTid tid; // thread id ecl_queue_t q; // message queue void* arg; // thread init argument } ecl_thread_t; // "inherits" ecl_object_t and add keep track of the context thread typedef struct _ecl_context_t { ecl_object_t obj; // FIXED place for inhertiance ecl_thread_t* thr; // The context thread } ecl_context_t; static void* ecl_context_main(void* arg); static int ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); static int ecl_reload(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info); static int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info); static void ecl_unload(ErlNifEnv* env, void* priv_data); static void 
ecl_load_dynfunctions(ecl_env_t* ecl); static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_create_image2d(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif typedef cl_mem (* ECL_CREATE_IMAGE)(cl_context,cl_mem_flags, const cl_image_format * , const cl_image_desc *, void *, cl_int *); ECL_CREATE_IMAGE eclCreateImage; static ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_mem_object_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM 
ecl_get_image_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_create_program_with_builtin_kernels( ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif typedef cl_int (* ECL_UNLOAD_PLATFORM_COMPILER)(cl_platform_id); ECL_UNLOAD_PLATFORM_COMPILER eclUnloadPlatformCompiler; static ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_set_kernel_arg_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* 
env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif typedef cl_int (* ECL_ENQUEUE_MARKER_WITH_WAIT_LIST)(cl_command_queue, cl_uint, const cl_event *, cl_event *); ECL_ENQUEUE_MARKER_WITH_WAIT_LIST eclEnqueueMarkerWithWaitList; typedef cl_int (* ECL_ENQUEUE_BARRIER_WITH_WAIT_LIST)(cl_command_queue, cl_uint, const cl_event *, cl_event *); ECL_ENQUEUE_BARRIER_WITH_WAIT_LIST eclEnqueueBarrierWithWaitList; static ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM 
ecl_enqueue_write_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); #endif static ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); // speical version of clWaitForEvents static ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ErlNifFunc ecl_funcs[] = { { "noop", 0, ecl_noop }, { "versions", 0, ecl_versions }, // Platform { "get_platform_ids", 0, ecl_get_platform_ids }, { "get_platform_info", 2, ecl_get_platform_info }, // Devices { "get_device_ids", 2, ecl_get_device_ids }, #if CL_VERSION_1_2 == 1 { "create_sub_devices", 2, ecl_create_sub_devices }, #endif { "get_device_info", 2, ecl_get_device_info }, // Context { 
"create_context", 1, ecl_create_context }, { "get_context_info", 2, ecl_get_context_info }, // Command queue { "create_queue", 3, ecl_create_queue }, { "get_queue_info", 2, ecl_get_queue_info }, // Memory object { "create_buffer", 4, ecl_create_buffer }, #if CL_VERSION_1_1 == 1 { "create_sub_buffer", 4, ecl_create_sub_buffer }, #endif { "get_mem_object_info", 2, ecl_get_mem_object_info }, { "get_image_info", 2, ecl_get_image_info }, { "create_image2d", 7, ecl_create_image2d }, { "create_image3d", 9, ecl_create_image3d }, #if CL_VERSION_1_2 == 1 { "create_image", 5, ecl_create_image }, #endif { "get_supported_image_formats",3, ecl_get_supported_image_formats }, // Sampler { "create_sampler", 4, ecl_create_sampler }, { "get_sampler_info", 2, ecl_get_sampler_info }, // Program { "create_program_with_source", 2, ecl_create_program_with_source }, { "create_program_with_binary", 3, ecl_create_program_with_binary }, #if CL_VERSION_1_2 == 1 { "create_program_with_builtin_kernels", 3, ecl_create_program_with_builtin_kernels }, #endif { "async_build_program", 3, ecl_async_build_program }, #if CL_VERSION_1_2 == 1 { "unload_platform_compiler", 1, ecl_unload_platform_compiler }, #endif #if CL_VERSION_1_2 == 1 { "async_compile_program", 5, ecl_async_compile_program }, #endif #if CL_VERSION_1_2 == 1 { "async_link_program", 4, ecl_async_link_program }, #endif { "unload_compiler", 0, ecl_unload_compiler }, { "get_program_info", 2, ecl_get_program_info }, { "get_program_build_info", 3, ecl_get_program_build_info }, // Kernel { "create_kernel", 2, ecl_create_kernel }, { "create_kernels_in_program", 1, ecl_create_kernels_in_program }, { "set_kernel_arg", 3, ecl_set_kernel_arg }, { "set_kernel_arg_size", 3, ecl_set_kernel_arg_size }, { "get_kernel_info", 2, ecl_get_kernel_info }, { "get_kernel_workgroup_info", 3, ecl_get_kernel_workgroup_info }, #if CL_VERSION_1_2 == 1 { "get_kernel_arg_info", 3, ecl_get_kernel_arg_info }, #endif // Events { "enqueue_task", 4, ecl_enqueue_task }, { 
"enqueue_nd_range_kernel", 6, ecl_enqueue_nd_range_kernel }, { "enqueue_marker", 1, ecl_enqueue_marker }, { "enqueue_barrier", 1, ecl_enqueue_barrier }, #if CL_VERSION_1_2 == 1 { "enqueue_barrier_with_wait_list", 2, ecl_enqueue_barrier_with_wait_list }, { "enqueue_marker_with_wait_list", 2, ecl_enqueue_marker_with_wait_list }, #endif { "enqueue_wait_for_events", 2, ecl_enqueue_wait_for_events }, { "enqueue_read_buffer", 5, ecl_enqueue_read_buffer }, #if CL_VERSION_1_1 == 1 { "enqueue_read_buffer_rect", 10, ecl_enqueue_read_buffer_rect }, #endif { "enqueue_write_buffer", 7, ecl_enqueue_write_buffer }, #if CL_VERSION_1_1 == 1 { "enqueue_write_buffer_rect", 11, ecl_enqueue_write_buffer_rect }, #endif #if CL_VERSION_1_2 == 1 { "enqueue_fill_buffer", 6, ecl_enqueue_fill_buffer }, #endif { "enqueue_read_image", 7, ecl_enqueue_read_image }, { "enqueue_write_image", 9, ecl_enqueue_write_image }, { "enqueue_copy_buffer", 7, ecl_enqueue_copy_buffer }, #if CL_VERSION_1_1 == 1 { "enqueue_copy_buffer_rect", 11, ecl_enqueue_copy_buffer_rect }, #endif { "enqueue_copy_image", 6, ecl_enqueue_copy_image }, #if CL_VERSION_1_2 == 1 { "enqueue_fill_image", 6, ecl_enqueue_fill_image }, #endif { "enqueue_copy_image_to_buffer", 7, ecl_enqueue_copy_image_to_buffer }, { "enqueue_copy_buffer_to_image", 7, ecl_enqueue_copy_buffer_to_image }, { "enqueue_map_buffer", 6, ecl_enqueue_map_buffer }, { "enqueue_map_image", 6, ecl_enqueue_map_image }, { "enqueue_unmap_mem_object", 3, ecl_enqueue_unmap_mem_object }, #if CL_VERSION_1_2 == 1 { "enqueue_migrate_mem_objects", 4, ecl_enqueue_migrate_mem_objects }, #endif { "async_flush", 1, ecl_async_flush }, { "async_finish", 1, ecl_async_finish }, { "async_wait_for_event", 1, ecl_async_wait_for_event }, { "get_event_info", 2, ecl_get_event_info } }; static ecl_resource_t platform_r; static ecl_resource_t device_r; static ecl_resource_t context_r; static ecl_resource_t command_queue_r; static ecl_resource_t mem_r; static ecl_resource_t sampler_r; static 
ecl_resource_t program_r; static ecl_resource_t kernel_r; static ecl_resource_t event_r; // General atoms DECL_ATOM(ok); DECL_ATOM(error); DECL_ATOM(unknown); DECL_ATOM(undefined); DECL_ATOM(true); DECL_ATOM(false); // async messages DECL_ATOM(cl_async); DECL_ATOM(cl_event); // Type names DECL_ATOM(platform_t); DECL_ATOM(device_t); DECL_ATOM(context_t); DECL_ATOM(command_queue_t); DECL_ATOM(mem_t); DECL_ATOM(sampler_t); DECL_ATOM(program_t); DECL_ATOM(kernel_t); DECL_ATOM(event_t); // 'cl' type names DECL_ATOM(char); DECL_ATOM(char2); DECL_ATOM(char4); DECL_ATOM(char8); DECL_ATOM(char16); DECL_ATOM(uchar); DECL_ATOM(uchar2); DECL_ATOM(uchar4); DECL_ATOM(uchar8); DECL_ATOM(uchar16); DECL_ATOM(short); DECL_ATOM(short2); DECL_ATOM(short4); DECL_ATOM(short8); DECL_ATOM(short16); DECL_ATOM(ushort); DECL_ATOM(ushort2); DECL_ATOM(ushort4); DECL_ATOM(ushort8); DECL_ATOM(ushort16); DECL_ATOM(int); DECL_ATOM(int2); DECL_ATOM(int4); DECL_ATOM(int8); DECL_ATOM(int16); DECL_ATOM(uint); DECL_ATOM(uint2); DECL_ATOM(uint4); DECL_ATOM(uint8); DECL_ATOM(uint16); DECL_ATOM(long); DECL_ATOM(long2); DECL_ATOM(long4); DECL_ATOM(long8); DECL_ATOM(long16); DECL_ATOM(ulong); DECL_ATOM(ulong2); DECL_ATOM(ulong4); DECL_ATOM(ulong8); DECL_ATOM(ulong16); DECL_ATOM(half); DECL_ATOM(float); DECL_ATOM(float2); DECL_ATOM(float4); DECL_ATOM(float8); DECL_ATOM(float16); DECL_ATOM(double); DECL_ATOM(double2); DECL_ATOM(double4); DECL_ATOM(double8); DECL_ATOM(double16); // records for image creation DECL_ATOM(cl_image_desc); DECL_ATOM(cl_image_format); // Platform info // DECL_ATOM(profile); // DECL_ATOM(version); // DECL_ATOM(name); // DECL_ATOM(vendor); // DECL_ATOM(extensions); // Context info DECL_ATOM(reference_count); DECL_ATOM(devices); DECL_ATOM(properties); // Queue info DECL_ATOM(context); DECL_ATOM(num_devices); DECL_ATOM(device); // DECL_ATOM(reference_count); // DECL_ATOM(properties); // Mem info DECL_ATOM(object_type); DECL_ATOM(flags); DECL_ATOM(size); DECL_ATOM(host_ptr); 
DECL_ATOM(map_count); // DECL_ATOM(reference_count); // DECL_ATOM(context); // Image info DECL_ATOM(format); DECL_ATOM(element_size); DECL_ATOM(row_pitch); DECL_ATOM(slice_pitch); DECL_ATOM(width); DECL_ATOM(height); DECL_ATOM(depth); // Sampler info // DECL_ATOM(reference_count); // DECL_ATOM(context); DECL_ATOM(normalized_coords); DECL_ATOM(addressing_mode); DECL_ATOM(filter_mode); // Program info // DECL_ATOM(reference_count); // DECL_ATOM(context); DECL_ATOM(num_decices); // DECL_ATOM(devices); DECL_ATOM(source); DECL_ATOM(binary_sizes); DECL_ATOM(binaries); // Build Info DECL_ATOM(status); DECL_ATOM(options); DECL_ATOM(log); DECL_ATOM(binary_type); // Kernel Info DECL_ATOM(function_name); DECL_ATOM(num_args); // DECL_ATOM(reference_count); // DECL_ATOM(context); DECL_ATOM(program); // Event Info DECL_ATOM(command_queue); DECL_ATOM(command_type); // DECL_ATOM(reference_count); DECL_ATOM(execution_status); // Workgroup info DECL_ATOM(work_group_size); DECL_ATOM(compile_work_group_size); // DECL_ATOM(local_mem_size); DECL_ATOM(preferred_work_group_size_multiple); DECL_ATOM(private_mem_size); DECL_ATOM(global_work_size); // Error codes DECL_ATOM(device_not_found); DECL_ATOM(device_not_available); DECL_ATOM(compiler_not_available); DECL_ATOM(mem_object_allocation_failure); DECL_ATOM(out_of_resources); DECL_ATOM(out_of_host_memory); DECL_ATOM(profiling_info_not_available); DECL_ATOM(mem_copy_overlap); DECL_ATOM(image_format_mismatch); DECL_ATOM(image_format_not_supported); DECL_ATOM(build_program_failure); DECL_ATOM(map_failure); DECL_ATOM(invalid_value); DECL_ATOM(invalid_device_type); DECL_ATOM(invalid_platform); DECL_ATOM(invalid_device); DECL_ATOM(invalid_context); DECL_ATOM(invalid_queue_properties); DECL_ATOM(invalid_command_queue); DECL_ATOM(invalid_host_ptr); DECL_ATOM(invalid_mem_object); DECL_ATOM(invalid_image_format_descriptor); DECL_ATOM(invalid_image_size); DECL_ATOM(invalid_sampler); DECL_ATOM(invalid_binary); DECL_ATOM(invalid_build_options); 
DECL_ATOM(invalid_program); DECL_ATOM(invalid_program_executable); DECL_ATOM(invalid_kernel_name); DECL_ATOM(invalid_kernel_definition); DECL_ATOM(invalid_kernel); DECL_ATOM(invalid_arg_index); DECL_ATOM(invalid_arg_value); DECL_ATOM(invalid_arg_size); DECL_ATOM(invalid_kernel_args); DECL_ATOM(invalid_work_dimension); DECL_ATOM(invalid_work_group_size); DECL_ATOM(invalid_work_item_size); DECL_ATOM(invalid_global_offset); DECL_ATOM(invalid_event_wait_list); DECL_ATOM(invalid_event); DECL_ATOM(invalid_operation); DECL_ATOM(invalid_gl_object); DECL_ATOM(invalid_buffer_size); DECL_ATOM(invalid_mip_level); DECL_ATOM(invalid_global_work_size); // cl_device_type DECL_ATOM(all); DECL_ATOM(default); DECL_ATOM(cpu); DECL_ATOM(gpu); DECL_ATOM(accelerator); DECL_ATOM(custom); // fp_config DECL_ATOM(denorm); DECL_ATOM(inf_nan); DECL_ATOM(round_to_nearest); DECL_ATOM(round_to_zero); DECL_ATOM(round_to_inf); DECL_ATOM(fma); DECL_ATOM(soft_float); DECL_ATOM(correctly_rounded_divide_sqrt); // mem_cache_type DECL_ATOM(none); DECL_ATOM(read_only); DECL_ATOM(read_write); // local_mem_type DECL_ATOM(local); DECL_ATOM(global); // exec capability DECL_ATOM(kernel); DECL_ATOM(native_kernel); // command_queue_properties DECL_ATOM(out_of_order_exec_mode_enable); DECL_ATOM(profiling_enable); // mem_flags // DECL_ATOM(read_write); DECL_ATOM(write_only); // DECL_ATOM(read_only); DECL_ATOM(use_host_ptr); DECL_ATOM(alloc_host_ptr); DECL_ATOM(copy_host_ptr); // migration flags DECL_ATOM(host); DECL_ATOM(content_undefined); // mem_object_type DECL_ATOM(buffer); DECL_ATOM(image2d); DECL_ATOM(image3d); // version1.2 DECL_ATOM(image2d_array); DECL_ATOM(image1d); DECL_ATOM(image1d_array); DECL_ATOM(image1d_buffer); // addressing_mode // DECL_ATOM(none); DECL_ATOM(clamp_to_edge); DECL_ATOM(clamp); DECL_ATOM(repeat); // filter_mode DECL_ATOM(nearest); DECL_ATOM(linear); // map_flags DECL_ATOM(read); DECL_ATOM(write); // build_status DECL_ATOM(success); // DECL_ATOM(none); // DECL_ATOM(error); 
DECL_ATOM(in_progress); // program_binary_type // DECL_ATOM(none); DECL_ATOM(compiled_object); DECL_ATOM(library); DECL_ATOM(executable); // command_type DECL_ATOM(ndrange_kernel); DECL_ATOM(task); // DECL_ATOM(native_kernel); DECL_ATOM(read_buffer); DECL_ATOM(write_buffer); DECL_ATOM(copy_buffer); DECL_ATOM(read_image); DECL_ATOM(write_image); DECL_ATOM(copy_image); DECL_ATOM(copy_image_to_buffer); DECL_ATOM(copy_buffer_to_image); DECL_ATOM(map_buffer); DECL_ATOM(map_image); DECL_ATOM(unmap_mem_object); DECL_ATOM(marker); DECL_ATOM(aquire_gl_objects); DECL_ATOM(release_gl_objects); DECL_ATOM(migreate_mem_objects); DECL_ATOM(fill_buffer); DECL_ATOM(fill_image); // execution_status DECL_ATOM(complete); DECL_ATOM(running); DECL_ATOM(submitted); DECL_ATOM(queued); // arguments DECL_ATOM(region); // DECL_ATOM(global); // DECL_ATOM(local); DECL_ATOM(constant); DECL_ATOM(private); // DECL_ATOM(read_only); // DECL_ATOM(write_only); // DECL_ATOM(read_write); // DECL_ATOM(none); // DECL_ATOM(none); DECL_ATOM(const); DECL_ATOM(restrict); DECL_ATOM(volatile); DECL_ATOM(address_qualifier); DECL_ATOM(access_qualifier); DECL_ATOM(type_name); DECL_ATOM(type_qualifier); // DECL_ATOM(name); #define SIZE_1 0x010000 #define SIZE_2 0x020000 #define SIZE_4 0x040000 #define SIZE_8 0x080000 #define SIZE_16 0x100000 ecl_kv_t kv_cl_type[] = { { &ATOM(char), SIZE_1 + OCL_CHAR }, { &ATOM(char2), SIZE_2 + OCL_CHAR }, { &ATOM(char4), SIZE_4 + OCL_CHAR }, { &ATOM(char8), SIZE_8 + OCL_CHAR }, { &ATOM(char16), SIZE_16 + OCL_CHAR }, { &ATOM(uchar), SIZE_1 + OCL_UCHAR }, { &ATOM(uchar2), SIZE_2 + OCL_UCHAR }, { &ATOM(uchar4), SIZE_4 + OCL_UCHAR }, { &ATOM(uchar8), SIZE_8 + OCL_UCHAR }, { &ATOM(uchar16), SIZE_16 + OCL_UCHAR }, { &ATOM(short), SIZE_1 + OCL_SHORT }, { &ATOM(short2), SIZE_2 + OCL_SHORT }, { &ATOM(short4), SIZE_4 + OCL_SHORT }, { &ATOM(short8), SIZE_8 + OCL_SHORT }, { &ATOM(short16), SIZE_16 + OCL_SHORT }, { &ATOM(ushort), SIZE_1 + OCL_USHORT }, { &ATOM(ushort2), SIZE_2 + OCL_USHORT }, 
{ &ATOM(ushort4), SIZE_4 + OCL_USHORT }, { &ATOM(ushort8), SIZE_8 + OCL_USHORT }, { &ATOM(ushort16), SIZE_16 + OCL_USHORT }, { &ATOM(int), SIZE_1 + OCL_INT }, { &ATOM(int2), SIZE_2 + OCL_INT }, { &ATOM(int4), SIZE_4 + OCL_INT }, { &ATOM(int8), SIZE_8 + OCL_INT }, { &ATOM(int16), SIZE_16 + OCL_INT }, { &ATOM(uint), SIZE_1 + OCL_UINT }, { &ATOM(uint2), SIZE_2 + OCL_UINT }, { &ATOM(uint4), SIZE_4 + OCL_UINT }, { &ATOM(uint8), SIZE_8 + OCL_UINT }, { &ATOM(uint16), SIZE_16 + OCL_UINT }, { &ATOM(long), SIZE_1 + OCL_LONG }, { &ATOM(long2), SIZE_2 + OCL_LONG }, { &ATOM(long4), SIZE_4 + OCL_LONG }, { &ATOM(long8), SIZE_8 + OCL_LONG }, { &ATOM(long16), SIZE_16 + OCL_LONG }, { &ATOM(ulong), SIZE_1 + OCL_ULONG }, { &ATOM(ulong2), SIZE_2 + OCL_ULONG }, { &ATOM(ulong4), SIZE_4 + OCL_ULONG }, { &ATOM(ulong8), SIZE_8 + OCL_ULONG }, { &ATOM(ulong16), SIZE_16 + OCL_ULONG }, { &ATOM(half), SIZE_1 + OCL_HALF }, { &ATOM(float), SIZE_1 + OCL_FLOAT }, { &ATOM(float2), SIZE_2 + OCL_FLOAT }, { &ATOM(float4), SIZE_4 + OCL_FLOAT }, { &ATOM(float8), SIZE_8 + OCL_FLOAT }, { &ATOM(float16), SIZE_16 + OCL_FLOAT }, { &ATOM(double), SIZE_1 + OCL_DOUBLE }, { &ATOM(double2), SIZE_2 + OCL_DOUBLE }, { &ATOM(double4), SIZE_4 + OCL_DOUBLE }, { &ATOM(double8), SIZE_8 + OCL_DOUBLE }, { &ATOM(double16), SIZE_16 + OCL_DOUBLE }, { 0, 0 } }; ecl_kv_t kv_device_type[] = { // bitfield { &ATOM(cpu), CL_DEVICE_TYPE_CPU }, { &ATOM(gpu), CL_DEVICE_TYPE_GPU }, { &ATOM(accelerator), CL_DEVICE_TYPE_ACCELERATOR }, { &ATOM(default), CL_DEVICE_TYPE_DEFAULT }, { &ATOM(all), CL_DEVICE_TYPE_ALL }, #if CL_VERSION_1_2 == 1 { &ATOM(custom), CL_DEVICE_TYPE_CUSTOM }, #endif { 0, 0} }; ecl_kv_t kv_fp_config[] = { // bitfield { &ATOM(denorm), CL_FP_DENORM }, { &ATOM(inf_nan), CL_FP_INF_NAN }, { &ATOM(round_to_nearest), CL_FP_ROUND_TO_NEAREST }, { &ATOM(round_to_zero), CL_FP_ROUND_TO_ZERO }, { &ATOM(round_to_inf), CL_FP_ROUND_TO_INF }, { &ATOM(fma), CL_FP_FMA }, #if CL_VERSION_1_2 == 1 { &ATOM(soft_float), CL_FP_SOFT_FLOAT }, { 
&ATOM(correctly_rounded_divide_sqrt),CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT}, #endif { 0, 0 } }; ecl_kv_t kv_mem_cache_type[] = { // enum { &ATOM(none), CL_NONE }, { &ATOM(read_only), CL_READ_ONLY_CACHE }, { &ATOM(read_write), CL_READ_WRITE_CACHE }, { 0, 0 } }; ecl_kv_t kv_local_mem_type[] = { // enum { &ATOM(local), CL_LOCAL }, { &ATOM(global), CL_GLOBAL }, { 0, 0 } }; ecl_kv_t kv_exec_capabilities[] = { // bit field { &ATOM(kernel), CL_EXEC_KERNEL }, { &ATOM(native_kernel), CL_EXEC_NATIVE_KERNEL }, { 0, 0 } }; ecl_kv_t kv_command_queue_properties[] = { // bit field { &ATOM(out_of_order_exec_mode_enable), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE }, { &ATOM(profiling_enable), CL_QUEUE_PROFILING_ENABLE }, { 0, 0} }; ecl_kv_t kv_mem_flags[] = { // bit field { &ATOM(read_write), CL_MEM_READ_WRITE }, { &ATOM(write_only), CL_MEM_WRITE_ONLY }, { &ATOM(read_only), CL_MEM_READ_ONLY }, { &ATOM(use_host_ptr), CL_MEM_USE_HOST_PTR }, { &ATOM(alloc_host_ptr), CL_MEM_ALLOC_HOST_PTR }, { &ATOM(copy_host_ptr), CL_MEM_COPY_HOST_PTR }, { 0, 0 } }; #if CL_VERSION_1_2 == 1 ecl_kv_t kv_migration_flags[] = { // bit field { &ATOM(host), CL_MIGRATE_MEM_OBJECT_HOST }, { &ATOM(content_undefined), CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED}, { 0, 0 } }; #endif ecl_kv_t kv_mem_object_type[] = { // enum { &ATOM(buffer), CL_MEM_OBJECT_BUFFER }, { &ATOM(image2d), CL_MEM_OBJECT_IMAGE2D }, { &ATOM(image3d), CL_MEM_OBJECT_IMAGE3D }, #if CL_VERSION_1_2 == 1 { &ATOM(image2d_array), CL_MEM_OBJECT_IMAGE2D_ARRAY }, { &ATOM(image1d), CL_MEM_OBJECT_IMAGE1D }, { &ATOM(image1d_array), CL_MEM_OBJECT_IMAGE1D_ARRAY }, { &ATOM(image1d_buffer), CL_MEM_OBJECT_IMAGE1D_BUFFER }, #endif { 0, 0 } }; ecl_kv_t kv_addressing_mode[] = { // enum { &ATOM(none), CL_ADDRESS_NONE }, { &ATOM(clamp_to_edge), CL_ADDRESS_CLAMP_TO_EDGE }, { &ATOM(clamp), CL_ADDRESS_CLAMP }, { &ATOM(repeat), CL_ADDRESS_REPEAT }, { 0, 0 } }; ecl_kv_t kv_filter_mode[] = { // enum { &ATOM(nearest), CL_FILTER_NEAREST }, { &ATOM(linear), CL_FILTER_LINEAR }, { 
0, 0 } }; ecl_kv_t kv_map_flags[] = { // bitfield { &ATOM(read), CL_MAP_READ }, { &ATOM(write), CL_MAP_WRITE }, { 0, 0 } }; ecl_kv_t kv_build_status[] = { // enum { &ATOM(success), CL_BUILD_SUCCESS }, { &ATOM(none), CL_BUILD_NONE }, { &ATOM(error), CL_BUILD_ERROR }, { &ATOM(in_progress), CL_BUILD_IN_PROGRESS }, { 0, 0 } }; #if CL_VERSION_1_2 == 1 ecl_kv_t kv_program_binary_type[] = { // enum { &ATOM(none), CL_PROGRAM_BINARY_TYPE_NONE }, { &ATOM(compiled_object), CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT }, { &ATOM(library), CL_PROGRAM_BINARY_TYPE_LIBRARY }, { &ATOM(executable), CL_PROGRAM_BINARY_TYPE_EXECUTABLE }, { 0, 0 } }; #endif ecl_kv_t kv_command_type[] = { // enum { &ATOM(ndrange_kernel), CL_COMMAND_NDRANGE_KERNEL }, { &ATOM(task), CL_COMMAND_TASK }, { &ATOM(native_kernel), CL_COMMAND_NATIVE_KERNEL }, { &ATOM(read_buffer), CL_COMMAND_READ_BUFFER }, { &ATOM(write_buffer), CL_COMMAND_WRITE_BUFFER }, { &ATOM(copy_buffer), CL_COMMAND_COPY_BUFFER }, { &ATOM(read_image), CL_COMMAND_READ_IMAGE }, { &ATOM(write_image), CL_COMMAND_WRITE_IMAGE }, { &ATOM(copy_image), CL_COMMAND_COPY_IMAGE }, { &ATOM(copy_image_to_buffer), CL_COMMAND_COPY_IMAGE_TO_BUFFER }, { &ATOM(copy_buffer_to_image), CL_COMMAND_COPY_BUFFER_TO_IMAGE }, { &ATOM(map_buffer), CL_COMMAND_MAP_BUFFER }, { &ATOM(map_image), CL_COMMAND_MAP_IMAGE }, { &ATOM(unmap_mem_object), CL_COMMAND_UNMAP_MEM_OBJECT }, { &ATOM(marker), CL_COMMAND_MARKER }, { &ATOM(aquire_gl_objects), CL_COMMAND_ACQUIRE_GL_OBJECTS }, { &ATOM(release_gl_objects), CL_COMMAND_RELEASE_GL_OBJECTS }, #if CL_VERSION_12 == 1 { &ATOM(migreate_mem_objects), CL_COMMAND_MIGRATE_MEM_OBJECTS }, { &ATOM(fill_buffer), CL_COMMAND_FILL_BUFFER }, { &ATOM(fill_image), CL_COMMAND_FILL_IMAGE }, #endif { 0, 0} }; ecl_kv_t kv_execution_status[] = { // enum { &ATOM(complete), CL_COMPLETE }, // same as CL_SUCCESS { &ATOM(running), CL_RUNNING }, { &ATOM(submitted), CL_SUBMITTED }, { &ATOM(queued), CL_QUEUED }, // the error codes (negative values) { 
&ATOM(device_not_found), CL_DEVICE_NOT_FOUND },
    // (continuation of kv_execution_status: one entry per CL error code,
    //  using the same atoms that ecl_error() returns)
    { &ATOM(device_not_available), CL_DEVICE_NOT_AVAILABLE },
    { &ATOM(compiler_not_available), CL_COMPILER_NOT_AVAILABLE },
    { &ATOM(mem_object_allocation_failure), CL_MEM_OBJECT_ALLOCATION_FAILURE },
    { &ATOM(out_of_resources), CL_OUT_OF_RESOURCES },
    { &ATOM(out_of_host_memory), CL_OUT_OF_HOST_MEMORY },
    { &ATOM(profiling_info_not_available), CL_PROFILING_INFO_NOT_AVAILABLE },
    { &ATOM(mem_copy_overlap), CL_MEM_COPY_OVERLAP },
    { &ATOM(image_format_mismatch), CL_IMAGE_FORMAT_MISMATCH },
    { &ATOM(image_format_not_supported), CL_IMAGE_FORMAT_NOT_SUPPORTED },
    { &ATOM(build_program_failure), CL_BUILD_PROGRAM_FAILURE },
    { &ATOM(map_failure), CL_MAP_FAILURE },
    { &ATOM(invalid_value), CL_INVALID_VALUE },
    { &ATOM(invalid_device_type), CL_INVALID_DEVICE_TYPE },
    { &ATOM(invalid_platform), CL_INVALID_PLATFORM },
    { &ATOM(invalid_device), CL_INVALID_DEVICE },
    { &ATOM(invalid_context), CL_INVALID_CONTEXT },
    { &ATOM(invalid_queue_properties), CL_INVALID_QUEUE_PROPERTIES },
    { &ATOM(invalid_command_queue), CL_INVALID_COMMAND_QUEUE },
    { &ATOM(invalid_host_ptr), CL_INVALID_HOST_PTR },
    { &ATOM(invalid_mem_object), CL_INVALID_MEM_OBJECT },
    { &ATOM(invalid_image_format_descriptor), CL_INVALID_IMAGE_FORMAT_DESCRIPTOR },
    { &ATOM(invalid_image_size), CL_INVALID_IMAGE_SIZE },
    { &ATOM(invalid_sampler), CL_INVALID_SAMPLER },
    { &ATOM(invalid_binary), CL_INVALID_BINARY },
    { &ATOM(invalid_build_options), CL_INVALID_BUILD_OPTIONS },
    { &ATOM(invalid_program), CL_INVALID_PROGRAM },
    { &ATOM(invalid_program_executable), CL_INVALID_PROGRAM_EXECUTABLE },
    { &ATOM(invalid_kernel_name), CL_INVALID_KERNEL_NAME },
    { &ATOM(invalid_kernel_definition), CL_INVALID_KERNEL_DEFINITION },
    { &ATOM(invalid_kernel), CL_INVALID_KERNEL },
    { &ATOM(invalid_arg_index), CL_INVALID_ARG_INDEX },
    { &ATOM(invalid_arg_value), CL_INVALID_ARG_VALUE },
    { &ATOM(invalid_arg_size), CL_INVALID_ARG_SIZE },
    { &ATOM(invalid_kernel_args), CL_INVALID_KERNEL_ARGS },
    { &ATOM(invalid_work_dimension),
CL_INVALID_WORK_DIMENSION },
    { &ATOM(invalid_work_group_size), CL_INVALID_WORK_GROUP_SIZE },
    { &ATOM(invalid_work_item_size), CL_INVALID_WORK_ITEM_SIZE },
    { &ATOM(invalid_global_offset), CL_INVALID_GLOBAL_OFFSET },
    { &ATOM(invalid_event_wait_list), CL_INVALID_EVENT_WAIT_LIST },
    { &ATOM(invalid_event), CL_INVALID_EVENT },
    { &ATOM(invalid_operation), CL_INVALID_OPERATION },
    { &ATOM(invalid_gl_object), CL_INVALID_GL_OBJECT },
    { &ATOM(invalid_buffer_size), CL_INVALID_BUFFER_SIZE },
    { &ATOM(invalid_mip_level), CL_INVALID_MIP_LEVEL },
    { &ATOM(invalid_global_work_size), CL_INVALID_GLOBAL_WORK_SIZE },
    { 0, 0 }
};

// Atoms naming the image channel data types.
DECL_ATOM(snorm_int8);
DECL_ATOM(snorm_int16);
DECL_ATOM(unorm_int8);
DECL_ATOM(unorm_int16);
DECL_ATOM(unorm_int24);
DECL_ATOM(unorm_short_565);
DECL_ATOM(unorm_short_555);
DECL_ATOM(unorm_int_101010);
DECL_ATOM(signed_int8);
DECL_ATOM(signed_int16);
DECL_ATOM(signed_int32);
DECL_ATOM(unsigned_int8);
DECL_ATOM(unsigned_int16);
DECL_ATOM(unsigned_int32);
DECL_ATOM(half_float);
// DECL_ATOM(float);   (left disabled; the float atom is already used by the
//                      type table earlier in the file)

// Atom names for the image channel data types; unorm_int24 is only listed
// when the headers are 1.2 and define CL_UNORM_INT24.
ecl_kv_t kv_channel_type[] = {  // enum
    { &ATOM(snorm_int8), CL_SNORM_INT8 },
    { &ATOM(snorm_int16), CL_SNORM_INT16 },
    { &ATOM(unorm_int8), CL_UNORM_INT8 },
    { &ATOM(unorm_int16), CL_UNORM_INT16 },
    { &ATOM(unorm_short_565), CL_UNORM_SHORT_565 },
    { &ATOM(unorm_short_555), CL_UNORM_SHORT_555 },
    { &ATOM(unorm_int_101010), CL_UNORM_INT_101010 },
    { &ATOM(signed_int8), CL_SIGNED_INT8 },
    { &ATOM(signed_int16), CL_SIGNED_INT16 },
    { &ATOM(signed_int32), CL_SIGNED_INT32 },
    { &ATOM(unsigned_int8), CL_UNSIGNED_INT8 },
    { &ATOM(unsigned_int16), CL_UNSIGNED_INT16 },
    { &ATOM(unsigned_int32), CL_UNSIGNED_INT32 },
    { &ATOM(half_float), CL_HALF_FLOAT },
    { &ATOM(float), CL_FLOAT },
#if (CL_VERSION_1_2 == 1) && defined(CL_UNORM_INT24)
    { &ATOM(unorm_int24), CL_UNORM_INT24 },
#endif
    { 0, 0 }
};

// channel order
DECL_ATOM(r);
DECL_ATOM(a);
DECL_ATOM(rg);
DECL_ATOM(ra);
DECL_ATOM(rgb);
DECL_ATOM(rgba);
DECL_ATOM(bgra);
DECL_ATOM(argb);
DECL_ATOM(intensity);
DECL_ATOM(luminance);
DECL_ATOM(rx);
DECL_ATOM(rgx); DECL_ATOM(rgbx); // DECL_ATOM(depth); DECL_ATOM(depth_stencil); // 1.1 features! in apple 1.0? #ifndef CL_Rx #define CL_Rx 0x10BA #endif #ifndef CL_RGx #define CL_RGx 0x10BB #endif #ifndef CL_RGBx #define CL_RGBx 0x10BC #endif ecl_kv_t kv_channel_order[] = { { &ATOM(r), CL_R }, { &ATOM(a), CL_A }, { &ATOM(rg), CL_RG }, { &ATOM(ra), CL_RA }, { &ATOM(rgb), CL_RGB }, { &ATOM(rgba), CL_RGBA }, { &ATOM(bgra), CL_BGRA }, { &ATOM(argb), CL_ARGB }, { &ATOM(intensity), CL_INTENSITY }, { &ATOM(luminance), CL_LUMINANCE }, { &ATOM(rx), CL_Rx }, { &ATOM(rgx), CL_RGx }, { &ATOM(rgbx), CL_RGBx }, #if CL_VERSION_1_2 == 1 #if defined(CL_DEPTH) { &ATOM(depth), CL_DEPTH }, #endif #if defined(CL_DEPTH_STENCIL) { &ATOM(depth_stencil), CL_DEPTH_STENCIL }, #endif #endif { 0, 0 } }; // partition_property DECL_ATOM(equally); DECL_ATOM(by_counts); DECL_ATOM(by_counts_list_end); DECL_ATOM(by_affinity_domain); #if CL_VERSION_1_2 == 1 ecl_kv_t kv_device_partition_property[] = { { &ATOM(equally), CL_DEVICE_PARTITION_EQUALLY }, { &ATOM(by_counts), CL_DEVICE_PARTITION_BY_COUNTS }, { &ATOM(by_affinity_domain), CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN }, { &ATOM(undefined), 0 }, { 0, 0} }; #endif DECL_ATOM(numa); DECL_ATOM(l4_cache); DECL_ATOM(l3_cache); DECL_ATOM(l2_cache); DECL_ATOM(l1_cache); DECL_ATOM(next_partitionable); #if CL_VERSION_1_2 == 1 ecl_kv_t kv_device_affinity_domain[] = { { &ATOM(numa), CL_DEVICE_AFFINITY_DOMAIN_NUMA }, { &ATOM(l4_cache), CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE }, { &ATOM(l3_cache), CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE }, { &ATOM(l2_cache), CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE }, { &ATOM(l1_cache), CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE }, { &ATOM(next_partitionable), CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE }, { &ATOM(undefined), 0 }, { 0, 0} }; #endif // Device info DECL_ATOM(type); DECL_ATOM(vendor_id); DECL_ATOM(max_compute_units); DECL_ATOM(max_work_item_dimensions); DECL_ATOM(max_work_group_size); DECL_ATOM(max_work_item_sizes); 
DECL_ATOM(preferred_vector_width_char); DECL_ATOM(preferred_vector_width_short); DECL_ATOM(preferred_vector_width_int); DECL_ATOM(preferred_vector_width_long); DECL_ATOM(preferred_vector_width_float); DECL_ATOM(preferred_vector_width_double); DECL_ATOM(max_clock_frequency); DECL_ATOM(address_bits); DECL_ATOM(max_read_image_args); DECL_ATOM(max_write_image_args); DECL_ATOM(max_mem_alloc_size); DECL_ATOM(image2d_max_width); DECL_ATOM(image2d_max_height); DECL_ATOM(image3d_max_width); DECL_ATOM(image3d_max_height); DECL_ATOM(image3d_max_depth); DECL_ATOM(image_support); DECL_ATOM(max_parameter_size); DECL_ATOM(max_samplers); DECL_ATOM(mem_base_addr_align); DECL_ATOM(min_data_type_align_size); DECL_ATOM(single_fp_config); DECL_ATOM(global_mem_cache_type); DECL_ATOM(global_mem_cacheline_size); DECL_ATOM(global_mem_cache_size); DECL_ATOM(global_mem_size); DECL_ATOM(max_constant_buffer_size); DECL_ATOM(max_constant_args); DECL_ATOM(local_mem_type); DECL_ATOM(local_mem_size); DECL_ATOM(error_correction_support); DECL_ATOM(profiling_timer_resolution); DECL_ATOM(endian_little); DECL_ATOM(available); DECL_ATOM(compiler_available); DECL_ATOM(execution_capabilities); DECL_ATOM(queue_properties); DECL_ATOM(name); DECL_ATOM(vendor); DECL_ATOM(driver_version); DECL_ATOM(profile); DECL_ATOM(version); DECL_ATOM(extensions); DECL_ATOM(platform); // cl_khr_fp64 extension || CL_VERSION_1_2 == 1 DECL_ATOM(double_fp_config); // cl_khr_fp16 extension || CL_VERSION_1_2 == 1 DECL_ATOM(half_fp_config); // 1.2 DECL_ATOM(preferred_vector_width_half); DECL_ATOM(host_unified_memory); DECL_ATOM(native_vector_width_char); DECL_ATOM(native_vector_width_short); DECL_ATOM(native_vector_width_int); DECL_ATOM(native_vector_width_long); DECL_ATOM(native_vector_width_float); DECL_ATOM(native_vector_width_double); DECL_ATOM(native_vector_width_half); DECL_ATOM(opencl_c_version); DECL_ATOM(linker_available); DECL_ATOM(built_in_kernels); DECL_ATOM(image_max_buffer_size); DECL_ATOM(image_max_array_size); 
DECL_ATOM(parent_device); DECL_ATOM(partition_max_sub_devices); DECL_ATOM(partition_properties); DECL_ATOM(partition_affinity_domain); DECL_ATOM(partition_type); // DECL_ATOM(reference_count); DECL_ATOM(preferred_interop_user_sync); DECL_ATOM(printf_buffer_size); DECL_ATOM(image_pitch_alignment); DECL_ATOM(image_base_address_alignment); // cl_nv_device_attribute_query extension DECL_ATOM(compute_capability_major_nv); DECL_ATOM(compute_capability_minor_nv); DECL_ATOM(registers_per_block_nv); DECL_ATOM(warp_size_nv); DECL_ATOM(gpu_overlap_nv); DECL_ATOM(kernel_exec_timeout_nv); DECL_ATOM(device_integrated_memory_nv); // Map device info index 0...N => cl_device_info x Data type ecl_info_t device_info[] = { { &ATOM(type), CL_DEVICE_TYPE, false, OCL_DEVICE_TYPE, kv_device_type }, { &ATOM(vendor_id), CL_DEVICE_VENDOR_ID, false, OCL_UINT, 0 }, { &ATOM(max_compute_units), CL_DEVICE_MAX_COMPUTE_UNITS, false, OCL_UINT, 0 }, { &ATOM(max_work_item_dimensions), CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, false, OCL_UINT, 0 }, { &ATOM(max_work_group_size), CL_DEVICE_MAX_WORK_GROUP_SIZE, false, OCL_SIZE, 0 }, { &ATOM(max_work_item_sizes), CL_DEVICE_MAX_WORK_ITEM_SIZES, true, OCL_SIZE, 0 }, { &ATOM(preferred_vector_width_char), CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, false, OCL_UINT, 0 }, { &ATOM(preferred_vector_width_short), CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, false, OCL_UINT, 0 }, { &ATOM(preferred_vector_width_int), CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, false, OCL_UINT, 0 }, { &ATOM(preferred_vector_width_long), CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, false,OCL_UINT, 0 }, { &ATOM(preferred_vector_width_float), CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, false, OCL_UINT, 0 }, { &ATOM(preferred_vector_width_double), CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, false, OCL_UINT, 0 }, { &ATOM(max_clock_frequency), CL_DEVICE_MAX_CLOCK_FREQUENCY, false, OCL_UINT, 0 }, { &ATOM(address_bits), CL_DEVICE_ADDRESS_BITS, false, OCL_UINT, 0 }, { &ATOM(max_read_image_args), CL_DEVICE_MAX_READ_IMAGE_ARGS, 
false, OCL_UINT, 0 },
    // (continuation of device_info: image limits, memory sizes and
    //  cache/local-memory descriptions)
    { &ATOM(max_write_image_args), CL_DEVICE_MAX_WRITE_IMAGE_ARGS, false, OCL_UINT, 0 },
    { &ATOM(max_mem_alloc_size), CL_DEVICE_MAX_MEM_ALLOC_SIZE, false, OCL_ULONG, 0 },
    { &ATOM(image2d_max_width), CL_DEVICE_IMAGE2D_MAX_WIDTH, false, OCL_SIZE, 0 },
    { &ATOM(image2d_max_height), CL_DEVICE_IMAGE2D_MAX_HEIGHT, false, OCL_SIZE, 0 },
    { &ATOM(image3d_max_width), CL_DEVICE_IMAGE3D_MAX_WIDTH, false, OCL_SIZE, 0 },
    { &ATOM(image3d_max_height), CL_DEVICE_IMAGE3D_MAX_HEIGHT, false, OCL_SIZE, 0 },
    { &ATOM(image3d_max_depth), CL_DEVICE_IMAGE3D_MAX_DEPTH, false, OCL_SIZE, 0 },
    { &ATOM(image_support), CL_DEVICE_IMAGE_SUPPORT, false, OCL_BOOL, 0 },
    { &ATOM(max_parameter_size), CL_DEVICE_MAX_PARAMETER_SIZE, false, OCL_SIZE, 0 },
    { &ATOM(max_samplers), CL_DEVICE_MAX_SAMPLERS, false, OCL_UINT, 0 },
    { &ATOM(mem_base_addr_align), CL_DEVICE_MEM_BASE_ADDR_ALIGN, false, OCL_UINT, 0 },
    { &ATOM(min_data_type_align_size), CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, false, OCL_UINT, 0 },
    // The entries with a kv table are decoded symbolically (atoms) through it.
    { &ATOM(single_fp_config), CL_DEVICE_SINGLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config },
    { &ATOM(global_mem_cache_type), CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, false, OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE, kv_mem_cache_type },
    { &ATOM(global_mem_cacheline_size), CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, false, OCL_UINT, 0 },
    { &ATOM(global_mem_cache_size), CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, false, OCL_ULONG, 0 },
    { &ATOM(global_mem_size), CL_DEVICE_GLOBAL_MEM_SIZE, false, OCL_ULONG, 0 },
    { &ATOM(max_constant_buffer_size), CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, false, OCL_ULONG, 0 },
    { &ATOM(max_constant_args), CL_DEVICE_MAX_CONSTANT_ARGS, false, OCL_UINT, 0 },
    { &ATOM(local_mem_type), CL_DEVICE_LOCAL_MEM_TYPE, false, OCL_DEVICE_LOCAL_MEM_TYPE, kv_local_mem_type },
    { &ATOM(local_mem_size), CL_DEVICE_LOCAL_MEM_SIZE, false, OCL_ULONG, 0 },
    { &ATOM(error_correction_support), CL_DEVICE_ERROR_CORRECTION_SUPPORT, false, OCL_BOOL, 0 },
    { &ATOM(profiling_timer_resolution), CL_DEVICE_PROFILING_TIMER_RESOLUTION, false,
OCL_SIZE, 0 },
    // (continuation of device_info: availability flags, identification
    //  strings, then the entries that need OpenCL 1.1 headers)
    { &ATOM(endian_little), CL_DEVICE_ENDIAN_LITTLE, false, OCL_BOOL, 0},
    { &ATOM(available), CL_DEVICE_AVAILABLE, false, OCL_BOOL, 0 },
    { &ATOM(compiler_available), CL_DEVICE_COMPILER_AVAILABLE, false, OCL_BOOL, 0 },
    { &ATOM(execution_capabilities), CL_DEVICE_EXECUTION_CAPABILITIES, false, OCL_DEVICE_EXEC_CAPABILITIES, kv_exec_capabilities },
    { &ATOM(queue_properties), CL_DEVICE_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties },
    { &ATOM(name), CL_DEVICE_NAME, false, OCL_STRING, 0 },
    { &ATOM(vendor), CL_DEVICE_VENDOR, false, OCL_STRING, 0 },
    { &ATOM(driver_version), CL_DRIVER_VERSION, false, OCL_STRING, 0 },
    { &ATOM(profile), CL_DEVICE_PROFILE, false, OCL_STRING, 0 },
    { &ATOM(version), CL_DEVICE_VERSION, false, OCL_STRING, 0 },
    { &ATOM(extensions), CL_DEVICE_EXTENSIONS, false, OCL_STRING, 0 },
    { &ATOM(platform), CL_DEVICE_PLATFORM, false, OCL_PLATFORM, 0 },
#if CL_VERSION_1_1 == 1
    { &ATOM(preferred_vector_width_half), CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,false, OCL_UINT, 0},
    { &ATOM(host_unified_memory), CL_DEVICE_HOST_UNIFIED_MEMORY,false,OCL_BOOL,0},
    { &ATOM(native_vector_width_char), CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,false,OCL_UINT, 0},
    { &ATOM(native_vector_width_short), CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,false,OCL_UINT, 0},
    { &ATOM(native_vector_width_int), CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,false,OCL_UINT, 0},
    { &ATOM(native_vector_width_long), CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,false,OCL_UINT, 0},
    { &ATOM(native_vector_width_float), CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,false,OCL_UINT, 0},
    { &ATOM(native_vector_width_double), CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,false,OCL_UINT, 0},
    { &ATOM(native_vector_width_half), CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,false,OCL_UINT, 0},
    { &ATOM(opencl_c_version), CL_DEVICE_OPENCL_C_VERSION,false,OCL_STRING, 0},
#endif
    // cl_khr_fp64 extension || CL_VERSION_1_2 == 1
    // (the guard tests the value of the query id macro, so the entry is
    //  included whenever the headers define it to a non-zero value)
#if CL_DEVICE_DOUBLE_FP_CONFIG
    { &ATOM(double_fp_config), CL_DEVICE_DOUBLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG,
kv_fp_config }, #endif // cl_khr_fp16 extension || CL_VERSION_1_2 == 1 #if CL_DEVICE_HALF_FP_CONFIG { &ATOM(half_fp_config), CL_DEVICE_HALF_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config }, #endif #if CL_VERSION_1_2 == 1 { &ATOM(linker_available), CL_DEVICE_LINKER_AVAILABLE,false,OCL_BOOL, 0}, { &ATOM(built_in_kernels), CL_DEVICE_BUILT_IN_KERNELS,false, OCL_STRING, 0}, { &ATOM(image_max_buffer_size), CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,false,OCL_SIZE, 0}, { &ATOM(image_max_array_size), CL_DEVICE_IMAGE_MAX_ARRAY_SIZE,false,OCL_SIZE, 0}, { &ATOM(parent_device), CL_DEVICE_PARENT_DEVICE,false,OCL_DEVICE, 0}, { &ATOM(partition_max_sub_devices), CL_DEVICE_PARTITION_MAX_SUB_DEVICES,false,OCL_SIZE, 0}, { &ATOM(partition_properties), CL_DEVICE_PARTITION_PROPERTIES,true, OCL_ENUM, kv_device_partition_property }, { &ATOM(partition_affinity_domain), CL_DEVICE_PARTITION_AFFINITY_DOMAIN,false,OCL_ENUM, kv_device_affinity_domain }, { &ATOM(partition_type), CL_DEVICE_PARTITION_TYPE, false, OCL_DEVICE_PARTITION, 0}, { &ATOM(reference_count), CL_DEVICE_REFERENCE_COUNT, false, OCL_UINT, 0 }, { &ATOM(preferred_interop_user_sync), CL_DEVICE_PREFERRED_INTEROP_USER_SYNC,false, OCL_BOOL, 0}, { &ATOM(printf_buffer_size), CL_DEVICE_PRINTF_BUFFER_SIZE,false, OCL_SIZE, 0 }, #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT { &ATOM(image_pitch_alignment), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, false, OCL_SIZE, 0 }, #endif #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT { &ATOM(image_base_address_alignment), CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, false, OCL_SIZE, 0 }, #endif #endif /* cl_nv_device_attribute_query extension */ #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV { &ATOM(compute_capability_major_nv), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, false, OCL_UINT, 0}, #endif #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV { &ATOM(compute_capability_minor_nv), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, false, OCL_UINT, 0}, #endif #ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV { 
&ATOM(registers_per_block_nv),CL_DEVICE_REGISTERS_PER_BLOCK_NV, false, OCL_UINT, 0},
#endif
#ifdef CL_DEVICE_WARP_SIZE_NV
    { &ATOM(warp_size_nv),CL_DEVICE_WARP_SIZE_NV, false, OCL_UINT, 0},
#endif
#ifdef CL_DEVICE_GPU_OVERLAP_NV
    { &ATOM(gpu_overlap_nv),CL_DEVICE_GPU_OVERLAP_NV, false, OCL_BOOL, 0},
#endif
#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
    { &ATOM(kernel_exec_timeout_nv), CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, false, OCL_BOOL, 0},
#endif
#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV
    { &ATOM(device_integrated_memory_nv),CL_DEVICE_INTEGRATED_MEMORY_NV, false, OCL_BOOL, 0},
#endif
};

// Map platform info index 0...N => cl_platform_info x Data type
// (unlike the ecl_kv_t tables, the ecl_info_t tables have no { 0, 0 }
//  terminator entry)
ecl_info_t platform_info[] = {
    { &ATOM(profile), CL_PLATFORM_PROFILE, false, OCL_STRING, 0 },
    { &ATOM(version), CL_PLATFORM_VERSION, false, OCL_STRING, 0 },
    { &ATOM(name), CL_PLATFORM_NAME, false, OCL_STRING, 0 },
    { &ATOM(vendor), CL_PLATFORM_VENDOR, false, OCL_STRING, 0 },
    { &ATOM(extensions), CL_PLATFORM_EXTENSIONS, false, OCL_STRING, 0}
};

// Context info keys (CL_CONTEXT_* queries).
ecl_info_t context_info[] = {
    { &ATOM(reference_count), CL_CONTEXT_REFERENCE_COUNT, false, OCL_UINT, 0 },
    { &ATOM(devices), CL_CONTEXT_DEVICES, true, OCL_DEVICE, 0 },
    { &ATOM(properties), CL_CONTEXT_PROPERTIES, true, OCL_INT, 0 }
};

// Command queue info keys (CL_QUEUE_* queries).
ecl_info_t queue_info[] = {
    { &ATOM(context), CL_QUEUE_CONTEXT, false, OCL_CONTEXT, 0 },
    { &ATOM(device), CL_QUEUE_DEVICE, false, OCL_DEVICE, 0 },
    { &ATOM(reference_count), CL_QUEUE_REFERENCE_COUNT, false, OCL_UINT, 0 },
    { &ATOM(properties), CL_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties }
};

// Memory object info keys (CL_MEM_* queries).
ecl_info_t mem_info[] = {
    { &ATOM(object_type), CL_MEM_TYPE, false, OCL_MEM_OBJECT_TYPE, kv_mem_object_type },
    { &ATOM(flags), CL_MEM_FLAGS, false, OCL_MEM_FLAGS, kv_mem_flags },
    { &ATOM(size), CL_MEM_SIZE, false, OCL_SIZE, 0 },
    // FIXME: pointer!! map it (binary resource?)
    { &ATOM(host_ptr), CL_MEM_HOST_PTR, false, OCL_POINTER, 0 },
    { &ATOM(map_count), CL_MEM_MAP_COUNT, false, OCL_UINT, 0 },
    { &ATOM(reference_count), CL_MEM_REFERENCE_COUNT, false, OCL_UINT, 0 },
    { &ATOM(context), CL_MEM_CONTEXT, false, OCL_CONTEXT, 0 }
};

// Image info keys (CL_IMAGE_* queries).
ecl_info_t image_info[] = {
    { &ATOM(format), CL_IMAGE_FORMAT, false, OCL_IMAGE_FORMAT, 0 },
    { &ATOM(element_size), CL_IMAGE_ELEMENT_SIZE, false, OCL_SIZE, 0 },
    { &ATOM(row_pitch), CL_IMAGE_ROW_PITCH, false, OCL_SIZE, 0 },
    { &ATOM(slice_pitch), CL_IMAGE_SLICE_PITCH, false, OCL_SIZE, 0 },
    { &ATOM(width), CL_IMAGE_WIDTH, false, OCL_SIZE, 0 },
    { &ATOM(height), CL_IMAGE_HEIGHT, false, OCL_SIZE, 0 },
    { &ATOM(depth), CL_IMAGE_DEPTH, false, OCL_SIZE, 0 }
};

// Sampler info keys (CL_SAMPLER_* queries); the two mode entries are decoded
// through their kv tables.
ecl_info_t sampler_info[] = {
    { &ATOM(reference_count), CL_SAMPLER_REFERENCE_COUNT, false, OCL_UINT, 0},
    { &ATOM(context), CL_SAMPLER_CONTEXT, false, OCL_CONTEXT, 0 },
    { &ATOM(normalized_coords), CL_SAMPLER_NORMALIZED_COORDS, false, OCL_BOOL, 0 },
    { &ATOM(addressing_mode), CL_SAMPLER_ADDRESSING_MODE, false, OCL_SAMPLER_ADDRESSING_MODE, kv_addressing_mode },
    { &ATOM(filter_mode), CL_SAMPLER_FILTER_MODE, false, OCL_SAMPLER_FILTER_MODE, kv_filter_mode }
};

// Program info keys (CL_PROGRAM_* queries).
ecl_info_t program_info[] = {
    { &ATOM(reference_count), CL_PROGRAM_REFERENCE_COUNT, false, OCL_UINT, 0 },
    { &ATOM(context), CL_PROGRAM_CONTEXT, false, OCL_CONTEXT, 0},
    { &ATOM(num_devices), CL_PROGRAM_NUM_DEVICES, false, OCL_UINT, 0},
    { &ATOM(devices), CL_PROGRAM_DEVICES, true, OCL_DEVICE, 0 },
    { &ATOM(source), CL_PROGRAM_SOURCE, false, OCL_STRING, 0 },
    { &ATOM(binary_sizes), CL_PROGRAM_BINARY_SIZES, true, OCL_SIZE, 0 },
    { &ATOM(binaries), CL_PROGRAM_BINARIES, true, OCL_STRING, 0 }
};

// Program build info keys (CL_PROGRAM_BUILD_* queries); binary_type is only
// available with 1.2 headers.
ecl_info_t build_info[] = {
    { &ATOM(status), CL_PROGRAM_BUILD_STATUS, false, OCL_BUILD_STATUS, kv_build_status },
    { &ATOM(options), CL_PROGRAM_BUILD_OPTIONS, false, OCL_STRING, 0 },
    { &ATOM(log), CL_PROGRAM_BUILD_LOG, false, OCL_STRING, 0 },
#if CL_VERSION_1_2 == 1
    { &ATOM(binary_type), CL_PROGRAM_BINARY_TYPE, false, OCL_PROGRAM_BINARY_TYPE,
kv_program_binary_type },
#endif
};

// Kernel info keys (CL_KERNEL_* queries).
ecl_info_t kernel_info[] = {
    { &ATOM(function_name), CL_KERNEL_FUNCTION_NAME, false, OCL_STRING, 0 },
    { &ATOM(num_args), CL_KERNEL_NUM_ARGS, false, OCL_UINT, 0},
    { &ATOM(reference_count), CL_KERNEL_REFERENCE_COUNT, false, OCL_UINT, 0 },
    { &ATOM(context), CL_KERNEL_CONTEXT, false, OCL_CONTEXT, 0},
    { &ATOM(program), CL_KERNEL_PROGRAM, false, OCL_PROGRAM, 0}
};

// Kernel work-group info keys; the 1.1 and 1.2 additions are guarded by the
// matching header version.
ecl_info_t workgroup_info[] = {
    { &ATOM(work_group_size), CL_KERNEL_WORK_GROUP_SIZE, false, OCL_SIZE, 0 },
    { &ATOM(compile_work_group_size), CL_KERNEL_COMPILE_WORK_GROUP_SIZE, true, OCL_SIZE, 0},
    { &ATOM(local_mem_size), CL_KERNEL_LOCAL_MEM_SIZE, false, OCL_ULONG, 0 },
#if CL_VERSION_1_1 == 1
    { &ATOM(preferred_work_group_size_multiple), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, false, OCL_SIZE, 0},
    { &ATOM(private_mem_size), CL_KERNEL_PRIVATE_MEM_SIZE, false, OCL_ULONG, 0 },
#endif
#if CL_VERSION_1_2 == 1
    { &ATOM(global_work_size), CL_KERNEL_GLOBAL_WORK_SIZE, true, OCL_SIZE, 0},
#endif
};

// Event info keys (CL_EVENT_* queries); command type and execution status are
// decoded through their kv tables.
ecl_info_t event_info[] = {
    { &ATOM(command_queue), CL_EVENT_COMMAND_QUEUE, false, OCL_COMMAND_QUEUE, 0},
    { &ATOM(command_type), CL_EVENT_COMMAND_TYPE, false, OCL_ENUM, kv_command_type },
    { &ATOM(reference_count), CL_EVENT_REFERENCE_COUNT, false, OCL_UINT, 0 },
    { &ATOM(execution_status), CL_EVENT_COMMAND_EXECUTION_STATUS, false, OCL_ENUM, kv_execution_status }
};

// clGetKernelArgInfo 1.2
#if CL_VERSION_1_2 == 1
// Atom names for the kernel argument address qualifiers.
ecl_kv_t kv_address_qualifier[] = {
    { &ATOM(global), CL_KERNEL_ARG_ADDRESS_GLOBAL },
    { &ATOM(local), CL_KERNEL_ARG_ADDRESS_LOCAL },
    { &ATOM(constant), CL_KERNEL_ARG_ADDRESS_CONSTANT },
    { &ATOM(private), CL_KERNEL_ARG_ADDRESS_PRIVATE },
    { 0, 0 }
};

// Atom names for the kernel argument access qualifiers.
ecl_kv_t kv_access_qualifier[] = {
    { &ATOM(read_only), CL_KERNEL_ARG_ACCESS_READ_ONLY },
    { &ATOM(write_only), CL_KERNEL_ARG_ACCESS_WRITE_ONLY },
    { &ATOM(read_write), CL_KERNEL_ARG_ACCESS_READ_WRITE },
    { &ATOM(none), CL_KERNEL_ARG_ACCESS_NONE },
    { 0, 0 }
};

// Atom names for the kernel argument type qualifiers.
ecl_kv_t kv_type_qualifier[] = {
    { &ATOM(none), CL_KERNEL_ARG_TYPE_NONE },
    {
&ATOM(const), CL_KERNEL_ARG_TYPE_CONST },
    { &ATOM(restrict), CL_KERNEL_ARG_TYPE_RESTRICT },
    { &ATOM(volatile), CL_KERNEL_ARG_TYPE_VOLATILE },
    { 0, 0 }
};

// Kernel argument info keys (CL_KERNEL_ARG_* queries, 1.2 only).
ecl_info_t arg_info[] = {
    { &ATOM(address_qualifier), CL_KERNEL_ARG_ADDRESS_QUALIFIER, false, OCL_ENUM, kv_address_qualifier },
    { &ATOM(access_qualifier), CL_KERNEL_ARG_ACCESS_QUALIFIER, false, OCL_ENUM, kv_access_qualifier },
    { &ATOM(type_name), CL_KERNEL_ARG_TYPE_NAME, false, OCL_STRING, 0 },
    { &ATOM(type_qualifier), CL_KERNEL_ARG_TYPE_QUALIFIER, false, OCL_ENUM, kv_type_qualifier },
    { &ATOM(name), CL_KERNEL_ARG_NAME, false, OCL_STRING, 0 },
};
#endif

// Error reasons
//
// Translate an OpenCL error code into the atom that names it.  Codes without
// a case below map to the atom 'unknown'.
ERL_NIF_TERM ecl_error(cl_int err)
{
    switch(err) {
    case CL_DEVICE_NOT_FOUND: return ATOM(device_not_found);
    case CL_DEVICE_NOT_AVAILABLE: return ATOM(device_not_available);
    case CL_COMPILER_NOT_AVAILABLE: return ATOM(compiler_not_available);
    case CL_MEM_OBJECT_ALLOCATION_FAILURE: return ATOM(mem_object_allocation_failure);
    case CL_OUT_OF_RESOURCES: return ATOM(out_of_resources);
    case CL_OUT_OF_HOST_MEMORY: return ATOM(out_of_host_memory);
    case CL_PROFILING_INFO_NOT_AVAILABLE: return ATOM(profiling_info_not_available);
    case CL_MEM_COPY_OVERLAP: return ATOM(mem_copy_overlap);
    case CL_IMAGE_FORMAT_MISMATCH: return ATOM(image_format_mismatch);
    case CL_IMAGE_FORMAT_NOT_SUPPORTED: return ATOM(image_format_not_supported);
    case CL_BUILD_PROGRAM_FAILURE: return ATOM(build_program_failure);
    case CL_MAP_FAILURE: return ATOM(map_failure);
    case CL_INVALID_VALUE: return ATOM(invalid_value);
    case CL_INVALID_DEVICE_TYPE: return ATOM(invalid_device_type);
    case CL_INVALID_PLATFORM: return ATOM(invalid_platform);
    case CL_INVALID_DEVICE: return ATOM(invalid_device);
    case CL_INVALID_CONTEXT: return ATOM(invalid_context);
    case CL_INVALID_QUEUE_PROPERTIES: return ATOM(invalid_queue_properties);
    case CL_INVALID_COMMAND_QUEUE: return ATOM(invalid_command_queue);
    case CL_INVALID_HOST_PTR: return ATOM(invalid_host_ptr);
    case CL_INVALID_MEM_OBJECT: return
ATOM(invalid_mem_object);
    // (continuation of the ecl_error switch)
    case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return ATOM(invalid_image_format_descriptor);
    case CL_INVALID_IMAGE_SIZE: return ATOM(invalid_image_size);
    case CL_INVALID_SAMPLER: return ATOM(invalid_sampler);
    case CL_INVALID_BINARY: return ATOM(invalid_binary);
    case CL_INVALID_BUILD_OPTIONS: return ATOM(invalid_build_options);
    case CL_INVALID_PROGRAM: return ATOM(invalid_program);
    case CL_INVALID_PROGRAM_EXECUTABLE: return ATOM(invalid_program_executable);
    case CL_INVALID_KERNEL_NAME: return ATOM(invalid_kernel_name);
    case CL_INVALID_KERNEL_DEFINITION: return ATOM(invalid_kernel_definition);
    case CL_INVALID_KERNEL: return ATOM(invalid_kernel);
    case CL_INVALID_ARG_INDEX: return ATOM(invalid_arg_index);
    case CL_INVALID_ARG_VALUE: return ATOM(invalid_arg_value);
    case CL_INVALID_ARG_SIZE: return ATOM(invalid_arg_size);
    case CL_INVALID_KERNEL_ARGS: return ATOM(invalid_kernel_args);
    case CL_INVALID_WORK_DIMENSION: return ATOM(invalid_work_dimension);
    case CL_INVALID_WORK_GROUP_SIZE: return ATOM(invalid_work_group_size);
    case CL_INVALID_WORK_ITEM_SIZE: return ATOM(invalid_work_item_size);
    case CL_INVALID_GLOBAL_OFFSET: return ATOM(invalid_global_offset);
    case CL_INVALID_EVENT_WAIT_LIST: return ATOM(invalid_event_wait_list);
    case CL_INVALID_EVENT: return ATOM(invalid_event);
    case CL_INVALID_OPERATION: return ATOM(invalid_operation);
    case CL_INVALID_GL_OBJECT: return ATOM(invalid_gl_object);
    case CL_INVALID_BUFFER_SIZE: return ATOM(invalid_buffer_size);
    case CL_INVALID_MIP_LEVEL: return ATOM(invalid_mip_level);
    case CL_INVALID_GLOBAL_WORK_SIZE: return ATOM(invalid_global_work_size);
    // Anything not listed above is reported as 'unknown'.
    default: return ATOM(unknown);
    }
}

// Build the Erlang term {error, Reason} for an OpenCL error code, where
// Reason is the atom chosen by ecl_error().
ERL_NIF_TERM ecl_make_error(ErlNifEnv* env, cl_int err)
{
    return enif_make_tuple2(env, ATOM(error), ecl_error(err));
}

// Print "<file>:<line>: <message>" followed by CR LF on stderr.  The first
// variadic argument is the printf-style format string (char*); the remaining
// ones are its arguments.
static void ecl_emit_error(char* file, int line, ...)
{ va_list ap; char* fmt; va_start(ap, line); fmt = va_arg(ap, char*); fprintf(stderr, "%s:%d: ", file, line); vfprintf(stderr, fmt, ap); fprintf(stderr, "\r\n"); va_end(ap); fflush(stderr); } // Parse bool static int get_bool(ErlNifEnv* env, const ERL_NIF_TERM key, cl_bool* val) { UNUSED(env); if (key == ATOM(true)) { *val = true; return 1; } else if (key == ATOM(false)) { *val = false; return 1; } return 0; } // Parse enum static int get_enum(ErlNifEnv* env, const ERL_NIF_TERM key, cl_uint* num, ecl_kv_t* kv) { UNUSED(env); if (!enif_is_atom(env, key)) return 0; while(kv->key) { if (*kv->key == key) { *num = (cl_uint) kv->value; return 1; } kv++; } return 0; } // Parse bitfield static int get_bitfield(ErlNifEnv* env, const ERL_NIF_TERM key, cl_bitfield* field, ecl_kv_t* kv) { UNUSED(env); if (!enif_is_atom(env, key)) return 0; while(kv->key) { if (*kv->key == key) { *field = kv->value; return 1; } kv++; } return 0; } static int get_bitfields(ErlNifEnv* env, const ERL_NIF_TERM term, cl_bitfield* field, ecl_kv_t* kv) { cl_bitfield t; if (enif_is_atom(env, term)) { if (!get_bitfield(env, term, &t, kv)) return 0; *field = t; return 1; } else if (enif_is_empty_list(env, term)) { *field = 0; return 1; } else if (enif_is_list(env, term)) { cl_bitfield fs = 0; ERL_NIF_TERM list = term; ERL_NIF_TERM head, tail; while(enif_get_list_cell(env, list, &head, &tail)) { if (!get_bitfield(env, head, &t, kv)) return 0; fs |= t; list = tail; } if (!enif_is_empty_list(env, list)) return 0; *field = fs; return 1; } return 0; } ERL_NIF_TERM make_enum(ErlNifEnv* env, cl_uint num, ecl_kv_t* kv) { while(kv->key) { if (num == (cl_uint)kv->value) return *kv->key; kv++; } return enif_make_uint(env, num); } ERL_NIF_TERM make_bitfields(ErlNifEnv* env, cl_bitfield v, ecl_kv_t* kv) { ERL_NIF_TERM list = enif_make_list(env, 0); if (v) { int n = 0; while(kv->key) { kv++; n++; } while(n--) { kv--; if ((kv->value & v) == kv->value) list = enif_make_list_cell(env, *kv->key, list); } } return list; } 
/****************************************************************************** * * Linear hash functions * *****************************************************************************/ #define EPTR_HANDLE(ptr) ((intptr_t)(ptr)) static lhash_value_t ref_hash(void* key) { return (lhash_value_t) key; } static int ref_cmp(void* key, void* data) { if (((intptr_t)key) == EPTR_HANDLE(((ecl_object_t*)data)->opaque)) return 0; return 1; } static void ref_release(void *data) { UNUSED(data); // object's are free'd by garbage collection } // Remove object from hash static void object_erase(ecl_object_t* obj) { ecl_env_t* ecl = obj->env; enif_rwlock_rwlock(ecl->ref_lock); lhash_erase(&ecl->ref, (void*)EPTR_HANDLE(obj->opaque)); enif_rwlock_rwunlock(ecl->ref_lock); } /****************************************************************************** * * Message queue * *****************************************************************************/ // Peek at queue front #if 0 static ecl_message_t* ecl_queue_peek(ecl_queue_t* q) { ecl_qlink_t* ql; enif_mutex_lock(q->mtx); ql = q->front; enif_mutex_unlock(q->mtx); if (ql) return &ql->mesg; else return 0; } #endif // Get message from queue front static int ecl_queue_get(ecl_queue_t* q, ecl_message_t* m) { ecl_qlink_t* ql; enif_mutex_lock(q->mtx); while(!(ql = q->front)) { enif_cond_wait(q->cv, q->mtx); } if (!(q->front = ql->next)) q->rear = 0; q->len--; *m = ql->mesg; if ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1])) { ql->next = q->free; q->free = ql; } else enif_free(ql); enif_mutex_unlock(q->mtx); return 0; } // Put message at queue rear static int ecl_queue_put(ecl_queue_t* q, ecl_message_t* m) { ecl_qlink_t* ql; ecl_qlink_t* qr; int res = 0; enif_mutex_lock(q->mtx); if ((ql = q->free)) q->free = ql->next; else ql = enif_alloc(sizeof(ecl_qlink_t)); if (!ql) res = -1; else { ql->mesg = *m; q->len++; ql->next = 0; if (!(qr = q->rear)) { q->front = ql; enif_cond_signal(q->cv); } else qr->next = ql; q->rear = ql; } 
enif_mutex_unlock(q->mtx); return res; } static int ecl_queue_init(ecl_queue_t* q) { int i; if (!(q->cv = enif_cond_create("queue_cv"))) return -1; if (!(q->mtx = enif_mutex_create("queue_mtx"))) return -1; q->front = 0; q->rear = 0; q->len = 0; for (i = 0; i < MAX_QLINK-1; i++) q->ql[i].next = &q->ql[i+1]; q->ql[MAX_QLINK-1].next = 0; q->free = &q->ql[0]; return 0; } static void ecl_queue_destroy(ecl_queue_t* q) { ecl_qlink_t* ql; enif_cond_destroy(q->cv); enif_mutex_destroy(q->mtx); ql = q->front; while(ql) { ecl_qlink_t* qln = ql->next; if ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1])) ; else enif_free(ql); ql = qln; } } /****************************************************************************** * * Threads * *****************************************************************************/ static int ecl_message_send(ecl_thread_t* thr, ecl_message_t* m) { return ecl_queue_put(&thr->q, m); } static int ecl_message_recv(ecl_thread_t* thr, ecl_message_t* m) { int r; if ((r = ecl_queue_get(&thr->q, m)) < 0) return r; return 0; } #if 0 static ecl_message_t* ecl_message_peek(ecl_thread_t* thr, ecl_thread_t** from) { ecl_message_t* m; if ((m = ecl_queue_peek(&thr->q))) { if (from) *from = m->sender; } return m; } #endif static ecl_thread_t* ecl_thread_start(void* (*func)(void* arg), void* arg, int stack_size) { ErlNifThreadOpts* opts; ecl_thread_t* thr; if (!(thr = enif_alloc(sizeof(ecl_thread_t)))) return 0; if (ecl_queue_init(&thr->q) < 0) goto error; if (!(opts = enif_thread_opts_create("ecl_thread_opts"))) goto error; opts->suggested_stack_size = stack_size; thr->arg = arg; enif_thread_create("ecl_thread", &thr->tid, func, thr, opts); enif_thread_opts_destroy(opts); return thr; error: enif_free(thr); return 0; } static int ecl_thread_stop(ecl_thread_t* thr, void** exit_value) { ecl_message_t m; m.type = ECL_MESSAGE_STOP; m.env = 0; ecl_message_send(thr, &m); enif_thread_join(thr->tid, exit_value); ecl_queue_destroy(&thr->q); enif_free(thr); return 0; } static 
void ecl_thread_exit(void* value) { enif_thread_exit(value); } /****************************************************************************** * * Ecl resource * *****************************************************************************/ static int ecl_resource_init(ErlNifEnv* env, ecl_resource_t* res, char* name, size_t size, // object size void (*dtor)(ErlNifEnv*, ecl_object_t*), ErlNifResourceFlags flags, ErlNifResourceFlags* tried) { res->name = name; res->type = enif_make_atom(env, name); res->size = size; res->res = enif_open_resource_type(env, 0, name, (ErlNifResourceDtor*) dtor, flags, tried); return 0; } // // Reference new kernel argument and Dereference old value // static void unref_kernel_arg(int type, void* val) { switch(type) { case KERNEL_ARG_MEM: if (val) clReleaseMemObject((cl_mem) val); break; case KERNEL_ARG_SAMPLER: if (val) clReleaseSampler((cl_sampler) val); break; case KERNEL_ARG_OTHER: default: break; } } static void ref_kernel_arg(int type, void* val) { switch(type) { case KERNEL_ARG_MEM: if (val) clRetainMemObject((cl_mem) val); break; case KERNEL_ARG_SAMPLER: if (val) clRetainSampler((cl_sampler) val); break; case KERNEL_ARG_OTHER: default: break; } } static int set_kernel_arg(ecl_kernel_t* kern, cl_uint i, int type, void* value) { if (i < kern->num_args) { int old_type = kern->arg[i].type; void* old_value = kern->arg[i].value; ref_kernel_arg(type, value); kern->arg[i].type = type; kern->arg[i].value = value; unref_kernel_arg(old_type, old_value); return 0; } return -1; } /****************************************************************************** * * Resource destructors * *****************************************************************************/ static void ecl_platform_dtor(ErlNifEnv* env, ecl_object_t* obj) { UNUSED(env); UNUSED(obj); DBG("ecl_platform_dtor: %p", obj); object_erase(obj); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_device_dtor(ErlNifEnv* env, ecl_object_t* obj) { UNUSED(env); 
UNUSED(obj); DBG("ecl_device_dtor: %p", obj); object_erase(obj); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_queue_dtor(ErlNifEnv* env, ecl_object_t* obj) { UNUSED(env); DBG("ecl_queue_dtor: %p", obj); clReleaseCommandQueue(obj->queue); object_erase(obj); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_mem_dtor(ErlNifEnv* env, ecl_object_t* obj) { UNUSED(env); DBG("ecl_mem_dtor: %p", obj); clReleaseMemObject(obj->mem); object_erase(obj); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_sampler_dtor(ErlNifEnv* env, ecl_object_t* obj) { UNUSED(env); DBG("ecl_sampler_dtor: %p", obj); clReleaseSampler(obj->sampler); object_erase(obj); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_program_dtor(ErlNifEnv* env, ecl_object_t* obj) { UNUSED(env); DBG("ecl_program_dtor: %p", obj); clReleaseProgram(obj->program); object_erase(obj); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_kernel_dtor(ErlNifEnv* env, ecl_object_t* obj) { ecl_kernel_t* kern = (ecl_kernel_t*) obj; cl_uint i; UNUSED(env); DBG("ecl_kernel_dtor: %p", kern); for (i = 0; i < kern->num_args; i++) unref_kernel_arg(kern->arg[i].type, kern->arg[i].value); enif_free(kern->arg); clReleaseKernel(kern->obj.kernel); object_erase(obj); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_event_dtor(ErlNifEnv* env, ecl_object_t* obj) { ecl_event_t* evt = (ecl_event_t*) obj; UNUSED(env); DBG("ecl_event_dtor: %p", evt); clReleaseEvent(evt->obj.event); object_erase(obj); if (evt->bin) { if (!evt->rl) enif_release_binary(evt->bin); enif_free(evt->bin); } if (evt->bin_env) enif_free_env(evt->bin_env); if (obj->parent) enif_release_resource(obj->parent); } static void ecl_context_dtor(ErlNifEnv* env, ecl_object_t* obj) { void* exit_value; ecl_context_t* ctx = (ecl_context_t*) obj; UNUSED(env); DBG("ecl_context_dtor: %p", ctx); clReleaseContext(ctx->obj.context); object_erase(obj); // parent 
is always = 0 // kill the event thread ecl_thread_stop(ctx->thr, &exit_value); } /****************************************************************************** * * make/get * *****************************************************************************/ // For now, wrap the resource object {type,pointer-val,handle} static ERL_NIF_TERM make_object(ErlNifEnv* env, const ERL_NIF_TERM type, void* robject) { if (!robject) return ATOM(undefined); else return enif_make_tuple3(env, type, ecl_make_sizet(env, (size_t) robject), enif_make_resource(env, robject)); } // Accept {type,pointer-val,handle} static int get_ecl_object(ErlNifEnv* env, const ERL_NIF_TERM term, ecl_resource_t* rtype, bool nullp, ecl_object_t** robjectp) { const ERL_NIF_TERM* elem; int arity; size_t handle; // not really a size_t but the type has a good size if (nullp && (term == ATOM(undefined))) { *robjectp = 0; return 1; } if (!enif_get_tuple(env, term, &arity, &elem)) return 0; if (arity != 3) return 0; if (!enif_is_atom(env, elem[0]) || (elem[0] != rtype->type)) return 0; if (!ecl_get_sizet(env, elem[1], &handle)) return 0; if (!enif_get_resource(env, elem[2], rtype->res, (void**) robjectp)) return 0; if ((size_t)*robjectp != handle) return 0; return 1; } #if 0 static int get_ecl_object_list(ErlNifEnv* env, const ERL_NIF_TERM term, ecl_resource_t* rtype, bool nullp, ecl_object_t** robjv, size_t* rlen) { size_t maxlen = *rlen; size_t n = 0; ERL_NIF_TERM list = term; while(n < maxlen) { ERL_NIF_TERM head, tail; if (enif_get_list_cell(env, list, &head, &tail)) { if (!get_ecl_object(env, head, rtype, nullp, robjv)) return 0; n++; robjv++; list = tail; } else if (enif_is_empty_list(env, list)) { *rlen = n; return 1; } else return 0; } return 0; } #endif static int get_object(ErlNifEnv* env, const ERL_NIF_TERM term, ecl_resource_t* rtype, bool nullp, void** rptr) { ecl_object_t* obj; if (get_ecl_object(env, term, rtype, nullp, &obj)) { *rptr = obj ? 
obj->opaque : 0; return 1; } return 0; } static int get_object_list(ErlNifEnv* env, const ERL_NIF_TERM term, ecl_resource_t* rtype, bool nullp, void** robjv, cl_uint* rlen) { cl_uint maxlen = *rlen; cl_uint n = 0; ERL_NIF_TERM list = term; while(n < maxlen) { ERL_NIF_TERM head, tail; if (enif_get_list_cell(env, list, &head, &tail)) { if (!get_object(env, head, rtype, nullp, robjv)) return 0; n++; robjv++; list = tail; } else if (enif_is_empty_list(env, list)) { *rlen = n; return 1; } else return 0; } return 0; } static int get_sizet_list(ErlNifEnv* env, const ERL_NIF_TERM term, size_t* rvec, size_t* rlen) { size_t maxlen = *rlen; size_t n = 0; ERL_NIF_TERM list = term; while(n < maxlen) { ERL_NIF_TERM head, tail; if (enif_get_list_cell(env, list, &head, &tail)) { if (!ecl_get_sizet(env, head, rvec)) return 0; n++; rvec++; list = tail; } else if (enif_is_empty_list(env, list)) { *rlen = n; return 1; } else return 0; } if (enif_is_empty_list(env, list)) { *rlen = n; return 1; } return 0; } static int get_binary_list(ErlNifEnv* env, const ERL_NIF_TERM term, ErlNifBinary* rvec, size_t* rlen) { size_t maxlen = *rlen; size_t n = 0; ERL_NIF_TERM list = term; while(n < maxlen) { ERL_NIF_TERM head, tail; if (enif_get_list_cell(env, list, &head, &tail)) { if (!enif_inspect_binary(env, head, rvec)) return 0; n++; rvec++; list = tail; } else if (enif_is_empty_list(env, list)) { *rlen = n; return 1; } else return 0; } return 0; } #if CL_VERSION_1_2 == 1 // avoid warning // currently onlt used my compile_program which is a 1.2 function // free an array of strings static void free_string_list(char** rvec, size_t n) { int i; for (i = 0; i < (int)n; i++) enif_free(rvec[i]); } // get a list of, max *rlen, dynamically allocated, strings. 
static int get_string_list(ErlNifEnv* env, const ERL_NIF_TERM term, char** rvec, size_t* rlen) { char** rvec0 = rvec; size_t maxlen = *rlen; size_t n = 0; ERL_NIF_TERM list = term; ERL_NIF_TERM head, tail; while((n < maxlen) && enif_get_list_cell(env, list, &head, &tail)) { char* str; unsigned int len; if (!enif_get_list_length(env, head, &len)) goto error; if (!(str = enif_alloc(len+1))) goto error; if (!enif_get_string(env, head, str, len+1, ERL_NIF_LATIN1)) goto error; *rvec++ = str; n++; list = tail; } if (enif_is_empty_list(env, list)) { *rlen = n; return 1; } error: free_string_list(rvec0, rvec-rvec0); return 0; } #endif // Copy a "local" binary to a new process independent environment // fill the binary structure with the new data and return it. // static int ecl_make_binary(ErlNifEnv* src_env, const ERL_NIF_TERM src, ErlNifEnv* dst_env, ErlNifBinary* bin) { ERL_NIF_TERM ref_counted; if (enif_is_binary(src_env, src)) { // Update refc (and/or fix heap binaries) ref_counted = enif_make_copy(dst_env, src); return enif_inspect_binary(dst_env, ref_counted, bin); } else { // iolist to binary if (!enif_inspect_iolist_as_binary(src_env, src, bin)) return 0; // ref count binary ? 
enif_make_binary(dst_env, bin); return 1; } } // Lookup a openCL object (native => reource ecl_object_t*) static ecl_object_t* ecl_lookup(ErlNifEnv* env, void* ptr) { if (!ptr) return 0; else { ecl_env_t* ecl = enif_priv_data(env); ecl_object_t* obj; enif_rwlock_rlock(ecl->ref_lock); obj = (ecl_object_t*) lhash_lookup(&ecl->ref,(void*)EPTR_HANDLE(ptr)); enif_rwlock_runlock(ecl->ref_lock); return obj; } } // Create a new openCL resource object static ecl_object_t* ecl_new(ErlNifEnv* env, ecl_resource_t* rtype, void* ptr, ecl_object_t* parent, cl_int version) { if (!ptr) return 0; else { ecl_env_t* ecl = enif_priv_data(env); ecl_object_t* obj; obj = enif_alloc_resource(rtype->res, rtype->size); if (obj) { if (parent) enif_keep_resource(parent); obj->opaque = ptr; obj->env = ecl; obj->parent = parent; if(version == -1) { version = parent ? parent->version : ecl->icd_version; } obj->version = (version < ecl->icd_version) ? version : ecl->icd_version; enif_rwlock_rwlock(ecl->ref_lock); lhash_insert_new(&ecl->ref, (void*)EPTR_HANDLE(ptr), obj); enif_rwlock_rwunlock(ecl->ref_lock); } return obj; } } static ERL_NIF_TERM ecl_make_object(ErlNifEnv* env, ecl_resource_t* rtype, void* ptr, ecl_object_t* parent) { ecl_object_t* obj = ecl_new(env,rtype,ptr,parent,-1); ERL_NIF_TERM res; res = make_object(env, rtype->type, obj); if (obj) enif_release_resource(obj); return res; } // lookup or create a new ecl_object_t resource static ecl_object_t* ecl_maybe_new(ErlNifEnv* env, ecl_resource_t* rtype, void* ptr, ecl_object_t* parent, bool* is_new) { ecl_object_t* obj = ecl_lookup(env, ptr); if (!obj) { obj = ecl_new(env, rtype, ptr, parent,-1); *is_new = true; } else *is_new = false; return obj; } // lookup or create resource object, return as erlang term static ERL_NIF_TERM ecl_lookup_object(ErlNifEnv* env, ecl_resource_t* rtype, void* ptr, ecl_object_t* parent) { bool is_new; ERL_NIF_TERM res; ecl_object_t* obj = ecl_maybe_new(env,rtype,ptr,parent,&is_new); res = make_object(env, 
rtype->type, obj); if (obj && is_new) enif_release_resource(obj); return res; } static ERL_NIF_TERM ecl_make_kernel(ErlNifEnv* env, cl_kernel kernel, ecl_object_t* parent) { ecl_kernel_t* kern = (ecl_kernel_t*) ecl_new(env,&kernel_r, (void*)kernel,parent,-1); ERL_NIF_TERM res; cl_uint num_args; size_t sz; // Get number of arguments, FIXME: check error return clGetKernelInfo(kernel,CL_KERNEL_NUM_ARGS,sizeof(num_args),&num_args,0); sz = num_args*sizeof(ecl_kernel_arg_t); kern->arg = (ecl_kernel_arg_t*) enif_alloc(sz); memset(kern->arg, 0, sz); kern->num_args = num_args; res = make_object(env, kernel_r.type, kern); if (kern) enif_release_resource(kern); return res; } static ERL_NIF_TERM ecl_make_event(ErlNifEnv* env, cl_event event, bool rd, bool rl, ErlNifEnv* bin_env, ErlNifBinary* bin, ecl_object_t* parent) { ecl_event_t* evt = (ecl_event_t*) ecl_new(env,&event_r, (void*)event,parent,-1); ERL_NIF_TERM res; evt->bin_env = bin_env; evt->bin = bin; evt->rd = rd; evt->rl = rl; res = make_object(env, event_r.type, (ecl_object_t*) evt); if (evt) enif_release_resource(evt); return res; } static ERL_NIF_TERM ecl_make_context(ErlNifEnv* env, cl_context context, cl_int version) { ERL_NIF_TERM res; ecl_context_t* ctx = (ecl_context_t*) ecl_new(env,&context_r, (void*)context,0,version); ctx->thr = ecl_thread_start(ecl_context_main, ctx, 8); // 8K stack! 
res = make_object(env, context_r.type, (ecl_object_t*) ctx); if (ctx) enif_release_resource(ctx); return res; } typedef cl_int CL_API_CALL info_fn_t(void* ptr, cl_uint param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret); typedef cl_int CL_API_CALL info2_fn_t(void* ptr1, void* ptr2, cl_uint param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret); // return size of type static size_t ecl_sizeof(ocl_type_t type) { switch(type) { case OCL_CHAR: return sizeof(cl_char); case OCL_UCHAR: return sizeof(cl_uchar); case OCL_SHORT: return sizeof(cl_short); case OCL_USHORT: return sizeof(cl_ushort); case OCL_INT: return sizeof(cl_int); case OCL_UINT: return sizeof(cl_uint); case OCL_LONG: return sizeof(cl_long); case OCL_ULONG: return sizeof(cl_ulong); case OCL_HALF: return sizeof(cl_half); case OCL_FLOAT: return sizeof(cl_float); case OCL_DOUBLE: return sizeof(cl_double); case OCL_BOOL: return sizeof(cl_bool); case OCL_STRING: return sizeof(cl_char*); case OCL_ENUM: return sizeof(cl_int); case OCL_BITFIELD: return sizeof(cl_bitfield); case OCL_POINTER: return sizeof(void*); case OCL_SIZE: return sizeof(size_t); case OCL_PLATFORM: return sizeof(void*); case OCL_DEVICE: return sizeof(void*); case OCL_CONTEXT: return sizeof(void*); case OCL_PROGRAM: return sizeof(void*); case OCL_COMMAND_QUEUE: return sizeof(void*); case OCL_IMAGE_FORMAT: return sizeof(cl_image_format); #if CL_VERSION_1_2 == 1 case OCL_DEVICE_PARTITION: return sizeof(cl_device_partition_property); #endif default: DBG("info_size: unknown type %d detected", type); return sizeof(cl_int); } } // put basic value types static ERL_NIF_TERM make_info_element(ErlNifEnv* env, ocl_type_t type, void* ptr, ecl_kv_t* kv) { switch(type) { case OCL_CHAR: return enif_make_int(env, *((cl_char*)ptr)); case OCL_SHORT: return enif_make_int(env, *((cl_short*)ptr)); case OCL_INT: return enif_make_int(env, *((cl_int*)ptr)); case OCL_LONG: return enif_make_int64(env, 
*((cl_long*)ptr)); case OCL_UCHAR: return enif_make_uint(env, *((cl_uchar*)ptr)); case OCL_USHORT: return enif_make_uint(env, *((cl_ushort*)ptr)); case OCL_UINT: return enif_make_uint(env, *((cl_uint*)ptr)); case OCL_HALF: return enif_make_uint(env, *((cl_half*)ptr)); case OCL_ULONG: return enif_make_uint64(env, *((cl_ulong*)ptr)); case OCL_SIZE: return ecl_make_sizet(env, *((size_t*)ptr)); case OCL_FLOAT: return enif_make_double(env, *((cl_float*)ptr)); case OCL_DOUBLE: return enif_make_double(env, *((cl_double*)ptr)); case OCL_BOOL: return (*((cl_bool*)ptr)) ? ATOM(true) : ATOM(false); // case POINTER: cbuf_put_pointer(data, *((pointer_t*)ptr)); break; case OCL_STRING: return enif_make_string_len(env, (char*) ptr, strlen((char*) ptr), ERL_NIF_LATIN1); case OCL_BITFIELD: return make_bitfields(env, *((cl_bitfield*)ptr), kv); case OCL_ENUM: return make_enum(env, *((cl_int*)ptr), kv); case OCL_POINTER: return ecl_make_sizet(env, *((intptr_t*)ptr)); case OCL_PLATFORM: return ecl_lookup_object(env,&platform_r,*(void**)ptr,0); case OCL_DEVICE: return ecl_lookup_object(env,&device_r,*(void**)ptr,0); case OCL_CONTEXT: return ecl_lookup_object(env,&context_r,*(void**)ptr,0); case OCL_PROGRAM: // FIXME: find context object, pass as parent return ecl_lookup_object(env,&program_r,*(void**)ptr,0); case OCL_COMMAND_QUEUE: // FIXME: find context object, pass as parent return ecl_lookup_object(env,&command_queue_r,*(void**)ptr,0); case OCL_IMAGE_FORMAT: { cl_image_format* fmt = (cl_image_format*) ptr; ERL_NIF_TERM channel_order; ERL_NIF_TERM channel_type; channel_order = make_enum(env,fmt->image_channel_order, kv_channel_order); channel_type = make_enum(env,fmt->image_channel_data_type, kv_channel_type); return enif_make_tuple2(env, channel_order, channel_type); } #if CL_VERSION_1_2 == 1 case OCL_DEVICE_PARTITION: { // cl_device_partition_property cl_device_partition_property* prop = (cl_device_partition_property*)ptr; ERL_NIF_TERM term = ATOM(undefined); switch(*prop++) { case 
CL_DEVICE_PARTITION_EQUALLY: term = enif_make_uint(env, *prop); return enif_make_tuple2(env, ATOM(equally), term); case CL_DEVICE_PARTITION_BY_COUNTS: { cl_device_partition_property* pp = prop; term = enif_make_list(env, 0); while(*pp != CL_DEVICE_PARTITION_BY_COUNTS_LIST_END) pp++; if (pp > prop) { // build list backwards pp--; while(pp >= prop) { ERL_NIF_TERM ui = enif_make_uint(env, *pp); term = enif_make_list_cell(env, ui, term); pp--; } } return enif_make_tuple2(env, ATOM(by_counts), term); } case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: switch(*prop) { case CL_DEVICE_AFFINITY_DOMAIN_NUMA: term = ATOM(numa); break; case CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE: term = ATOM(l4_cache); break; case CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE: term = ATOM(l3_cache); break; case CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE: term = ATOM(l2_cache); break; case CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE: term = ATOM(l1_cache); break; case CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE: term = ATOM(next_partitionable); break; default: return ATOM(undefined); } return enif_make_tuple2(env, ATOM(by_affinity_domain), term); default: return ATOM(undefined); } break; } #endif default: return ATOM(undefined); } } static ERL_NIF_TERM make_info_value(ErlNifEnv* env, ecl_info_t* iptr, void* buf, size_t buflen) { char* dptr = (char*) buf; ERL_NIF_TERM value; if (iptr->is_array) { // arrays are return as lists of items ERL_NIF_TERM list = enif_make_list(env, 0); size_t elem_size = ecl_sizeof(iptr->info_type); size_t n = (buflen / elem_size); dptr += (n*elem_size); // run backwards!!! 
while (buflen >= elem_size) { dptr -= elem_size; value = make_info_element(env, iptr->info_type, dptr, iptr->extern_info); list = enif_make_list_cell(env, value, list); buflen -= elem_size; } value = list; } else { value = make_info_element(env, iptr->info_type, dptr, iptr->extern_info); } return value; } // Find object value // return {ok,Value} | {error,Reason} | exception badarg // ERL_NIF_TERM make_object_info(ErlNifEnv* env, ERL_NIF_TERM key, ecl_object_t* obj, info_fn_t* func, ecl_info_t* info, size_t num_info) { size_t returned_size = 0; size_t size = MAX_INFO_SIZE; unsigned char buf[MAX_INFO_SIZE]; void* ptr = buf; ERL_NIF_TERM res; cl_int err; unsigned int i; if (!enif_is_atom(env, key)) return enif_make_badarg(env); i = 0; while((i < num_info) && (*info[i].info_key != key)) i++; if (i == num_info) return enif_make_badarg(env); // or error ? err = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size); if (err == CL_INVALID_VALUE) { // try again allocate returned_size, returned_size does not // (yet) return the actual needed bytes (by spec) // but it looks like it... ;-) size = returned_size; if (!(ptr = enif_alloc(size))) return ecl_make_error(env, CL_OUT_OF_HOST_MEMORY); err = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size); } if (!err) { res = enif_make_tuple2(env, ATOM(ok), make_info_value(env,&info[i],ptr,returned_size)); } else res = ecl_make_error(env, err); if (ptr != buf) enif_free(ptr); return res; } ERL_NIF_TERM make_object_info2(ErlNifEnv* env, ERL_NIF_TERM key, ecl_object_t* obj1, void* obj2, info2_fn_t* func, ecl_info_t* info, size_t num_info) { size_t returned_size = 0; cl_long *buf; cl_int err; unsigned int i; ERL_NIF_TERM result; if (!enif_is_atom(env, key)) return enif_make_badarg(env); i = 0; while((i < num_info) && (*info[i].info_key != key)) i++; if (i == num_info) return enif_make_badarg(env); // or error ? 
if (!(err = (*func)(obj1->opaque, obj2, info[i].info_id, 0, NULL, &returned_size))) { if (!(buf = enif_alloc(returned_size))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); if (!(err = (*func)(obj1->opaque, obj2, info[i].info_id, returned_size, buf, &returned_size))) { result = enif_make_tuple2(env, ATOM(ok), make_info_value(env, &info[i], buf, returned_size)); enif_free(buf); return result; } } return ecl_make_error(env, err); } /****************************************************************************** * * main ecl event loop run as a thread. * The main purpose is to dispatch and send messages to owners * *****************************************************************************/ static void* ecl_context_main(void* arg) { ecl_thread_t* self = arg; // ecl_context_t* ctx = self->arg; DBG("ecl_context_main: started (%p)", self); while(1) { ecl_message_t m; int res; ecl_message_recv(self, &m); UNUSED(res); switch(m.type) { case ECL_MESSAGE_STOP: { DBG("ecl_context_main: stopped by command"); if (m.env) { enif_send(0, &m.sender, m.env, enif_make_tuple3(m.env, ATOM(cl_async), m.ref, ATOM(ok))); enif_free_env(m.env); } ecl_thread_exit(self); break; } case ECL_MESSAGE_FLUSH: { // flush message queue cl_int err; DBG("ecl_context_main: flush q=%lu", (unsigned long) m.queue); err = clFlush(m.queue->queue); // send {cl_async, Ref, ok | {error,Reason}} if (m.env) { ERL_NIF_TERM reply; reply = !err ? ATOM(ok) : ecl_make_error(m.env, err); res = enif_send(0, &m.sender, m.env, enif_make_tuple3(m.env, ATOM(cl_async), m.ref, reply)); DBG("ecl_context_main: send r=%d", res); enif_free_env(m.env); } enif_release_resource(m.queue); break; } case ECL_MESSAGE_FINISH: { // finish message queue cl_int err; DBG("ecl_context_main: finish q=%lu", (unsigned long) m.queue); err = clFlush(m.queue->queue); // send {cl_async, Ref, ok | {error,Reason}} if (m.env) { ERL_NIF_TERM reply; reply = !err ? 
ATOM(ok) : ecl_make_error(m.env, err); res = enif_send(0, &m.sender, m.env, enif_make_tuple3(m.env, ATOM(cl_async), m.ref, reply)); DBG("ecl_context_main: send r=%d", res); enif_free_env(m.env); } enif_release_resource(m.queue); break; } case ECL_MESSAGE_WAIT_FOR_EVENT: { // wait for one event cl_int err; cl_event list[1]; DBG("ecl_context_main: wait_for_event e=%lu", (unsigned long) m.event); list[0] = m.event->obj.event; err = clWaitForEvents(1, list); DBG("ecl_context_main: wait_for_event err=%d", err); // reply to caller pid ! if (m.env) { ERL_NIF_TERM reply; if (!err) { cl_int status; // read status COMPLETE | ERROR // FIXME: check error clGetEventInfo(m.event->obj.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, 0); switch(status) { case CL_COMPLETE: DBG("ecl_context_main: wait_for_event complete"); if (m.event->bin && m.event->rd) { m.event->rl = true; reply = enif_make_binary(m.env, m.event->bin); } else reply = ATOM(complete); break; default: DBG("ecl_context_main: wait_for_event: status=%d", status); // must/should be an error reply = ecl_make_error(m.env, status); break; } } else reply = ecl_make_error(m.env, err); res = enif_send(0, &m.sender, m.env, enif_make_tuple3(m.env, ATOM(cl_event), m.ref, reply)); DBG("ecl_context_main: send r=%d", res); enif_free_env(m.env); } enif_release_resource(m.event); break; } default: break; } } return 0; } // // API functions // // noop - no operation for NIF interface performance benchmarking static ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { UNUSED(env); UNUSED(argc); UNUSED(argv); return ATOM(ok); } // version - return list of API versions supported static ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ERL_NIF_TERM list = enif_make_list(env, 0); ERL_NIF_TERM vsn; UNUSED(env); UNUSED(argc); UNUSED(argv); #if CL_VERSION_1_0 == 1 vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 0)); list = enif_make_list_cell(env, 
vsn, list); #endif #if CL_VERSION_1_1 == 1 vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 1)); list = enif_make_list_cell(env, vsn, list); #endif #if CL_VERSION_1_2 == 1 vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 2)); list = enif_make_list_cell(env, vsn, list); #endif return list; } static ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { cl_uint num_platforms; cl_platform_id platform_id[MAX_PLATFORMS]; ERL_NIF_TERM idv[MAX_PLATFORMS]; ERL_NIF_TERM platform_list; cl_uint i; cl_int err; UNUSED(argc); UNUSED(argv); if ((err = clGetPlatformIDs(MAX_PLATFORMS, platform_id, &num_platforms))) return ecl_make_error(env, err); for (i = 0; i < num_platforms; i++) idv[i] = ecl_lookup_object(env,&platform_r,platform_id[i],0); platform_list = enif_make_list_from_array(env, idv,num_platforms); return enif_make_tuple2(env, ATOM(ok), platform_list); } static ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_platform; UNUSED(argc); if (!get_ecl_object(env, argv[0], &platform_r, false, &o_platform)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_platform, (info_fn_t*) clGetPlatformInfo, platform_info, sizeof_array(platform_info)); } static ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { cl_device_type device_type = 0; cl_device_id device_id[MAX_DEVICES]; ERL_NIF_TERM idv[MAX_DEVICES]; ERL_NIF_TERM device_list; cl_uint num_devices; cl_uint i; cl_platform_id platform; cl_int err; UNUSED(argc); if (!get_object(env, argv[0], &platform_r, true,(void**)&platform)) return enif_make_badarg(env); if (!get_bitfields(env, argv[1], &device_type, kv_device_type)) return enif_make_badarg(env); if ((err = clGetDeviceIDs(platform, device_type, MAX_DEVICES, device_id, &num_devices))) return ecl_make_error(env, err); for (i = 0; i < num_devices; i++) idv[i] = ecl_lookup_object(env, &device_r, 
device_id[i], 0); device_list = enif_make_list_from_array(env, idv, num_devices); return enif_make_tuple2(env, ATOM(ok), device_list); } #if CL_VERSION_1_2 == 1 // parse: // {equally,} | // {by_counts, []} // {by_affinity_domain, num|l4_cache|l3_cache|l2_cache|l1_cache| // next_partiionable} // static int get_partition_properties(ErlNifEnv* env, const ERL_NIF_TERM term, cl_device_partition_property* rvec, size_t* rlen) { const ERL_NIF_TERM* elem; int arity; size_t maxlen = *rlen; size_t n = 0; if (!enif_get_tuple(env, term, &arity, &elem)) return 0; if (arity != 2) return 0; if (!enif_is_atom(env, elem[0])) return 0; if (elem[0] == ATOM(equally)) { unsigned long v; *rvec++ = CL_DEVICE_PARTITION_EQUALLY; if (!enif_get_ulong(env, elem[1], &v)) return 0; *rvec++ = v; n=2; } else if (elem[0] == ATOM(by_counts)) { ERL_NIF_TERM head, tail; ERL_NIF_TERM list = elem[1]; unsigned long v; *rvec++ = CL_DEVICE_PARTITION_BY_COUNTS; n++; while((n < maxlen-1) && enif_get_list_cell(env, list, &head, &tail)) { if (!enif_get_ulong(env, head, &v)) return 0; *rvec++=v; n++; list = tail; } if (!enif_is_empty_list(env, list)) return 0; *rvec++ = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END; n++; } else if (elem[0] == ATOM(by_affinity_domain)) { *rvec++ = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; if (elem[1] == ATOM(numa)) *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NUMA; else if (elem[1] == ATOM(l4_cache)) *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE; else if (elem[1] == ATOM(l3_cache)) *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE; else if (elem[1] == ATOM(l2_cache)) *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE; else if (elem[1] == ATOM(l1_cache)) *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE; else if (elem[1] == ATOM(next_partitionable)) *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; else return 0; n = 2; } else return 0; *rlen = n; return 1; } static ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* d; cl_device_id 
out_devices[MAX_DEVICES]; ERL_NIF_TERM idv[MAX_DEVICES]; ERL_NIF_TERM device_list; cl_uint num_devices; cl_uint i; cl_device_partition_property properties[128]; size_t num_property = 128-1; cl_int err; // fixme calc length of properties ! if (!get_ecl_object(env, argv[0], &device_r, false, &d)) return enif_make_badarg(env); if (!get_partition_properties(env, argv[1], properties, &num_property)) return enif_make_badarg(env); properties[num_property] = 0; err = clCreateSubDevices(d->device, properties, MAX_DEVICES, out_devices, &num_devices); if (err) return ecl_make_error(env, err); for (i = 0; i < num_devices; i++) { ecl_object_t* obj; if ((obj = ecl_lookup(env, out_devices[i])) == NULL) obj = ecl_new(env, &device_r, out_devices[i], 0, d->version); idv[i] = make_object(env, device_r.type, obj); } device_list = enif_make_list_from_array(env, idv, num_devices); return enif_make_tuple2(env, ATOM(ok), device_list); } #endif static ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_device; UNUSED(argc); if (!get_ecl_object(env, argv[0], &device_r, false, &o_device)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_device, (info_fn_t*) clGetDeviceInfo, device_info, sizeof_array(device_info)); } typedef struct { ErlNifPid sender; // sender pid ErlNifEnv* s_env; // senders message environment (ref, bin's etc) ErlNifEnv* r_env; // receiver message environment (ref, bin's etc) ErlNifTid tid; // Calling thread } ecl_notify_data_t; void CL_CALLBACK ecl_context_notify(const char *errinfo, const void* private_info, size_t cb, void * user_data) { /* ecl_notify_data_t* bp = user_data; */ /* ERL_NIF_TERM reply; */ /* ErlNifEnv* s_env; */ /* int res; */ UNUSED(errinfo); UNUSED(private_info); UNUSED(cb); UNUSED(user_data); DBG("ecl_context_notify: user_data=%p", user_data); DBG("ecl_context_notify: priv_info=%p cb=%d", private_info, cb); CL_ERROR("CL ERROR ASYNC: %s", errinfo); } // // 
cl:create_context([cl_device_id()]) -> // {ok, cl_context()} | {error, cl_error()} // static ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { cl_device_id device_list[MAX_DEVICES]; cl_uint num_devices = MAX_DEVICES; cl_context context; cl_int err; ecl_notify_data_t* bp; UNUSED(argc); if (!get_object_list(env, argv[0], &device_r, false, (void**) device_list, &num_devices)) return enif_make_badarg(env); if (!(bp = enif_alloc(sizeof(ecl_notify_data_t)))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? if (!(bp->r_env = enif_alloc_env())) { enif_free(bp); return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? } (void) enif_self(env, &bp->sender); bp->s_env = env; bp->tid = enif_thread_self(); DBG("ecl_create_context: self %p", bp->tid); context = clCreateContext(0, num_devices, device_list, ecl_context_notify, bp, &err); if (context) { cl_uint i; ERL_NIF_TERM t; ecl_object_t *dev; cl_int version = 100; for(i = 0; i < num_devices; i++) { dev = ecl_lookup(env, device_list[i]); /* Should hopefully be the same for all devices ? 
use the least version */ if(dev->version < version) version = dev->version; } t = ecl_make_context(env, context, version); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_context, (info_fn_t*) clGetContextInfo, context_info, sizeof_array(context_info)); } static ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; cl_device_id device; cl_command_queue_properties properties; cl_command_queue queue; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_object(env, argv[1], &device_r, false, (void**) &device)) return enif_make_badarg(env); if (!get_bitfields(env, argv[2], &properties, kv_command_queue_properties)) return enif_make_badarg(env); queue = clCreateCommandQueue(o_context->context, device, properties, &err); if (queue) { ERL_NIF_TERM t; t = ecl_make_object(env, &command_queue_r,(void*) queue, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_queue, (info_fn_t*) clGetCommandQueueInfo, queue_info, sizeof_array(queue_info)); } static ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; size_t size; cl_mem_flags mem_flags; cl_mem mem; ErlNifBinary bin; void* host_ptr = 0; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return 
enif_make_badarg(env); if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[2], &size)) return enif_make_badarg(env); if (!enif_inspect_iolist_as_binary(env, argv[3], &bin)) return enif_make_badarg(env); // How do we keep binary data (CL_MEM_USE_HOST_PTR) // We should probably make sure that the buffer is read_only in this // case! // we must be able to reference count the binary object! // USE enif_make_copy !!!! this copy is done to the thread environment! if (bin.size > 0) { host_ptr = bin.data; mem_flags |= CL_MEM_COPY_HOST_PTR; if (size < bin.size) size = bin.size; } else if (size) mem_flags |= CL_MEM_ALLOC_HOST_PTR; mem = clCreateBuffer(o_context->context, mem_flags, size, host_ptr, &err); if (!err) { ERL_NIF_TERM t; t = ecl_make_object(env, &mem_r,(void*) mem, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_buf; cl_mem_flags mem_flags; cl_mem mem; ERL_NIF_TERM info; ERL_NIF_TERM info_arg1, info_arg2; cl_buffer_region reg; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &mem_r, false, &o_buf)) return enif_make_badarg(env); if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags)) return enif_make_badarg(env); if (!enif_is_atom(env, argv[2]) || (argv[2] != ATOM(region))) return enif_make_badarg(env); info = argv[3]; if (!enif_is_list(env, info)) return enif_make_badarg(env); enif_get_list_cell(env, info, &info_arg1, &info); if (!enif_is_list(env, info)) return enif_make_badarg(env); enif_get_list_cell(env, info, &info_arg2, &info); if (!enif_is_empty_list(env, info)) return enif_make_badarg(env); if (!ecl_get_sizet(env, info_arg1, ®.origin)) return enif_make_badarg(env); if (!ecl_get_sizet(env, info_arg2, ®.size)) return enif_make_badarg(env); mem = clCreateSubBuffer(o_buf->mem, mem_flags, 
CL_BUFFER_CREATE_TYPE_REGION, ®, &err); if (!err) { ERL_NIF_TERM t; t = ecl_make_object(env, &mem_r,(void*) mem, o_buf); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #endif // // format {channel_order, channel_data_type} (old) | // {'cl_image_format', order, data_type } // static int get_image_format(ErlNifEnv* env, ERL_NIF_TERM arg, cl_image_format* format) { const ERL_NIF_TERM* rec; int i, arity; if (!enif_get_tuple(env, arg, &arity, &rec)) return 0; if (arity == 2) i = 0; else if (arity == 3) { i = 1; if (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_format))) return 0; } else return 0; if (!get_enum(env, rec[i], &format->image_channel_order, kv_channel_order)) return 0; if (!get_enum(env, rec[i+1], &format->image_channel_data_type, kv_channel_type)) return 0; return 1; } #if CL_VERSION_1_2 == 1 // // format {'cl_image_desc',image_type,image_width,image_height,image_depth, // image_array_size,image_row_pitch,image_slice_pitch, // num_mip_levels,num_samples,buffer} // static int get_image_desc(ErlNifEnv* env, ERL_NIF_TERM arg, cl_image_desc* desc) { const ERL_NIF_TERM* rec; int arity; if (!enif_get_tuple(env, arg, &arity, &rec) || (arity != 11)) return 0; if (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_desc))) return 0; if (!get_enum(env, rec[1], &desc->image_type, kv_mem_object_type)) return 0; if (!ecl_get_sizet(env, rec[2], &desc->image_width)) return 0; if (!ecl_get_sizet(env, rec[3], &desc->image_height)) return 0; if (!ecl_get_sizet(env, rec[4], &desc->image_depth)) return 0; if (!ecl_get_sizet(env, rec[5], &desc->image_array_size)) return 0; if (!ecl_get_sizet(env, rec[6], &desc->image_row_pitch)) return 0; if (!ecl_get_sizet(env, rec[7], &desc->image_slice_pitch)) return 0; desc->num_mip_levels = 0; // rec[8] according to spec desc->num_samples = 0; // rec[9] according to spec if (!get_object(env, rec[10], &mem_r, true, (void**)&desc->buffer)) return 0; return 1; } #endif static ERL_NIF_TERM 
ecl_create_image2d(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; size_t width; size_t height; size_t row_pitch; cl_image_format format; cl_mem_flags mem_flags; cl_mem mem; ErlNifBinary bin; void* host_ptr = 0; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags)) return enif_make_badarg(env); if (!get_image_format(env, argv[2], &format)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &width)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[4], &height)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &row_pitch)) return enif_make_badarg(env); if (!enif_inspect_iolist_as_binary(env, argv[6], &bin)) return enif_make_badarg(env); // How do we keep binary data (CL_MEM_USE_HOST_PTR) (read_only) // we must be able to reference count the binary object! if (bin.size > 0) { host_ptr = bin.data; mem_flags |= CL_MEM_COPY_HOST_PTR; } else if (width && height) mem_flags |= CL_MEM_ALLOC_HOST_PTR; if(o_context->version >= 12) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = width; desc.image_height = height; desc.image_depth = 1; // used with IMAGE3D desc.image_array_size = 1; // used with IMAGE2D/3D_ARRAY? 
desc.image_row_pitch = row_pitch; desc.image_slice_pitch = 0; // maybe 0 for 2D image desc.num_mip_levels = 0; // must be 0 desc.num_samples= 0; // must be 0 desc.buffer = NULL; // used when CL_MEM_OBJECT_IMAGE1D_BUFFER mem = eclCreateImage(o_context->context, mem_flags, &format, &desc, host_ptr, &err); } else { mem = clCreateImage2D(o_context->context, mem_flags, &format, width, height, row_pitch, host_ptr, &err); } if (!err) { ERL_NIF_TERM t; t = ecl_make_object(env, &mem_r,(void*) mem, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; size_t width; size_t height; size_t depth; size_t row_pitch; size_t slice_pitch; cl_image_format format; cl_mem_flags mem_flags; cl_mem mem; ErlNifBinary bin; void* host_ptr = 0; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags)) return enif_make_badarg(env); if (!get_image_format(env, argv[2], &format)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &width)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[4], &height)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &depth)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[6], &row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[7], &slice_pitch)) return enif_make_badarg(env); if (!enif_inspect_iolist_as_binary(env, argv[8], &bin)) return enif_make_badarg(env); // How do we keep binary data (CL_MEM_USE_HOST_PTR) (read_only) // we must be able to reference count the binary object! 
if (bin.size > 0) { host_ptr = bin.data; mem_flags |= CL_MEM_COPY_HOST_PTR; } else if (width && height && depth) mem_flags |= CL_MEM_ALLOC_HOST_PTR; if(o_context->version >= 12) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = width; desc.image_height = height; desc.image_depth = depth; // used with IMAGE3D desc.image_array_size = 1; // used with IMAGE2D/3D_ARRAY? desc.image_row_pitch = row_pitch; desc.image_slice_pitch = slice_pitch; // maybe 0 for 2D image desc.num_mip_levels = 0; // must be 0 desc.num_samples= 0; // must be 0 desc.buffer = NULL; // used when CL_MEM_OBJECT_IMAGE1D_BUFFER mem = eclCreateImage(o_context->context, mem_flags, &format, &desc, host_ptr, &err); } else { mem = clCreateImage3D(o_context->context, mem_flags, &format, width, height, depth, row_pitch, slice_pitch, host_ptr, &err); } if (mem) { ERL_NIF_TERM t; t = ecl_make_object(env, &mem_r,(void*) mem, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // cl:create_image(Context, MemFlags, ImageFormat, ImageDesc, Data) -> // // #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; cl_image_format format; cl_image_desc desc; cl_mem_flags mem_flags; cl_mem mem; ErlNifBinary bin; void* host_ptr = 0; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags)) return enif_make_badarg(env); if (!get_image_format(env, argv[2], &format)) return enif_make_badarg(env); if (!get_image_desc(env, argv[3], &desc)) return enif_make_badarg(env); if (!enif_inspect_iolist_as_binary(env, argv[4], &bin)) return enif_make_badarg(env); if (bin.size > 0) { host_ptr = bin.data; mem_flags |= CL_MEM_COPY_HOST_PTR; } else if (desc.image_width && desc.image_height && desc.image_depth) mem_flags |= CL_MEM_ALLOC_HOST_PTR; // use 
clCreateImage here ? mem = eclCreateImage(o_context->context, mem_flags, &format, &desc, host_ptr, &err); if (mem) { ERL_NIF_TERM t; t = ecl_make_object(env, &mem_r,(void*) mem, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #endif static ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { cl_context context; cl_mem_flags flags; cl_mem_object_type image_type; cl_image_format image_format[MAX_IMAGE_FORMATS]; cl_uint num_image_formats; cl_int err; UNUSED(argc); if (!get_object(env, argv[0], &context_r, false, (void**) &context)) return enif_make_badarg(env); if (!get_bitfields(env, argv[1], &flags, kv_mem_flags)) return enif_make_badarg(env); if (!get_enum(env, argv[2], &image_type, kv_mem_object_type)) return enif_make_badarg(env); err = clGetSupportedImageFormats(context, flags, image_type, MAX_IMAGE_FORMATS, image_format, &num_image_formats); if (!err) { int i = (int) num_image_formats; ERL_NIF_TERM list = enif_make_list(env, 0); while(i) { ERL_NIF_TERM channel_order, channel_type; ERL_NIF_TERM elem; i--; channel_order = make_enum(env, image_format[i].image_channel_order, kv_channel_order); channel_type = make_enum(env, image_format[i].image_channel_data_type, kv_channel_type); elem = enif_make_tuple2(env, channel_order, channel_type); list = enif_make_list_cell(env, elem, list); } return enif_make_tuple2(env, ATOM(ok), list); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_get_mem_object_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_mem; UNUSED(argc); if (!get_ecl_object(env, argv[0], &mem_r, false, &o_mem)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_mem, (info_fn_t*) clGetMemObjectInfo, mem_info, sizeof_array(mem_info)); } static ERL_NIF_TERM ecl_get_image_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_mem; UNUSED(argc); if (!get_ecl_object(env, argv[0], &mem_r, false, 
&o_mem)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_mem, (info_fn_t*) clGetImageInfo, image_info, sizeof_array(image_info)); } // // cl:create_sampler(Context::cl_context(),Normalized::boolean(), // AddressingMode::cl_addressing_mode(), // FilterMode::cl_filter_mode()) -> // {'ok', cl_sampler()} | {'error', cl_error()}. // static ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; cl_bool normalized_coords; cl_addressing_mode addressing_mode; cl_filter_mode filter_mode; cl_sampler sampler; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_bool(env, argv[1], &normalized_coords)) return enif_make_badarg(env); if (!get_enum(env, argv[2], &addressing_mode, kv_addressing_mode)) return enif_make_badarg(env); if (!get_enum(env, argv[3], &filter_mode, kv_filter_mode)) return enif_make_badarg(env); sampler = clCreateSampler(o_context->context, normalized_coords, addressing_mode, filter_mode, &err); if (!err) { ERL_NIF_TERM t; t = ecl_make_object(env, &sampler_r,(void*) sampler, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_sampler; UNUSED(argc); if (!get_ecl_object(env, argv[0], &sampler_r, false, &o_sampler)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_sampler, (info_fn_t*) clGetSamplerInfo, sampler_info, sizeof_array(sampler_info)); } // // cl:create_program_with_source(Context::cl_context(), Source::iodata()) -> // {'ok', cl_program()} | {'error', cl_error()} // static ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; cl_program program; ErlNifBinary source; char* strings[1]; size_t lengths[1]; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], 
&context_r, false, &o_context)) return enif_make_badarg(env); if (!enif_inspect_iolist_as_binary(env, argv[1], &source)) return enif_make_badarg(env); strings[0] = (char*) source.data; lengths[0] = source.size; program = clCreateProgramWithSource(o_context->context, 1, (const char**) strings, lengths, &err); if (!err) { ERL_NIF_TERM t; t = ecl_make_object(env, &program_r,(void*) program, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // cl:create_program_with_binary(Context::cl_context(), // DeviceList::[cl_device_id()], // BinaryList::[binary()]) -> // {'ok', cl_program()} | {'error', cl_error()} // static ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; cl_program program; cl_device_id device_list[MAX_DEVICES]; cl_uint num_devices = MAX_DEVICES; ErlNifBinary binary_list[MAX_DEVICES]; size_t num_binaries = MAX_DEVICES; size_t lengths[MAX_DEVICES]; unsigned char* data[MAX_DEVICES]; cl_uint i; cl_int status[MAX_DEVICES]; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &device_r, false, (void**) device_list, &num_devices)) return enif_make_badarg(env); if (!get_binary_list(env, argv[2], binary_list, &num_binaries)) return enif_make_badarg(env); if (num_binaries != num_devices) return enif_make_badarg(env); for (i = 0; i < num_devices; i++) { lengths[i] = binary_list[i].size; data[i] = binary_list[i].data; } program = clCreateProgramWithBinary(o_context->context, num_devices, (const cl_device_id*) device_list, (const size_t*) lengths, (const unsigned char**) data, status, &err); if (!err) { ERL_NIF_TERM t; t = ecl_make_object(env, &program_r,(void*) program, o_context); return enif_make_tuple2(env, ATOM(ok), t); } // FIXME: handle the value in the status array // In cases of error we can then detect which binary was corrupt... 
return ecl_make_error(env, err); } // // cl:create_program_with_builtin_kernels(Context::cl_context(), // DeviceList::[cl_device_id()], // KernelNames::string()) -> // {'ok', cl_program()} | {'error', cl_error()} // #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_create_program_with_builtin_kernels( ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; cl_program program; cl_device_id device_list[MAX_DEVICES]; cl_uint num_devices = MAX_DEVICES; char kernel_names[MAX_KERNEL_NAME]; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &device_r, false, (void**) device_list, &num_devices)) return enif_make_badarg(env); if (!enif_get_string(env, argv[2], kernel_names, sizeof(kernel_names), ERL_NIF_LATIN1)) return enif_make_badarg(env); program = clCreateProgramWithBuiltInKernels( o_context->context, num_devices, (const cl_device_id*) device_list, kernel_names, &err); if (!err) { ERL_NIF_TERM t; t = ecl_make_object(env, &program_r,(void*) program, o_context); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #endif // // @spec async_build_program(Program::cl_program(), // DeviceList::[cl_device_id()], // Options::string()) -> // {'ok',Ref} | {'error', cl_error()} // // // Notification functio for clBuildProgram // Passed to main thread by sending a async response // FIXME: lock needed? // typedef struct { ErlNifPid sender; // sender pid ErlNifEnv* s_env; // senders message environment (ref, bin's etc) ErlNifEnv* r_env; // receiver message environment (ref, bin's etc) ErlNifTid tid; // Calling thread ERL_NIF_TERM ref; // ref (in env!) 
ecl_object_t* program; } ecl_build_data_t; void CL_CALLBACK ecl_build_notify(cl_program program, void* user_data) { ecl_build_data_t* bp = user_data; ERL_NIF_TERM reply; ErlNifEnv* s_env; int res; UNUSED(program); UNUSED(res); DBG("ecl_build_notify: done program=%p, user_data=%p", program, user_data); // FIXME: check all devices for build_status! // clGetProgramBuildInfo(bp->program->program, CL_PROGRAM_BUILD_STATUS, // reply = !err ? ATOM(ok) : ecl_make_error(bp->env, err); if(enif_equal_tids(bp->tid, enif_thread_self())) s_env = bp->s_env; else s_env = 0; reply = ATOM(ok); res = enif_send(s_env, &bp->sender, bp->r_env, enif_make_tuple3(bp->r_env, ATOM(cl_async), bp->ref, reply)); DBG("ecl_build_notify: send r=%d", res); enif_free_env(bp->r_env); if (bp->program) enif_release_resource(bp->program); enif_free(bp); } static ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_program; cl_device_id device_list[MAX_DEVICES]; cl_uint num_devices = MAX_DEVICES; char options[MAX_OPTION_LIST]; ERL_NIF_TERM ref; ecl_build_data_t* bp; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &program_r, false, &o_program)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &device_r, false, (void**) device_list, &num_devices)) return enif_make_badarg(env); if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1)) return enif_make_badarg(env); if (!(bp = enif_alloc(sizeof(ecl_build_data_t)))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? if (!(bp->r_env = enif_alloc_env())) { enif_free(bp); return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? 
} ref = enif_make_ref(env); (void) enif_self(env, &bp->sender); bp->ref = enif_make_copy(bp->r_env, ref); bp->program = o_program; bp->s_env = env; bp->tid = enif_thread_self(); enif_keep_resource(o_program); // keep while operation is running err = clBuildProgram(o_program->program, num_devices, device_list, (const char*) options, ecl_build_notify, bp); DBG("ecl_async_build_program: err=%d user_data=%p", err, bp); if ((err==CL_SUCCESS) || // This should not be returned, it is not according to spec!!!! (err==CL_BUILD_PROGRAM_FAILURE)) return enif_make_tuple2(env, ATOM(ok), ref); else { enif_free_env(bp->r_env); enif_release_resource(bp->program); enif_free(bp); return ecl_make_error(env, err); } } #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { cl_int err; cl_platform_id platform; ecl_env_t* ecl = enif_priv_data(env); UNUSED(argc); if(ecl->icd_version < 12) return ecl_make_error(env, CL_INVALID_OPERATION); if (!get_object(env, argv[0], &platform_r, true,(void**)&platform)) return enif_make_badarg(env); err = eclUnloadPlatformCompiler(platform); if (err) return ecl_make_error(env, err); return ATOM(ok); } #endif #if CL_VERSION_1_2 == 1 // -spec compile_program(Program::cl_program(), // DeviceList::[cl_device_id()], // Options::string(), // Headers::[cl_program()], // Names::[string()]) -> // 'ok' | {'error', cl_error()}. 
#define MAX_HEADERS 128 static ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_program; cl_device_id device_list[MAX_DEVICES]; cl_uint num_devices = MAX_DEVICES; char options[MAX_OPTION_LIST]; cl_uint num_input_headers = MAX_HEADERS; cl_program input_headers[MAX_HEADERS]; size_t num_header_include_names = MAX_HEADERS; char* header_include_names[MAX_HEADERS]; ERL_NIF_TERM ref; ecl_build_data_t* bp = NULL; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &program_r, false, &o_program)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &device_r, false, (void**) device_list, &num_devices)) return enif_make_badarg(env); if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1)) return enif_make_badarg(env); if (!get_object_list(env, argv[3], &program_r, false, (void**) input_headers, &num_input_headers)) return enif_make_badarg(env); num_header_include_names = num_input_headers; if (!get_string_list(env, argv[4], header_include_names, &num_header_include_names)) return enif_make_badarg(env); if (!(bp = enif_alloc(sizeof(ecl_build_data_t)))) { err = CL_OUT_OF_RESOURCES; goto error; } if (!(bp->r_env = enif_alloc_env())) { err = CL_OUT_OF_RESOURCES; goto error; } ref = enif_make_ref(env); (void) enif_self(env, &bp->sender); bp->ref = enif_make_copy(bp->r_env, ref); bp->program = o_program; bp->s_env = env; bp->tid = enif_thread_self(); enif_keep_resource(o_program); // keep while operation is running DBG("ecl_async_compile_program: program: %p, num_input_headers: %d, bp=%p", o_program->program, num_input_headers, bp); err = clCompileProgram(o_program->program, num_devices, device_list, (const char*) options, num_input_headers, num_input_headers ? input_headers : NULL, num_input_headers ? 
(const char**)header_include_names : NULL, ecl_build_notify, bp); DBG("ecl_async_compile_program: err=%d user_data=%p", err, bp); if ((err==CL_SUCCESS) || (err==CL_BUILD_PROGRAM_FAILURE)) { // check if we need to save this until complete! free_string_list(header_include_names, num_header_include_names); return enif_make_tuple2(env, ATOM(ok), ref); } error: free_string_list(header_include_names, num_header_include_names); if (bp) { if (bp->program) enif_release_resource(bp->program); if (bp->r_env) enif_free_env(bp->r_env); enif_free(bp); } return ecl_make_error(env, err); } #endif #if CL_VERSION_1_2 == 1 // -spec link_program(Context::cl_context(), // DeviceList::[cl_device_id()], // Options::string(), // Programs::[cl_program()]) -> // {'ok',cl_program()} | {'error', cl_error()}. #define MAX_INPUT_PROGRAMS 128 static ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_context; cl_program program; cl_device_id device_list[MAX_DEVICES]; cl_uint num_devices = MAX_DEVICES; char options[MAX_OPTION_LIST]; cl_uint num_input_programs = MAX_INPUT_PROGRAMS; cl_program input_programs[MAX_INPUT_PROGRAMS]; ERL_NIF_TERM ref; ERL_NIF_TERM prog; ecl_build_data_t* bp; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &context_r, false, &o_context)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &device_r, false, (void**) device_list, &num_devices)) return enif_make_badarg(env); if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1)) return enif_make_badarg(env); if (!get_object_list(env, argv[3], &program_r, false, (void**) input_programs, &num_input_programs)) return enif_make_badarg(env); if (!(bp = enif_alloc(sizeof(ecl_build_data_t)))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); if (!(bp->r_env = enif_alloc_env())) { enif_free(bp); return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? 
} ref = enif_make_ref(env); (void) enif_self(env, &bp->sender); bp->ref = enif_make_copy(bp->r_env, ref); bp->program = NULL; bp->s_env = env; bp->tid = enif_thread_self(); DBG("ecl_async_link_program: context: %p, num_input_programs %d, bp=%p", o_context->context, num_input_programs, bp); // lock callback inorder avoid race? program = clLinkProgram(o_context->context, num_devices, num_devices ? device_list : NULL, (const char*) options, num_input_programs, input_programs, ecl_build_notify, bp, &err); DBG("ecl_async_link_program: err=%d program %p, user_data=%p", err, program, bp); if (program == NULL) { enif_free_env(bp->r_env); enif_free(bp); return ecl_make_error(env, err); } prog = ecl_make_object(env, &program_r,(void*) program, o_context); return enif_make_tuple2(env, ATOM(ok), enif_make_tuple2(env, ref, prog)); } #endif static ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { cl_int err; ecl_env_t* ecl = enif_priv_data(env); UNUSED(argc); UNUSED(argv); if (ecl->icd_version >= 12) { ecl_env_t* ecl = enif_priv_data(env); cl_platform_id platform; if (ecl->nplatforms <= 0) return ecl_make_error(env, CL_INVALID_VALUE); platform = (cl_platform_id) ecl->platform[0].o_platform->opaque; err = eclUnloadPlatformCompiler(platform); } else { err = clUnloadCompiler(); } if (err) return ecl_make_error(env, err); return ATOM(ok); } // Special (workaround) for checking if program may have binaries static int program_may_have_binaries(cl_program program) { cl_int num_devices; size_t returned_size; cl_device_id devices[MAX_DEVICES]; int i; if (clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, &returned_size) != CL_SUCCESS) return 0; if (clGetProgramInfo(program, CL_PROGRAM_DEVICES, num_devices*sizeof(cl_device_id), devices, NULL) != CL_SUCCESS) return 0; for (i = 0; i < num_devices; i++) { cl_build_status build_status = CL_BUILD_NONE; if (clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_STATUS, 
sizeof(build_status), &build_status, NULL) != CL_SUCCESS) return 0; if (build_status != CL_BUILD_SUCCESS) return 0; } return 1; } // Special util to extract program binary_sizes static ERL_NIF_TERM make_program_binary_sizes(ErlNifEnv* env, cl_program program) { cl_int err; ERL_NIF_TERM list; size_t returned_size; cl_uint num_devices; size_t size[MAX_DEVICES]; int i; memset(size, 0, sizeof(size)); if ((err = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, &returned_size))) return ecl_make_error(env, err); if (program_may_have_binaries(program)) { if ((err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, num_devices*sizeof(size_t), &size[0], &returned_size))) return ecl_make_error(env, err); } list = enif_make_list(env, 0); for (i = num_devices-1; i >= 0; i--) { ERL_NIF_TERM elem = ecl_make_sizet(env, size[i]); list = enif_make_list_cell(env, elem, list); } return enif_make_tuple2(env, ATOM(ok), list); } // Special util to extract program binaries static ERL_NIF_TERM make_program_binaries(ErlNifEnv* env, cl_program program) { cl_int err; ERL_NIF_TERM list; size_t returned_size; cl_uint num_devices; int i; if ((err = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, &returned_size))) return ecl_make_error(env, err); if (!program_may_have_binaries(program)) { ErlNifBinary empty; enif_alloc_binary(0, &empty); list = enif_make_list(env, 0); for (i = num_devices-1; i >= 0; i--) { ERL_NIF_TERM elem; elem = enif_make_binary(env, &empty); list = enif_make_list_cell(env, elem, list); } enif_release_binary(&empty); return enif_make_tuple2(env, ATOM(ok), list); } else { size_t size[MAX_DEVICES]; ErlNifBinary binary[MAX_DEVICES]; unsigned char* data[MAX_DEVICES]; memset(size, 0, sizeof(size)); memset(binary, 0, sizeof(binary)); if ((err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, num_devices*sizeof(size_t), &size[0], &returned_size))) return ecl_make_error(env, err); i = 0; while (i < (int) 
num_devices) { if (!enif_alloc_binary(size[i], &binary[i])) { err = CL_OUT_OF_HOST_MEMORY; goto cleanup; } data[i] = binary[i].data; i++; } if ((err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*)*num_devices, data, &returned_size))) goto cleanup; list = enif_make_list(env, 0); for (i = num_devices-1; i >= 0; i--) { ERL_NIF_TERM elem = enif_make_binary(env, &binary[i]); list = enif_make_list_cell(env, elem, list); } return enif_make_tuple2(env, ATOM(ok), list); cleanup: while(i > 0) { i--; enif_release_binary(&binary[i]); } return ecl_make_error(env, err); } } static ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_program; UNUSED(argc); if (!get_ecl_object(env, argv[0], &program_r, false, &o_program)) return enif_make_badarg(env); if (argv[1] == ATOM(binaries)) return make_program_binaries(env, o_program->program); else if (argv[1] == ATOM(binary_sizes)) return make_program_binary_sizes(env, o_program->program); else return make_object_info(env, argv[1], o_program, (info_fn_t*) clGetProgramInfo, program_info, sizeof_array(program_info)); } static ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_program; ecl_object_t* o_device; UNUSED(argc); if (!get_ecl_object(env, argv[0], &program_r, false, &o_program)) return enif_make_badarg(env); if (!get_ecl_object(env, argv[1], &device_r, false, &o_device)) return enif_make_badarg(env); return make_object_info2(env, argv[2], o_program, o_device->opaque, (info2_fn_t*) clGetProgramBuildInfo, build_info, sizeof_array(build_info)); } static ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_program; cl_kernel kernel; char kernel_name[MAX_KERNEL_NAME]; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &program_r, false, &o_program)) return enif_make_badarg(env); if (!enif_get_string(env, argv[1], kernel_name, sizeof(kernel_name), 
ERL_NIF_LATIN1)) return enif_make_badarg(env); kernel = clCreateKernel(o_program->program,kernel_name, &err); if (!err) { ERL_NIF_TERM t; t = ecl_make_kernel(env, kernel, o_program); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // @spec create_kernels_in_program(Program::cl_program()) -> // {'ok', [cl_kernel()]} | {'error', cl_error()} // static ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_program; ERL_NIF_TERM kernv[MAX_KERNELS]; ERL_NIF_TERM kernel_list; cl_kernel kernel[MAX_KERNELS]; cl_uint num_kernels_ret; cl_uint i; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &program_r, false, &o_program)) return enif_make_badarg(env); err = clCreateKernelsInProgram(o_program->program, MAX_KERNELS, kernel, &num_kernels_ret); if (err) return ecl_make_error(env, err); for (i = 0; i < num_kernels_ret; i++) { // FIXME: handle out of memory kernv[i] = ecl_make_kernel(env, kernel[i], o_program); } kernel_list = enif_make_list_from_array(env, kernv, num_kernels_ret); return enif_make_tuple2(env, ATOM(ok), kernel_list); } // // cl:set_kernel_arg(Kernel::cl_kernel(), Index::non_neg_integer(), // Argument::cl_kernel_arg()) -> // {Type,Value} // {'size',Value} // {ecl_object,Handle,<>} object (special for sampler) // integer() == {'int', Value} // float() == {'float', Value} // list == Raw data // binary == Raw data // static ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_kernel_t* o_kernel; unsigned char arg_buf[16*sizeof(double)]; // vector type buffer cl_uint arg_index; size_t arg_size; void* arg_value; const ERL_NIF_TERM* array; double fval; int ival; long lval; unsigned long luval; size_t sval; ErlNifUInt64 u64val; ErlNifSInt64 i64val; ErlNifBinary bval; cl_int int_arg; cl_float float_arg; void* ptr_arg = 0; int arity; cl_int err; int arg_type = KERNEL_ARG_OTHER; UNUSED(argc); if 
(!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel)) return enif_make_badarg(env); if (!enif_get_uint(env, argv[1], &arg_index)) return enif_make_badarg(env); if (enif_get_tuple(env, argv[2], &arity, &array)) { if (arity == 3) { if (array[0] == ATOM(mem_t)) { if (!get_object(env,argv[2],&mem_r,true,&ptr_arg)) return enif_make_badarg(env); arg_type = KERNEL_ARG_MEM; arg_value = &ptr_arg; arg_size = sizeof(cl_mem); goto do_kernel_arg; } else if (array[0] == ATOM(sampler_t)) { if (!get_object(env,argv[2],&sampler_r,false,&ptr_arg)) return enif_make_badarg(env); arg_type = KERNEL_ARG_SAMPLER; arg_value = &ptr_arg; arg_size = sizeof(cl_sampler); goto do_kernel_arg; } return enif_make_badarg(env); } else if (arity == 2) { cl_uint typen; ocl_type_t base_type; size_t base_size; int vec_size; int value_arity; const ERL_NIF_TERM* values; unsigned char* ptr = arg_buf; int i; if (!get_enum(env, array[0], &typen, kv_cl_type)) return enif_make_badarg(env); vec_size = typen >> 16; base_type = typen & 0xFFFF; base_size = ecl_sizeof(base_type); if ((vec_size == 1) && !enif_is_tuple(env, array[1])) { value_arity = 1; values = &array[1]; } else if (!enif_get_tuple(env, array[1], &value_arity, &values)) return enif_make_badarg(env); if (value_arity != vec_size) return enif_make_badarg(env); for (i = 0; i < vec_size; i++) { switch(base_type) { case OCL_CHAR: if (!enif_get_long(env, values[i], &lval)) return enif_make_badarg(env); *((cl_char*)ptr) = (cl_char) lval; break; case OCL_UCHAR: if (!enif_get_ulong(env, values[i], &luval)) return enif_make_badarg(env); *((cl_uchar*)ptr) = (cl_uchar) luval; break; case OCL_SHORT: if (!enif_get_long(env, values[i], &lval)) return enif_make_badarg(env); *((cl_short*)ptr) = (cl_short) lval; break; case OCL_USHORT: if (!enif_get_ulong(env, values[i], &luval)) return enif_make_badarg(env); *((cl_ushort*)ptr) = (cl_ushort) luval; break; case OCL_INT: if (!enif_get_long(env, values[i], &lval)) return enif_make_badarg(env); 
*((cl_int*)ptr) = (cl_int) lval; break; case OCL_UINT: if (!enif_get_ulong(env, values[i], &luval)) return enif_make_badarg(env); *((cl_uint*)ptr) = (cl_uint) luval; break; case OCL_LONG: if (!enif_get_int64(env, values[i], &i64val)) return enif_make_badarg(env); *((cl_long*)ptr) = i64val; break; case OCL_ULONG: if (!enif_get_uint64(env, values[i], &u64val)) return enif_make_badarg(env); *((cl_ulong*)ptr) = u64val; break; case OCL_HALF: if (!enif_get_ulong(env, values[i], &luval)) return enif_make_badarg(env); *((cl_half*)ptr) = (cl_half) luval; break; case OCL_FLOAT: if (!enif_get_double(env, values[i], &fval)) return enif_make_badarg(env); *((cl_float*)ptr) = (cl_float) fval; break; case OCL_DOUBLE: if (!enif_get_double(env, values[i], &fval)) return enif_make_badarg(env); *((cl_double*)ptr) = fval; break; case OCL_SIZE: if (!ecl_get_sizet(env, values[i], &sval)) return enif_make_badarg(env); *((size_t*)ptr) = sval; break; case OCL_BOOL: case OCL_STRING: case OCL_ENUM: case OCL_BITFIELD: case OCL_POINTER: case OCL_PLATFORM: case OCL_DEVICE: case OCL_CONTEXT: case OCL_PROGRAM: case OCL_COMMAND_QUEUE: case OCL_IMAGE_FORMAT: default: return enif_make_badarg(env); } ptr += base_size; } arg_value = arg_buf; arg_size = base_size*vec_size; goto do_kernel_arg; } return enif_make_badarg(env); } else if (enif_get_int(env, argv[2], &ival)) { int_arg = ival; arg_value = &int_arg; arg_size = sizeof(int_arg); goto do_kernel_arg; } else if (enif_get_double(env, argv[2], &fval)) { float_arg = (float) fval; arg_value = &float_arg; arg_size = sizeof(float_arg); goto do_kernel_arg; } else if (enif_inspect_iolist_as_binary(env, argv[2], &bval)) { // rule your own case arg_value = bval.data; arg_size = bval.size; goto do_kernel_arg; } return enif_make_badarg(env); do_kernel_arg: err = clSetKernelArg(o_kernel->obj.kernel, arg_index, arg_size, arg_value); if (!err) { set_kernel_arg(o_kernel, arg_index, arg_type, ptr_arg); return ATOM(ok); } return ecl_make_error(env, err); } // 
cl:set_kernel_arg_size(Kernel::cl_kernel(), Index::non_neg_integer(), // Size::non_neg_integer()) -> // 'ok' | {'error', cl_error()} // // cl special to set kernel arg with size only (local mem etc) // static ERL_NIF_TERM ecl_set_kernel_arg_size(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_kernel_t* o_kernel; cl_uint arg_index; size_t arg_size; unsigned char* arg_value = 0; cl_int err; UNUSED(argc); if (!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel)) return enif_make_badarg(env); if (!enif_get_uint(env, argv[1], &arg_index)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[2], &arg_size)) return enif_make_badarg(env); err = clSetKernelArg(o_kernel->obj.kernel, arg_index, arg_size, arg_value); if (!err) { set_kernel_arg(o_kernel, arg_index, KERNEL_ARG_OTHER, (void*) 0); return ATOM(ok); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_kernel; UNUSED(argc); if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_kernel, (info_fn_t*) clGetKernelInfo, kernel_info, sizeof_array(kernel_info)); } static ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_kernel; ecl_object_t* o_device; UNUSED(argc); if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel)) return enif_make_badarg(env); if (!get_ecl_object(env, argv[1], &device_r, false, &o_device)) return enif_make_badarg(env); return make_object_info2(env, argv[2], o_kernel, o_device->opaque, (info2_fn_t*) clGetKernelWorkGroupInfo, workgroup_info, sizeof_array(workgroup_info)); } #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_kernel; cl_uint arg_index; UNUSED(argc); if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel)) return 
enif_make_badarg(env); if (!enif_get_uint(env, argv[1], &arg_index)) return enif_make_badarg(env); return make_object_info2(env, argv[2], o_kernel, (void*) (unsigned long) arg_index, (info2_fn_t*) clGetKernelArgInfo, arg_info, sizeof_array(arg_info)); } #endif // // cl:enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(), // WaitList::[cl_event()], WantEvent::boolean()) -> // 'ok' | {'ok', cl_event()} | {'error', cl_error()} // static ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_kernel kernel; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; cl_int err; cl_bool want_event; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &kernel_r, false,(void**)&kernel)) return enif_make_badarg(env); if (!get_object_list(env, argv[2], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!get_bool(env, argv[3], &want_event)) return enif_make_badarg(env); err = clEnqueueTask(o_queue->queue, kernel, num_events, num_events ? wait_list : NULL, want_event ? 
&event : NULL); if (!err) { if (want_event) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ATOM(ok); } return ecl_make_error(env, err); } // // cl:enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(), // Global::[non_neg_integer()], // Local::[non_neg_integer()], // WaitList::[cl_event()], WantEvent::boolean()) -> // 'ok' | {'ok', cl_event()} | {'error', cl_error()} // static ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_kernel kernel; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t global_work_size[MAX_WORK_SIZE]; size_t local_work_size[MAX_WORK_SIZE]; size_t work_dim = MAX_WORK_SIZE; size_t temp_dim = MAX_WORK_SIZE; cl_event event; cl_int err; cl_bool want_event; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &kernel_r, false, (void**) &kernel)) return enif_make_badarg(env); if (!get_sizet_list(env, argv[2], global_work_size, &work_dim)) return enif_make_badarg(env); if (!get_sizet_list(env, argv[3], local_work_size, &temp_dim)) return enif_make_badarg(env); if (!get_object_list(env, argv[4], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!get_bool(env, argv[5], &want_event)) return enif_make_badarg(env); if (work_dim == 0) { return enif_make_badarg(env); } if ((temp_dim > 0) && (work_dim != temp_dim)) { return enif_make_badarg(env); } err = clEnqueueNDRangeKernel(o_queue->queue, kernel, (cl_uint) work_dim, 0, // global_work_offset, global_work_size, temp_dim ? local_work_size : NULL, num_events, num_events ? wait_list : NULL, want_event ? 
&event : NULL); if (!err) { if (want_event) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ATOM(ok); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if(o_queue->version >= 12) { if (!(err = eclEnqueueMarkerWithWaitList(o_queue->queue, 0, NULL, &event))) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } } else { // deprecated in 1.2 available in 1.1 if (!(err = clEnqueueMarker(o_queue->queue, &event))) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } } return ecl_make_error(env, err); } // // cl:enqueue_wait_for_events(Queue::cl_queue(), WaitList::[cl_event()]) -> // 'ok' | {'error', cl_error()} // static ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if(o_queue->version >= 12) { err = eclEnqueueMarkerWithWaitList(o_queue->queue, num_events, num_events ? wait_list : NULL, NULL); } else { err = clEnqueueWaitForEvents(o_queue->queue, num_events, num_events ? 
wait_list : NULL); } if (!err) return ATOM(ok); return ecl_make_error(env, err); } // // cl:enqueue_read_buffer(Queue::cl_queue(), Buffer::cl_mem(), // Offset::non_neg_integer(), // Size::non_neg_integer(), // WaitList::[cl_event()]) -> // {'ok', cl_event()} | {'error', cl_error()} static ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; size_t offset; size_t size; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; ErlNifBinary* bin; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[2], &offset)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &size)) return enif_make_badarg(env); if (!get_object_list(env, argv[4], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!(bin = enif_alloc(sizeof(ErlNifBinary)))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? if (!enif_alloc_binary(size, bin)) { enif_free(bin); return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? } err = clEnqueueReadBuffer(o_queue->queue, buffer, CL_FALSE, offset, size, bin->data, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, true, false, 0, bin, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } else { enif_free(bin); return ecl_make_error(env, err); } } // // cl:enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(), // Offset::non_neg_integer(), // Size::non_neg_integer(), // Data::binary(), // WaitList::[cl_event()], // WantEvent::boolean() // ) -> // {'ok', cl_event()} | {'error', cl_error()} // static ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; size_t offset; size_t size; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; ErlNifBinary bin; ErlNifEnv* bin_env; cl_int err; cl_bool want_event; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[2], &offset)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &size)) return enif_make_badarg(env); /* Check argv[4] (bin) last */ if (!get_object_list(env, argv[5], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!get_bool(env, argv[6], &want_event)) return enif_make_badarg(env); if (!(bin_env = enif_alloc_env())) { // create binary environment return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? } if (!ecl_make_binary(env, argv[4], bin_env, &bin)) { enif_free_env(bin_env); return enif_make_badarg(env); } // handle binary and iolist as binary if (bin.size < size) { // FIXME: handle offset! return enif_make_badarg(env); } err = clEnqueueWriteBuffer(o_queue->queue, buffer, !want_event, // FALSE for async offset, size, bin.data, num_events, num_events ? wait_list : NULL, want_event ? 
&event : NULL); if (!err) { if (want_event) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } else { enif_free_env(bin_env); } return ATOM(ok); } else { enif_free_env(bin_env); return ecl_make_error(env, err); } } // // enqueue_read_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch, // _WaitList) -> {'ok',Event} | {error,Error} // static ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; size_t origin[3]; size_t region[3]; size_t row_pitch; size_t slice_pitch; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_origin = 3; size_t num_region = 3; size_t psize; size_t size; cl_event event; ErlNifBinary* bin; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return enif_make_badarg(env); origin[0] = origin[1] = origin[2] = 0; if (!get_sizet_list(env, argv[2], origin, &num_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[3], region, &num_region)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[4], &row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &slice_pitch)) return enif_make_badarg(env); if (!get_object_list(env, argv[6], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!(bin = enif_alloc(sizeof(ErlNifBinary)))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? // calculate the read size of the image, FIXME: check error return clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0); size = region[0]*region[1]*region[2]*psize; if (!enif_alloc_binary(size, bin)) { enif_free(bin); return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? 
} err = clEnqueueReadImage(o_queue->queue, buffer, CL_FALSE, origin, region, row_pitch, slice_pitch, bin->data, num_events, num_events ? wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, true, false, 0, bin, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } else { enif_free(bin); return ecl_make_error(env, err); } } // // enqueue_read_buffer_rect(_Queue, _Buffer, // BufferOrigin, HostOrigin, Region, // BufferRowPitch, BufferSlicePitch, // HostRowPitch, HostSlicePitch, // WaitList) -> {'ok',Event} | {error,Error} // #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; size_t buffer_origin[3]; size_t host_origin[3]; size_t region[3]; size_t buffer_row_pitch; size_t buffer_slice_pitch; size_t host_row_pitch; size_t host_slice_pitch; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_buffer_origin = 3; size_t num_host_origin = 3; size_t num_region = 3; size_t size; cl_event event; ErlNifBinary* bin; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return enif_make_badarg(env); buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0; if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin)) return enif_make_badarg(env); host_origin[0] = host_origin[1] = host_origin[2] = 0; if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[4], region, &num_region)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[7], &host_row_pitch)) return enif_make_badarg(env); if 
(!ecl_get_sizet(env, argv[8], &host_slice_pitch)) return enif_make_badarg(env); if (!get_object_list(env, argv[9], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!(bin = enif_alloc(sizeof(ErlNifBinary)))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? // calculate the read size of the image, FIXME: check error return size = (host_origin[0]+region[0])*(host_origin[1]+region[1])* (host_origin[2]+region[2]); if (!enif_alloc_binary(size, bin)) { enif_free(bin); return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? } err = clEnqueueReadBufferRect(o_queue->queue, buffer, CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, bin->data, num_events, num_events ? wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, true, false, 0, bin, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } else { enif_free(bin); return ecl_make_error(env, err); } } #endif // // enqueue_write_buffer_rect(_Queue, _Buffer, // BufferOrigin, HostOrigin, Region, // BufferRowPitch, BufferSlicePitch, // HostRowPitch, HostSlicePitch, // Data::binary(), // WaitList) -> {'ok',Event} | {error,Error} // #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; size_t buffer_origin[3]; size_t host_origin[3]; size_t region[3]; size_t buffer_row_pitch; size_t buffer_slice_pitch; size_t host_row_pitch; size_t host_slice_pitch; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_buffer_origin = 3; size_t num_host_origin = 3; size_t num_region = 3; size_t size; cl_event event; ErlNifBinary bin; ErlNifEnv* bin_env; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return 
enif_make_badarg(env); buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0; if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin)) return enif_make_badarg(env); host_origin[0] = host_origin[1] = host_origin[2] = 0; if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[4], region, &num_region)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[7], &host_row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[8], &host_slice_pitch)) return enif_make_badarg(env); /* Check argv[9] (bin) last */ if (!get_object_list(env, argv[10], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!(bin_env = enif_alloc_env())) { // create binary environment return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? } if (!ecl_make_binary(env, argv[9], bin_env, &bin)) { enif_free_env(bin_env); return enif_make_badarg(env); } size = (host_origin[0]+region[0])*(host_origin[1]+region[1])* (host_origin[2]+region[2]); // handle binary and iolist as binary if (bin.size < size) { // FIXME: handle offset! return enif_make_badarg(env); } err = clEnqueueWriteBufferRect(o_queue->queue, buffer, CL_FALSE, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, bin.data, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } else { enif_free_env(bin_env); return ecl_make_error(env, err); } } #endif // // cl:enqueue_fill_buffer(Queue, Buffer, Pattern, Offset, Size, WaitList) -> // {ok,Event} | {error,Reason} // #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; ErlNifBinary pattern; size_t offset; size_t size; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return enif_make_badarg(env); if (!enif_inspect_binary(env, argv[2], &pattern)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &offset)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[4], &size)) return enif_make_badarg(env); if (!get_object_list(env, argv[5], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); // Note: pattern must not be retained, it can be freed after this call // according to spec. err = clEnqueueFillBuffer(o_queue->queue, buffer, pattern.data, pattern.size, offset, size, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #endif // // enqueue_write_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch, // _Data, _WaitList, _WantEvent) -> // static ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; size_t origin[3]; size_t region[3]; size_t row_pitch; size_t slice_pitch; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_origin = 3; size_t num_region = 3; size_t psize; size_t size; cl_event event; ErlNifBinary bin; ErlNifEnv* bin_env; cl_int err; cl_bool want_event; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return enif_make_badarg(env); origin[0] = origin[1] = origin[2] = 0; if (!get_sizet_list(env, argv[2], origin, &num_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[3], region, &num_region)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[4], &row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &slice_pitch)) return enif_make_badarg(env); /* Check argv[6] (bin) last */ if (!get_object_list(env, argv[7], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!get_bool(env, argv[8], &want_event)) return enif_make_badarg(env); if (!(bin_env = enif_alloc_env())) { // create binary environment return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? 
} if (!ecl_make_binary(env, argv[6], bin_env, &bin)) { enif_free_env(bin_env); return enif_make_badarg(env); } // calculate the read size of the image FIXME: check error return clGetImageInfo(buffer, CL_IMAGE_ELEMENT_SIZE, sizeof(psize), &psize, 0); size = region[0]*region[1]*region[2]*psize; if (bin.size < size) { return enif_make_badarg(env); } err = clEnqueueWriteImage(o_queue->queue, buffer, !want_event, // FALSE for ASYNC origin, region, row_pitch, slice_pitch, bin.data, num_events, num_events ? wait_list : NULL, want_event ? &event : NULL ); if (!err) { if (want_event) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } else { enif_free_env(bin_env); } return ATOM(ok); } else { enif_free_env(bin_env); return ecl_make_error(env, err); } } // // cl:enqueue_copy_buffer(Queue, SrcBuffer, DstBuffer, // SrcOffset, DstOffset, Cb, // WaitList) -> // static ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem src_buffer; cl_mem dst_buffer; size_t src_offset; size_t dst_offset; size_t cb; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer)) return enif_make_badarg(env); if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &src_offset)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[4], &dst_offset)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &cb)) return enif_make_badarg(env); if (!get_object_list(env, argv[6], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = clEnqueueCopyBuffer(o_queue->queue, src_buffer, dst_buffer, src_offset, dst_offset, cb, 
num_events, num_events ? wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // enqueue_copy_buffer_rect(_Queue, _SrcBuffer, _DstBuffer, // SrcOrigin, DstOrigin, Region, // SrcRowPitch, SrcSlicePitch, // DstRowPitch, DstSlicePitch, // WaitList) -> {'ok',Event} | {error,Error} // #if CL_VERSION_1_1 == 1 static ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem src_buffer; cl_mem dst_buffer; size_t src_origin[3]; size_t dst_origin[3]; size_t region[3]; size_t src_row_pitch; size_t src_slice_pitch; size_t dst_row_pitch; size_t dst_slice_pitch; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_src_origin = 3; size_t num_dst_origin = 3; size_t num_region = 3; cl_event event; ErlNifBinary* bin; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer)) return enif_make_badarg(env); if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer)) return enif_make_badarg(env); src_origin[0] = src_origin[1] = src_origin[2] = 0; if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin)) return enif_make_badarg(env); dst_origin[0] = dst_origin[1] = dst_origin[2] = 0; if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[5], region, &num_region)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[6], &src_row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[7], &src_slice_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[8], &dst_row_pitch)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[9], &dst_slice_pitch)) return 
enif_make_badarg(env); if (!get_object_list(env, argv[10], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); if (!(bin = enif_alloc(sizeof(ErlNifBinary)))) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? err = clEnqueueCopyBufferRect(o_queue->queue, src_buffer, dst_buffer, src_origin, dst_origin, region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, num_events, num_events ? wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, true, false, 0, bin, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } else { enif_free(bin); return ecl_make_error(env, err); } } #endif static ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem src_image; cl_mem dst_image; size_t src_origin[3]; size_t dst_origin[3]; size_t region[3]; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_src_origin = 3; size_t num_dst_origin = 3; size_t num_region = 3; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image)) return enif_make_badarg(env); if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image)) return enif_make_badarg(env); src_origin[0] = src_origin[1] = src_origin[2] = 0; if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin)) return enif_make_badarg(env); dst_origin[0] = dst_origin[1] = dst_origin[2] = 0; if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[5], region, &num_region)) return enif_make_badarg(env); if (!get_object_list(env, argv[6], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = clEnqueueCopyImage(o_queue->queue, src_image, dst_image, src_origin, dst_origin, region, 
num_events, num_events ? wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // cl:enqueue_fill_image(Queue,Image,FillColor,Origin,Region,WaitList) -> // FillColor = <> // | <> // | <> // Use device endian! check device_info(D, endian_little) // // #if CL_VERSION_1_2 == 1 static ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem image; ErlNifBinary fill_color; size_t origin[3]; size_t region[3]; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_origin = 3; size_t num_region = 3; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&image)) return enif_make_badarg(env); if (!enif_inspect_binary(env, argv[2], &fill_color)) return enif_make_badarg(env); if (fill_color.size != 4*4) return enif_make_badarg(env); origin[0] = origin[1] = origin[2] = 0; if (!get_sizet_list(env, argv[3], origin, &num_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[4], region, &num_region)) return enif_make_badarg(env); if (!get_object_list(env, argv[5], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = clEnqueueFillImage(o_queue->queue, image, fill_color.data, // validate size etc! origin, region, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #endif // cl:enqueue_copy_image_to_buffer(_Queue, _SrcImage, _DstBuffer, // _Origin, _Region, // _DstOffset, _WaitList) -> static ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem src_image; cl_mem dst_buffer; size_t origin[3]; size_t region[3]; size_t dst_offset; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_src_origin = 3; size_t num_region = 3; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image)) return enif_make_badarg(env); if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer)) return enif_make_badarg(env); origin[0] = origin[1] = origin[2] = 0; if (!get_sizet_list(env, argv[3], origin, &num_src_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[4], region, &num_region)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[5], &dst_offset)) return enif_make_badarg(env); if (!get_object_list(env, argv[6], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = clEnqueueCopyImageToBuffer(o_queue->queue, src_image, dst_buffer, origin, region, dst_offset, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // cl:enqueue_copy_buffer_to_image(_Queue, _SrcBuffer, _DstImage, // _SrcOffset, _DstOrigin, // _Region, _WaitList) -> // static ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem src_buffer; cl_mem dst_image; size_t src_offset; size_t origin[3]; size_t region[3]; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_src_origin = 3; size_t num_region = 3; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer)) return enif_make_badarg(env); if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &src_offset)) return enif_make_badarg(env); origin[0] = origin[1] = origin[2] = 0; if (!get_sizet_list(env, argv[4], origin, &num_src_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[5], region, &num_region)) return enif_make_badarg(env); if (!get_object_list(env, argv[6], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = clEnqueueCopyBufferToImage(o_queue->queue, src_buffer, dst_image, src_offset, origin, region, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } static ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem buffer; cl_map_flags map_flags; size_t offset; size_t size; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; cl_int err; void* ptr; UNUSED(argc); UNUSED(ptr); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer)) return enif_make_badarg(env); if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[3], &offset)) return enif_make_badarg(env); if (!ecl_get_sizet(env, argv[4], &size)) return enif_make_badarg(env); if (!get_object_list(env, argv[5], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); ptr = clEnqueueMapBuffer(o_queue->queue, buffer, CL_FALSE, map_flags, offset, size, num_events, num_events ? wait_list : 0, &event, &err); if (!err) { ERL_NIF_TERM t; // FIXME: how should we handle ptr???? 
t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // enqueue_map_image(_Queue, _Image, _MapFlags, _Origin, _Region, _WaitList) -> // static ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem image; cl_map_flags map_flags; size_t origin[3]; size_t region[3]; size_t row_pitch; size_t slice_pitch; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; size_t num_origin = 3; size_t num_region = 3; cl_event event; cl_int err; void* ptr; UNUSED(argc); UNUSED(ptr); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&image)) return enif_make_badarg(env); if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags)) return enif_make_badarg(env); origin[0] = origin[1] = origin[2] = 0; if (!get_sizet_list(env, argv[3], origin, &num_origin)) return enif_make_badarg(env); region[0] = region[1] = region[2] = 1; if (!get_sizet_list(env, argv[4], region, &num_region)) return enif_make_badarg(env); if (!get_object_list(env, argv[5], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); ptr = clEnqueueMapImage(o_queue->queue, image, CL_FALSE, map_flags, origin, region, &row_pitch, &slice_pitch, num_events, num_events ? 
wait_list : 0, &event, &err); if (!err) { ERL_NIF_TERM t; // FIXME: send binary+event to event thread t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } // // enqueue_unmap_mem_object(_Queue, _Mem, _WaitList) -> // // static ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_mem memobj; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; void* mapped_ptr; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object(env, argv[1], &mem_r, false, (void**)&memobj)) return enif_make_badarg(env); if (!get_object_list(env, argv[3], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); mapped_ptr = 0; // FIXME!!!! err = clEnqueueUnmapMemObject(o_queue->queue, memobj, mapped_ptr, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #if CL_VERSION_1_2 == 1 // static ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_uint num_mem_objects = MAX_MEM_OBJECTS; cl_mem mem_objects[MAX_MEM_OBJECTS]; cl_mem_migration_flags flags = 0; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &mem_r, false, (void**) mem_objects, &num_mem_objects)) return enif_make_badarg(env); if (!get_bitfields(env, argv[2], &flags, kv_migration_flags)) return enif_make_badarg(env); if (!get_object_list(env, argv[3], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = clEnqueueMigrateMemObjects(o_queue->queue, num_mem_objects, num_mem_objects ? mem_objects : NULL, flags, num_events, num_events ? 
wait_list : 0, &event); if (!err) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ecl_make_error(env, err); } #endif static ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_int err; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if(o_queue->version >= 12) { if (!(err = eclEnqueueBarrierWithWaitList(o_queue->queue,0,NULL,NULL))) { return ATOM(ok); } } else { // deprecated in 1.2, available in 1.1 if (!(err = clEnqueueBarrier(o_queue->queue))) { return ATOM(ok); } } return ecl_make_error(env, err); } #if CL_VERSION_1_2 == 1 // // cl:enqueue_barrier_with_wait_list(Queue::cl_queue(), // WaitList::[cl_event()]) -> // {'ok',cl_event()} | {'error', cl_error()} // static ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_event event; cl_int err; cl_bool want_event = true; // make this an arg? UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = eclEnqueueBarrierWithWaitList(o_queue->queue,num_events, num_events ? wait_list : NULL, want_event ? 
&event : NULL ); if (!err) { if (want_event) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ATOM(ok); } return ecl_make_error(env, err); } // // cl:enqueue_marker_with_wait_list(Queue::cl_queue(), // WaitList::[cl_event()]) -> // {'ok',cl_event()} | {'error', cl_error()} // static ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; cl_event wait_list[MAX_WAIT_LIST]; cl_uint num_events = MAX_WAIT_LIST; cl_int err; cl_event event; cl_bool want_event = true; // make this an arg? UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!get_object_list(env, argv[1], &event_r, false, (void**) wait_list, &num_events)) return enif_make_badarg(env); err = eclEnqueueMarkerWithWaitList(o_queue->queue,num_events, num_events ? wait_list : NULL, want_event ? &event : NULL ); if (!err) { if (want_event) { ERL_NIF_TERM t; t = ecl_make_event(env, event, false, false, 0, 0, o_queue); return enif_make_tuple2(env, ATOM(ok), t); } return ATOM(ok); } return ecl_make_error(env, err); } #endif // // cl:async_flush(Queue::cl_queue()) -> reference() // static ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; ecl_context_t* o_context; ecl_message_t m; ERL_NIF_TERM ref; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context return enif_make_badarg(env); if (!(m.env = enif_alloc_env())) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? 
ref = enif_make_ref(env); m.type = ECL_MESSAGE_FLUSH; (void) enif_self(env, &m.sender); m.ref = enif_make_copy(m.env, ref); m.queue = o_queue; enif_keep_resource(o_queue); // keep while operation is running ecl_message_send(o_context->thr, &m); return enif_make_tuple2(env, ATOM(ok), ref); } // // cl:async_finish(Queue::cl_queue()) -> reference() // static ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_queue; ecl_context_t* o_context; ecl_message_t m; ERL_NIF_TERM ref; UNUSED(argc); if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue)) return enif_make_badarg(env); if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context return enif_make_badarg(env); if (!(m.env = enif_alloc_env())) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? ref = enif_make_ref(env); m.type = ECL_MESSAGE_FINISH; (void) enif_self(env, &m.sender); m.ref = enif_make_copy(m.env, ref); m.queue = o_queue; enif_keep_resource(o_queue); // keep while operation is running ecl_message_send(o_context->thr, &m); return enif_make_tuple2(env, ATOM(ok), ref); } // // cl:async_wait_for_event(Event) -> {ok,Ref} | {error,Reason} // async reply {cl_event, Ref, Result} // static ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_event_t* o_event; ecl_object_t* o_queue; ecl_context_t* o_context; ecl_message_t m; ERL_NIF_TERM ref; UNUSED(argc); if (!get_ecl_object(env, argv[0],&event_r,false,(ecl_object_t**)&o_event)) return enif_make_badarg(env); if (!(o_queue = o_event->obj.parent)) // queue not found ! return enif_make_badarg(env); if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context return enif_make_badarg(env); if (!(m.env = enif_alloc_env())) return ecl_make_error(env, CL_OUT_OF_RESOURCES); // enomem? 
ref = enif_make_ref(env); m.type = ECL_MESSAGE_WAIT_FOR_EVENT; (void) enif_self(env, &m.sender); m.ref = enif_make_copy(m.env, ref); m.event = o_event; enif_keep_resource(o_event); // keep while operation is running ecl_message_send(o_context->thr, &m); return enif_make_tuple2(env, ATOM(ok), ref); } // return event info static ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ecl_object_t* o_event; UNUSED(argc); if (!get_ecl_object(env, argv[0], &event_r, false, &o_event)) return enif_make_badarg(env); return make_object_info(env, argv[1], o_event, (info_fn_t*) clGetEventInfo, event_info, sizeof_array(event_info)); } static cl_uint get_version(char *version) { cl_uint ver = 0; version += 7; if(*version >= 48 && *version <= 57) ver += (*version-48)*10; version++; if(*version == 46) { version++; if(*version >= 48 && *version <= 57) ver += (*version-48); } /* fprintf(stderr, "V3 %s %d\r\n", version, ver); */ return ver; } // pre-Load Platform Ids and Device Ids, this will make the // internal IDs kind of static for the application code. The IDs // can then be used in matching etc. 
static int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr) { cl_platform_id platform_id[MAX_PLATFORMS]; cl_uint num_platforms; cl_uint i; cl_int err; if ((err = clGetPlatformIDs(MAX_PLATFORMS, platform_id, &num_platforms))) { *rerr = err; return -1; } ecl->platform = enif_alloc(num_platforms*sizeof(ecl_platform_t*)); ecl->nplatforms = num_platforms; ecl->icd_version = 11; for (i = 0; i < num_platforms; i++) { ecl_object_t* obj; cl_device_id device_id[MAX_DEVICES]; cl_uint num_devices; cl_uint j; char version[128]; cl_int ver = -1; if(CL_SUCCESS == clGetPlatformInfo(platform_id[i], CL_PLATFORM_VERSION, 64, version, NULL)) { if((ver = get_version(version)) > ecl->icd_version) ecl->icd_version = ver; } obj = ecl_new(env, &platform_r,platform_id[i],0,ver); ecl->platform[i].o_platform = obj; if ((err = clGetDeviceIDs(platform_id[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, device_id, &num_devices))) { *rerr = err; return -1; } ecl->platform[i].o_device=enif_alloc(num_devices*sizeof(ecl_object_t)); ecl->platform[i].ndevices = num_devices; for (j = 0; j < num_devices; j++) { ver = ecl->icd_version; if(CL_SUCCESS == clGetDeviceInfo(device_id[j], CL_DEVICE_VERSION, 64, version, NULL)) { ver = get_version(version); } obj = ecl_new(env, &device_r, device_id[j],0, ver); ecl->platform[i].o_device[j] = obj; } } return 0; } static int ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { ErlNifResourceFlags tried; ecl_env_t* ecl; cl_int err; lhash_func_t func = { ref_hash, ref_cmp, ref_release, 0 }; UNUSED(env); UNUSED(load_info); DBG("ecl_load"); if (!(ecl = enif_alloc(sizeof(ecl_env_t)))) return -1; if (!(ecl->ref_lock = enif_rwlock_create("ref_lock"))) return -1; lhash_init(&ecl->ref, "ref", 2, &func); // Load atoms // General atoms LOAD_ATOM(ok); LOAD_ATOM(error); LOAD_ATOM(unknown); LOAD_ATOM(undefined); LOAD_ATOM(true); LOAD_ATOM(false); // async messages LOAD_ATOM(cl_async); LOAD_ATOM(cl_event); // Type names LOAD_ATOM(platform_t); LOAD_ATOM(device_t); 
LOAD_ATOM(context_t); LOAD_ATOM(command_queue_t); LOAD_ATOM(mem_t); LOAD_ATOM(sampler_t); LOAD_ATOM(program_t); LOAD_ATOM(kernel_t); LOAD_ATOM(event_t); LOAD_ATOM(char); LOAD_ATOM(char2); LOAD_ATOM(char4); LOAD_ATOM(char8); LOAD_ATOM(char16); LOAD_ATOM(uchar); LOAD_ATOM(uchar2); LOAD_ATOM(uchar4); LOAD_ATOM(uchar8); LOAD_ATOM(uchar16); LOAD_ATOM(short); LOAD_ATOM(short2); LOAD_ATOM(short4); LOAD_ATOM(short8); LOAD_ATOM(short16); LOAD_ATOM(ushort); LOAD_ATOM(ushort2); LOAD_ATOM(ushort4); LOAD_ATOM(ushort8); LOAD_ATOM(ushort16); LOAD_ATOM(int); LOAD_ATOM(int2); LOAD_ATOM(int4); LOAD_ATOM(int8); LOAD_ATOM(int16); LOAD_ATOM(uint); LOAD_ATOM(uint2); LOAD_ATOM(uint4); LOAD_ATOM(uint8); LOAD_ATOM(uint16); LOAD_ATOM(long); LOAD_ATOM(long2); LOAD_ATOM(long4); LOAD_ATOM(long8); LOAD_ATOM(long16); LOAD_ATOM(ulong); LOAD_ATOM(ulong2); LOAD_ATOM(ulong4); LOAD_ATOM(ulong8); LOAD_ATOM(ulong16); LOAD_ATOM(half); LOAD_ATOM(float); LOAD_ATOM(float2); LOAD_ATOM(float4); LOAD_ATOM(float8); LOAD_ATOM(float16); LOAD_ATOM(double); LOAD_ATOM(double2); LOAD_ATOM(double4); LOAD_ATOM(double8); LOAD_ATOM(double16); // records LOAD_ATOM(cl_image_desc); LOAD_ATOM(cl_image_format); // channel type LOAD_ATOM(snorm_int8); LOAD_ATOM(snorm_int16); LOAD_ATOM(unorm_int8); LOAD_ATOM(unorm_int16); LOAD_ATOM(unorm_int24); LOAD_ATOM(unorm_short_565); LOAD_ATOM(unorm_short_555); LOAD_ATOM(unorm_int_101010); LOAD_ATOM(signed_int8); LOAD_ATOM(signed_int16); LOAD_ATOM(signed_int32); LOAD_ATOM(unsigned_int8); LOAD_ATOM(unsigned_int16); LOAD_ATOM(unsigned_int32); LOAD_ATOM(half_float); // channel order LOAD_ATOM(r); LOAD_ATOM(a); LOAD_ATOM(rg); LOAD_ATOM(ra); LOAD_ATOM(rgb); LOAD_ATOM(rgba); LOAD_ATOM(bgra); LOAD_ATOM(argb); LOAD_ATOM(intensity); LOAD_ATOM(luminance); LOAD_ATOM(rx); LOAD_ATOM(rgx); LOAD_ATOM(rgbx); LOAD_ATOM(depth); LOAD_ATOM(depth_stencil); // partition_property LOAD_ATOM(equally); LOAD_ATOM(by_counts); LOAD_ATOM(by_counts_list_end); LOAD_ATOM(by_affinity_domain); // affinity_domain 
LOAD_ATOM(numa); LOAD_ATOM(l4_cache); LOAD_ATOM(l3_cache); LOAD_ATOM(l2_cache); LOAD_ATOM(l1_cache); LOAD_ATOM(next_partitionable); // Load options & flags // Device info LOAD_ATOM(type); LOAD_ATOM(vendor_id); LOAD_ATOM(max_compute_units); LOAD_ATOM(max_work_item_dimensions); LOAD_ATOM(max_work_group_size); LOAD_ATOM(max_work_item_sizes); LOAD_ATOM(preferred_vector_width_char); LOAD_ATOM(preferred_vector_width_short); LOAD_ATOM(preferred_vector_width_int); LOAD_ATOM(preferred_vector_width_long); LOAD_ATOM(preferred_vector_width_float); LOAD_ATOM(preferred_vector_width_double); LOAD_ATOM(max_clock_frequency); LOAD_ATOM(address_bits); LOAD_ATOM(max_read_image_args); LOAD_ATOM(max_write_image_args); LOAD_ATOM(max_mem_alloc_size); LOAD_ATOM(image2d_max_width); LOAD_ATOM(image2d_max_height); LOAD_ATOM(image3d_max_width); LOAD_ATOM(image3d_max_height); LOAD_ATOM(image3d_max_depth); LOAD_ATOM(image_support); LOAD_ATOM(max_parameter_size); LOAD_ATOM(max_samplers); LOAD_ATOM(mem_base_addr_align); LOAD_ATOM(min_data_type_align_size); LOAD_ATOM(single_fp_config); LOAD_ATOM(global_mem_cache_type); LOAD_ATOM(global_mem_cacheline_size); LOAD_ATOM(global_mem_cache_size); LOAD_ATOM(global_mem_size); LOAD_ATOM(max_constant_buffer_size); LOAD_ATOM(max_constant_args); LOAD_ATOM(local_mem_type); LOAD_ATOM(local_mem_size); LOAD_ATOM(error_correction_support); LOAD_ATOM(profiling_timer_resolution); LOAD_ATOM(endian_little); LOAD_ATOM(available); LOAD_ATOM(compiler_available); LOAD_ATOM(execution_capabilities); LOAD_ATOM(queue_properties); LOAD_ATOM(name); LOAD_ATOM(vendor); LOAD_ATOM(driver_version); LOAD_ATOM(profile); LOAD_ATOM(version); LOAD_ATOM(extensions); LOAD_ATOM(platform); LOAD_ATOM(double_fp_config); LOAD_ATOM(half_fp_config); LOAD_ATOM(preferred_vector_width_half); LOAD_ATOM(host_unified_memory); LOAD_ATOM(native_vector_width_char); LOAD_ATOM(native_vector_width_short); LOAD_ATOM(native_vector_width_int); LOAD_ATOM(native_vector_width_long); 
LOAD_ATOM(native_vector_width_float); LOAD_ATOM(native_vector_width_double); LOAD_ATOM(native_vector_width_half); LOAD_ATOM(opencl_c_version); LOAD_ATOM(linker_available); LOAD_ATOM(built_in_kernels); LOAD_ATOM(image_max_buffer_size); LOAD_ATOM(image_max_array_size); LOAD_ATOM(parent_device); LOAD_ATOM(partition_max_sub_devices); LOAD_ATOM(partition_properties); LOAD_ATOM(partition_affinity_domain); LOAD_ATOM(partition_type); LOAD_ATOM(reference_count); LOAD_ATOM(preferred_interop_user_sync); LOAD_ATOM(printf_buffer_size); LOAD_ATOM(image_pitch_alignment); LOAD_ATOM(image_base_address_alignment); // cl_nv_device_attribute_query extension LOAD_ATOM(compute_capability_major_nv); LOAD_ATOM(compute_capability_minor_nv); LOAD_ATOM(registers_per_block_nv); LOAD_ATOM(warp_size_nv); LOAD_ATOM(gpu_overlap_nv); LOAD_ATOM(kernel_exec_timeout_nv); LOAD_ATOM(device_integrated_memory_nv); // Platform info LOAD_ATOM(profile); LOAD_ATOM(version); LOAD_ATOM(name); LOAD_ATOM(vendor); LOAD_ATOM(extensions); // Context info LOAD_ATOM(reference_count); LOAD_ATOM(devices); LOAD_ATOM(properties); // Queue info LOAD_ATOM(context); LOAD_ATOM(num_devices); LOAD_ATOM(device); LOAD_ATOM(reference_count); LOAD_ATOM(properties); // Mem info LOAD_ATOM(object_type); LOAD_ATOM(flags); LOAD_ATOM(size); LOAD_ATOM(host_ptr); LOAD_ATOM(map_count); LOAD_ATOM(reference_count); LOAD_ATOM(context); // Image info LOAD_ATOM(format); LOAD_ATOM(element_size); LOAD_ATOM(row_pitch); LOAD_ATOM(slice_pitch); LOAD_ATOM(width); LOAD_ATOM(height); LOAD_ATOM(depth); // Sampler info LOAD_ATOM(reference_count); LOAD_ATOM(context); LOAD_ATOM(normalized_coords); LOAD_ATOM(addressing_mode); LOAD_ATOM(filter_mode); // Program info LOAD_ATOM(reference_count); LOAD_ATOM(context); LOAD_ATOM(num_decices); LOAD_ATOM(devices); LOAD_ATOM(source); LOAD_ATOM(binary_sizes); LOAD_ATOM(binaries); // Build Info LOAD_ATOM(status); LOAD_ATOM(options); LOAD_ATOM(log); LOAD_ATOM(binary_type); // Kernel Info LOAD_ATOM(function_name); 
LOAD_ATOM(num_args); LOAD_ATOM(reference_count); LOAD_ATOM(context); LOAD_ATOM(program); // Event Info LOAD_ATOM(command_queue); LOAD_ATOM(command_type); LOAD_ATOM(reference_count); LOAD_ATOM(execution_status); // Workgroup info LOAD_ATOM(work_group_size); LOAD_ATOM(compile_work_group_size); LOAD_ATOM(local_mem_size); LOAD_ATOM(preferred_work_group_size_multiple); LOAD_ATOM(private_mem_size); LOAD_ATOM(global_work_size); // Error codes LOAD_ATOM(device_not_found); LOAD_ATOM(device_not_available); LOAD_ATOM(compiler_not_available); LOAD_ATOM(mem_object_allocation_failure); LOAD_ATOM(out_of_resources); LOAD_ATOM(out_of_host_memory); LOAD_ATOM(profiling_info_not_available); LOAD_ATOM(mem_copy_overlap); LOAD_ATOM(image_format_mismatch); LOAD_ATOM(image_format_not_supported); LOAD_ATOM(build_program_failure); LOAD_ATOM(map_failure); LOAD_ATOM(invalid_value); LOAD_ATOM(invalid_device_type); LOAD_ATOM(invalid_platform); LOAD_ATOM(invalid_device); LOAD_ATOM(invalid_context); LOAD_ATOM(invalid_queue_properties); LOAD_ATOM(invalid_command_queue); LOAD_ATOM(invalid_host_ptr); LOAD_ATOM(invalid_mem_object); LOAD_ATOM(invalid_image_format_descriptor); LOAD_ATOM(invalid_image_size); LOAD_ATOM(invalid_sampler); LOAD_ATOM(invalid_binary); LOAD_ATOM(invalid_build_options); LOAD_ATOM(invalid_program); LOAD_ATOM(invalid_program_executable); LOAD_ATOM(invalid_kernel_name); LOAD_ATOM(invalid_kernel_definition); LOAD_ATOM(invalid_kernel); LOAD_ATOM(invalid_arg_index); LOAD_ATOM(invalid_arg_value); LOAD_ATOM(invalid_arg_size); LOAD_ATOM(invalid_kernel_args); LOAD_ATOM(invalid_work_dimension); LOAD_ATOM(invalid_work_group_size); LOAD_ATOM(invalid_work_item_size); LOAD_ATOM(invalid_global_offset); LOAD_ATOM(invalid_event_wait_list); LOAD_ATOM(invalid_event); LOAD_ATOM(invalid_operation); LOAD_ATOM(invalid_gl_object); LOAD_ATOM(invalid_buffer_size); LOAD_ATOM(invalid_mip_level); LOAD_ATOM(invalid_global_work_size); // cl_device_type LOAD_ATOM(all); LOAD_ATOM(default); LOAD_ATOM(cpu); 
LOAD_ATOM(gpu); LOAD_ATOM(accelerator); LOAD_ATOM(custom); // fp_config LOAD_ATOM(denorm); LOAD_ATOM(inf_nan); LOAD_ATOM(round_to_nearest); LOAD_ATOM(round_to_zero); LOAD_ATOM(round_to_inf); LOAD_ATOM(fma); LOAD_ATOM(soft_float); LOAD_ATOM(correctly_rounded_divide_sqrt); // mem_cache_type LOAD_ATOM(none); LOAD_ATOM(read_only); LOAD_ATOM(read_write); // local_mem_type LOAD_ATOM(local); LOAD_ATOM(global); // exec capability LOAD_ATOM(kernel); LOAD_ATOM(native_kernel); // command_queue_properties LOAD_ATOM(out_of_order_exec_mode_enable); LOAD_ATOM(profiling_enable); // mem_flags LOAD_ATOM(read_write); LOAD_ATOM(write_only); LOAD_ATOM(read_only); LOAD_ATOM(use_host_ptr); LOAD_ATOM(alloc_host_ptr); LOAD_ATOM(copy_host_ptr); // migration_flags LOAD_ATOM(host); LOAD_ATOM(content_undefined); // mem_object_type LOAD_ATOM(buffer); LOAD_ATOM(image2d); LOAD_ATOM(image3d); LOAD_ATOM(image2d_array); LOAD_ATOM(image1d); LOAD_ATOM(image1d_array); LOAD_ATOM(image1d_buffer); // addressing_mode LOAD_ATOM(none); LOAD_ATOM(clamp_to_edge); LOAD_ATOM(clamp); LOAD_ATOM(repeat); // filter_mode LOAD_ATOM(nearest); LOAD_ATOM(linear); // map_flags LOAD_ATOM(read); LOAD_ATOM(write); // build_status LOAD_ATOM(success); LOAD_ATOM(none); LOAD_ATOM(error); LOAD_ATOM(in_progress); // program_binary_type LOAD_ATOM(none); LOAD_ATOM(compiled_object); LOAD_ATOM(library); LOAD_ATOM(executable); // command_type LOAD_ATOM(ndrange_kernel); LOAD_ATOM(task); LOAD_ATOM(native_kernel); LOAD_ATOM(read_buffer); LOAD_ATOM(write_buffer); LOAD_ATOM(copy_buffer); LOAD_ATOM(read_image); LOAD_ATOM(write_image); LOAD_ATOM(copy_image); LOAD_ATOM(copy_image_to_buffer); LOAD_ATOM(copy_buffer_to_image); LOAD_ATOM(map_buffer); LOAD_ATOM(map_image); LOAD_ATOM(unmap_mem_object); LOAD_ATOM(marker); LOAD_ATOM(aquire_gl_objects); LOAD_ATOM(release_gl_objects); LOAD_ATOM(migreate_mem_objects); LOAD_ATOM(fill_buffer); LOAD_ATOM(fill_image); // execution_status LOAD_ATOM(complete); LOAD_ATOM(running); LOAD_ATOM(submitted); 
LOAD_ATOM(queued); // arguments LOAD_ATOM(region); LOAD_ATOM(global); LOAD_ATOM(local); LOAD_ATOM(constant); LOAD_ATOM(private); LOAD_ATOM(read_only); LOAD_ATOM(write_only); LOAD_ATOM(read_write); LOAD_ATOM(none); LOAD_ATOM(none); LOAD_ATOM(const); LOAD_ATOM(restrict); LOAD_ATOM(volatile); LOAD_ATOM(address_qualifier); LOAD_ATOM(access_qualifier); LOAD_ATOM(type_name); LOAD_ATOM(type_qualifier); LOAD_ATOM(name); // Create resource types ecl_resource_init(env, &platform_r, "platform_t", sizeof(ecl_object_t), ecl_platform_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &device_r, "device_t", sizeof(ecl_object_t), ecl_device_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &context_r, "context_t", sizeof(ecl_context_t), // NOTE! specialized! ecl_context_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &command_queue_r, "command_queue_t", sizeof(ecl_object_t), ecl_queue_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &mem_r, "mem_t", sizeof(ecl_object_t), ecl_mem_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &sampler_r, "sampler_t", sizeof(ecl_object_t), ecl_sampler_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &program_r, "program_t", sizeof(ecl_object_t), ecl_program_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &kernel_r, "kernel_t", sizeof(ecl_kernel_t), // NOTE! specialized! ecl_kernel_dtor, ERL_NIF_RT_CREATE, &tried); ecl_resource_init(env, &event_r, "event_t", sizeof(ecl_event_t), // NOTE! specialized! 
ecl_event_dtor, ERL_NIF_RT_CREATE, &tried); *priv_data = ecl; if (ecl_pre_load(env, ecl, &err) < 0) { CL_ERROR("ecl_pre_load: error code = %d", err); } ecl_load_dynfunctions(ecl); return 0; } #ifdef WIN32 #define RTLD_LAZY 0 #define OPENCL_LIB "opencl.dll" typedef HMODULE DL_LIB_P; void * dlsym(HMODULE Lib, const char *func) { return (void *) GetProcAddress(Lib, func); } HMODULE dlopen(const CHAR *DLL, int unused) { return LoadLibrary(DLL); } #else typedef void * DL_LIB_P; # ifdef DARWIN # define OPENCL_LIB "/System/Library/Frameworks/OpenCL.framework/OpenCL" # else # define OPENCL_LIB "libOpenCL.so" # endif #endif static void ecl_load_dynfunctions(ecl_env_t* ecl) { DL_LIB_P handle; if(ecl->icd_version < 12) return; if((handle = dlopen(OPENCL_LIB, RTLD_LAZY))) { eclUnloadPlatformCompiler = dlsym(handle, "clUnloadPlatformCompiler"); eclEnqueueMarkerWithWaitList = dlsym(handle, "clEnqueueMarkerWithWaitList"); eclEnqueueBarrierWithWaitList = dlsym(handle, "clEnqueueBarrierWithWaitList"); eclCreateImage = dlsym(handle, "clCreateImage"); return; } fprintf(stderr, "Failed open OpenCL dynamic library\r\n"); ecl->icd_version = 11; } static int ecl_reload(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) { UNUSED(env); UNUSED(load_info); UNUSED(priv_data); DBG("ecl_reload"); // FIXME return 0; } static int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info) { UNUSED(env); UNUSED(load_info); DBG("ecl_upgrade"); // FIXME *priv_data = *old_priv_data; return 0; } static void ecl_unload(ErlNifEnv* env, void* priv_data) { ecl_env_t* ecl = priv_data; cl_uint i; cl_uint j; UNUSED(env); DBG("ecl_unload"); for (i = 0; i < ecl->nplatforms; i++) { ecl_object_t* obj; for (j = 0; j < ecl->platform[i].ndevices; j++) { obj = ecl->platform[i].o_device[j]; enif_release_resource(obj); } enif_free(ecl->platform[i].o_device); obj = ecl->platform[i].o_platform; enif_release_resource(obj); } enif_free(ecl->platform); 
enif_rwlock_rwlock(ecl->ref_lock); lhash_delete(&ecl->ref); enif_rwlock_rwunlock(ecl->ref_lock); enif_rwlock_destroy(ecl->ref_lock); enif_free(ecl); } /* #warning "testing only, REMOVE before release" #define ERL_NIF_INIT_BODY \ DBG("erl_nif_init") */ ERL_NIF_INIT(cl, ecl_funcs, ecl_load, ecl_reload, ecl_upgrade, ecl_unload) cl-cl-1.2.3/doc/000077500000000000000000000000001301041406700132505ustar00rootroot00000000000000cl-cl-1.2.3/doc/.gitignore000066400000000000000000000000421301041406700152340ustar00rootroot00000000000000*.html *.css edoc-info erlang.png cl-cl-1.2.3/doc/clErlang.png000066400000000000000000001213101301041406700155030ustar00rootroot00000000000000‰PNG  IHDRÀÀRÜl pHYs  šœ OiCCPPhotoshop ICC profilexÚSgTSé=÷ÞôBKˆ€”KoR RB‹€‘&*! Jˆ!¡ÙQÁEEÈ ˆŽŽ€ŒQ, Š Øä!¢Žƒ£ˆŠÊûá{£kÖ¼÷æÍþµ×>ç¬ó³ÏÀ –H3Q5€ ©BàƒÇÄÆáä.@ $p³d!sý#ø~<<+"À¾xÓ ÀM›À0‡ÿêB™\€„Àt‘8K€@zŽB¦@F€˜&S `ËcbãP-`'æÓ€ø™{[”! ‘ eˆDh;¬ÏVŠEX0fKÄ9Ø-0IWfH°·ÀÎ ² 0Qˆ…){`È##x„™FòW<ñ+®ç*x™²<¹$9E[-qWW.(ÎI+6aaš@.Ây™24àóÌ ‘àƒóýxήÎÎ6޶_-ê¿ÿ"bbãþåÏ«p@át~Ñþ,/³€;€mþ¢%îh^  u÷‹f²@µ éÚWópø~<ß5°j>{‘-¨]cöK'XtÀâ÷ò»oÁÔ(€hƒáÏwÿï?ýG %€fI’q^D$.Tʳ?ÇD *°AôÁ,ÀÁÜÁ ü`6„B$ÄÂBB d€r`)¬‚B(†Í°*`/Ô@4ÀQh†“p.ÂU¸=púažÁ(¼ AÈa!ÚˆbŠX#Ž™…ø!ÁH‹$ ɈQ"K‘5H1RŠT UHò=r9‡\Fº‘;È2‚ü†¼G1”²Q=Ô µC¹¨7„F¢ Ðdt1š ›Ðr´=Œ6¡çЫhÚ>CÇ0Àè3Äl0.ÆÃB±8, “c˱"¬ «Æ°V¬»‰õcϱwEÀ 6wB aAHXLXNØH¨ $4Ú 7 „QÂ'"“¨K´&ºùÄb21‡XH,#Ö/{ˆCÄ7$‰C2'¹I±¤TÒÒFÒnR#é,©›4H#“ÉÚdk²9”, +È…ääÃä3ää!ò[ b@q¤øSâ(RÊjJåå4åe˜2AU£šRݨ¡T5ZB­¡¶R¯Q‡¨4uš9̓IK¥­¢•Óhh÷i¯ètºÝ•N—ÐWÒËéGè—èôw †ƒÇˆg(›gw¯˜L¦Ó‹ÇT071ë˜ç™™oUX*¶*|‘Ê •J•&•*/T©ª¦ªÞª UóUËT©^S}®FU3Sã© Ô–«UªPëSSg©;¨‡ªg¨oT?¤~Yý‰YÃLÃOC¤Q ±_ã¼Æ c³x,!k «†u5Ä&±ÍÙ|v*»˜ý»‹=ª©¡9C3J3W³Ró”f?ã˜qøœtN ç(§—ó~ŠÞï)â)¦4L¹1e\kª–—–X«H«Q«Gë½6®í§¦½E»YûAÇJ'\'GgÎçSÙSݧ §M=:õ®.ªk¥¡»Dw¿n§î˜ž¾^€žLo§Þy½çú}/ýTýmú§õG X³ $Û Î<Å5qo</ÇÛñQC]Ã@C¥a•a—á„‘¹Ñ<£ÕFFŒiÆ\ã$ãmÆmÆ£&&!&KMêMîšRM¹¦)¦;L;LÇÍÌÍ¢ÍÖ™5›=1×2ç›ç›×›ß·`ZxZ,¶¨¶¸eI²äZ¦Yî¶¼n…Z9Y¥XUZ]³F­­%Ö»­»§§¹N“N«žÖgðñ¶É¶©·°åØÛ®¶m¶}agbg·Å®Ã“}º}ý= ‡Ù«Z~s´r:V:ޚΜî?}Åô–é/gXÏÏØ3ã¶Ë)ÄiS›ÓGgg¹sƒóˆ‹‰K‚Ë.—>.›ÆÝȽäJtõq]ázÒõ›³›Âí¨Û¯î6îiî‡ÜŸÌ4Ÿ)žY3sÐÃÈCàQåÑ? 
Ÿ•0k߬~OCOgµç#/c/‘W­×°·¥wª÷aï>ö>rŸã>ã<7Þ2ÞY_Ì7À·È·ËOÃož_…ßC#ÿdÿzÿѧ€%g‰A[ûøz|!¿Ž?:Ûeö²ÙíAŒ ¹AA‚­‚åÁ­!hÈì­!÷ç˜Î‘Îi…P~èÖÐaæa‹Ã~ '…‡…W†?ŽpˆXÑ1—5wÑÜCsßDúD–DÞ›g1O9¯-J5*>ª.j<Ú7º4º?Æ.fYÌÕXXIlK9.*®6nl¾ßüíó‡ââ ã{˜/È]py¡ÎÂô…§©.,:–@LˆN8”ðA*¨Œ%òw%Ž yÂÂg"/Ñ6шØC\*NòH*Mz’쑼5y$Å3¥,幄'©¼L LÝ›:žšv m2=:½1ƒ’‘qBª!M“¶gêgæfvˬe…²þÅn‹·/•Ék³¬Y- ¶B¦èTZ(×*²geWf¿Í‰Ê9–«ž+Íí̳ÊÛ7œïŸÿíÂá’¶¥†KW-X潬j9²‰Š®Û—Ø(Üxå‡oÊ¿™Ü”´©«Ä¹dÏfÒféæÞ-ž[–ª—æ—n ÙÚ´ ßV´íõöEÛ/—Í(Û»ƒ¶C¹£¿<¸¼e§ÉÎÍ;?T¤TôTúT6îÒݵa×ønÑî{¼ö4ìÕÛ[¼÷ý>ɾÛUUMÕfÕeûIû³÷?®‰ªéø–ûm]­NmqíÇÒý#¶×¹ÔÕÒ=TRÖ+ëGǾþïw- 6 UœÆâ#pDyäé÷ ß÷ :ÚvŒ{¬áÓvg/jBšòšF›Sšû[b[ºOÌ>ÑÖêÞzüGÛœ499â?rýéü§CÏdÏ&žþ¢þË®/~øÕë×Îјѡ—ò—“¿m|¥ýêÀë¯ÛÆÂƾÉx31^ôVûíÁwÜwï£ßOä| (ÿhù±õSЧû“““ÿ˜óüc3-Û cHRMz%€ƒùÿ€éu0ê`:˜o’_ÅF—óIDATxÚìýw¼mÙU߉~Ç\a‡“o¾·nE•ª”Q.•2I&ÙÆÂH¤Æ6íæÁkœ _ÓÆà¸ýÚŸv7Ïv» ÆÐ„Œ±Ý€„Y¥ˆ¤*U¾·n<ùœÃ s¼?VØs­½Ï­[¥B¨Êµë³ëÜsÎ>kïµÖsŒñ¿ñ¢ª¼øxññ_ëüx ^|¼h/>^|¼h/>^|¼h/>^|¼h/>^|¼h/>^|¼h/>^|¼h/>^|¼h/>^|¼h/>^|¼À>|°•‚ù#2 hþ5Í¿Oòß½øÈrƒßéÓü¾Î]ò5è9_å`]–kÐŽaþôŸã7+} LIþ5Çžo† Oó{]ðZyš- žê¼ÎÞ„ð<ÞT$_!ÐZù× 7ï9\“I¾#¸XÚ@ó9|#ò7š# tò¯£Ü’ÚMÖçÙ—#¿,XÔEÈ)78¶©ý]±A¤5ƒÐ•>Ï<‚{m‚|®ùº\v á¹x¤ùÚ]¸8“{‚åÜ¿ÐGòL>°l;'QxÍ¿ò<»qõ΂ï‹{ùÎæ;;™9âïê»xâìVImÃÐìøÏ—k)µëKùZ<Ÿ¯Ë“Àj¾9››ð¾O÷ˆ€A¾&·D¿H|è$Iˆ¢‡’Ï7›ƒÏM§·¯4›{O¨¾tݘK;°›ªún‘/±{õÁüÝ¿©ÀÇúpÊÀí§À Ž`{ CqzZš6˜f¶“©_sé­Œ§³÷:qNÞ £m@<€0IÀ¤ N7²×o´Á³0íÁgGp¯@[a=ùÒ]ûtÿyÚpуc÷Âö|ù¯Q,ð¹4€ú¡D„ápÈÃ?ÌÖÖŸ»óÓôÇS.§×Q”Q:$4>‰ ’â x¢x"x|Q<ƒósðŒd?ðM¾’Œâ åk})þ¦øÞ9Åßå?C²c?“üûü¸‚AñŒ`´8†d¯ÅdÛ¹< Ï,#šâIˆ¡‘ƜƓ À º!Rlhš•¨ÍìÃÆ™Í¤{\;õ²ð8Þ†4!Õìo¬«Ù÷Vó4Ïæ_ÕIý´ G¤µŸ¥Z…,ÜïÓ#^W#ÿ}*Îg‘üçοUÀ jUâ£Í6¬­!'Nãýùÿ¼à9]ìîúžxâ |U¥ØlŸ+S(Žù™Ï|†O}êÓ\ºt‰—|×)Ò«–ØDX±ˆ*¢ jIQLé±³¯‚ó™Äõ•Åï4¹dS¨ü]áý¥øãâø’Ó8¯+ßZfÇ5ÙÑ´8†æ¿®D*"ÁCñ$ÀH OV0àÉHOšùç°ÙëåËVÊãdFd‹^ûÀl'Ûì5Eå$ˆÉØIö4ËY„i—3Œ!äÇK²*E¤if)„j5‚Ðz:ag¿××§øá\º­Î¿Ä9®ˆ³°ªQYq‡h4¡µÑÄ€ÒþcÌWþÀs¾ãkóÒ¥K<øàƒøÏõÂ888ààãÿ8O>ù$ßõ_Ï#Wê„i‘ú)±MHIQM@ó¥&: »ÇÏh#Í6MÑê}J£P4??-+ÔÖBiNè\|Ñì}¥žŒ„‘OZx²NË¿À{%Iúû¯]»ÎÃ?’y×” Cx¶QÑ`0à‘Gáà“Ÿü$—.]âï~Ñ€ƒhHߌÚ1‘MII²LNm¶I¸FÍ/ ¸’Î~&ùý+þVJÃpoŒsŒÒ(˜Ùq+ C/!³¨$3ˆ|Ù—Fç ŒY#eBÙ ôîBˆIí§ñÌ9ïëÏú쮩zoïÍ™w‹ÿb{`‡hã~ð6!z¢0‡Y¸Sì¾’‚šÜ8Ϧ5C—báKe“˜ÂÀIäàºÓ· mŸŽÓCý$Tˆ_ù^ü°uí*¾°¼¼D«Õþ‚7é+W®ðè£ðè£råÊå,r-ä™,~UÅZËh4boo'Ÿ|’OúÓ|âŸ`Ÿ 
áó'¾ç<Խ¾öé˜!S;%ÖÅ"X4ß³0Յ'E˜-RuvuB“ÂÈì3C(ï³j©:¡ÌþÍQia ŠˆAññ1xøÒÊ¿9O`V±„Þ;n[RÞøÚê ÑUÐàOÎ>ßä߀ þÙ,Öˆl^3 {ì ¶ãÊü¢T9âýµêÜë¨:\™°æ[ÒÜ¢¯[ÔZˆ¦ÀÒ*OI£”Ý'?OyÜ~ˆßÝ>ÏÒÒlll°¼¼ŒçyÝn¯ðÙBz6Eý~ŸÍÍM.\¸ÀO<Áþþ>÷çÿߺ»‡nïòêð‰¬Œ›¦Äqæ ê»úÍ<ë-~ÁÓíþ;;;\¾|™ ž¤ÛíÂßú×ßÃg®<ÅêWžbszÀ 1±1©f²äÑŠÅ+\¦ÈÜW)–iö•„&Ìßb}K-Î)ïu"I‰:Iþ2ƒˆ`Ä' …¥‡c–ð$$6lšUBï$mïýsᣪ-¿ÿB…EžBòÏ š{ yŒÆ7áé/ƒ]‚àÖk`¢>¤É¬è> J^s¸QŽ®‹k·•¼ix(®kug«z, RD«&£,¬%ÅÛ¾†4›°ºŠ6˜=ZçâÌàÓä‡ëó^7Šð§ž ÜL‘¦ŒÇcö÷÷¹~ý/^äÊ•«|ßß?íl²9î²Ç^4fœÄ¤XRM1&ÛĉµUê‹z¶áˆ“´J=,U»ŸR¹A„©¹‹Q²‹-’Ÿ³ƒQ‹Ñ̉ÁjDLŒoòÊ»&xÒÈ’^sŒ¦w‚eïÛîÞÏåºBŽäç&"™‡ÿ ÁôP3ÙÓe³b‹®,X]©-ø¹ð²H€¥š+ó ¶»èo‰Ö|qfŒ*hšUèÔâ!Í.fg‡[×?ÃæÒ—çÞ5ÁyFÌu¼Xá |—Zr3y€ªÇ1ƒÁ ý¯påÊ’$A·/±·½Ï!c£Ã$"ÒKšeýùâ×£âôb‹6ê RÁëg¹j-ü©£n¹‘ˆ©&Ó•ÝHL5Ñ-î¡“…>f _Öiš34ÌqgñS†"Ô‹qAGÊ B¢(â`ôUœnþ:išb’!¤QV;û—̤úN-µœ¨ŸÊ¬µû GS÷êe15`ó)M#dâ¡ý!`öv‘S¦–8Nð<ŸgRX”›ù¤íÆÏ$I˜L&%¤tùòe¶¶¶¿õ¯¿—‹û;\ÝÙ£›L¤S"Mˆ5Åæ‹¥tçõ°ê\t½]˜ðJ%Æt¸ÖKÜEh¤n¸5ƒöŒ40fÕZ5xÒ 5Zr -s s‚UÿÛ+1yšÚ™p½À㙽ZæQ1F<±óVÄ?þxíŒ^QäBþoC•u½-: 2ÕZ²µ(¿P·xy4eOÜzæº*¤’b`‘¦œÛûŒ=¦Xûìòwò‹]¿H¨Üì(سÛí²¹yK—.qåÊ•ò÷;:d;êÑcz2a’L±™-c°Øœçã8½YØB 6s,DóI¾I͉ê"´¡Øýk0w3‹ãš H4§5ž„øÒ$4Çiš„Þ+Þ7V®“ûübíüG’ºDò1f8JIŽŸ  ‚?›I O`dŒà@•sÑ*R¯ë_Eo@ZÍQ)eV³U§è)$ètŠŒF0I°‡=ä´”F bnúºgkÇSO~oôLÓ´Üý77·¸zõ*‡‡üðÏ—º{ì zô™2²S"µeÑ«ä½Ô<@að%j3w‘dn=+¥¨ƒ4ˆË†w·Š0o0Ž7²a1È MsšÐÛ`Å|5èRòÏ?žñÓæècvM>õØíHxë³”y‚‰­4:¢Uç¨]¾vaçP££“ÝʽP©’¼ò§8 F RhR£. 
GhoıÞ'Ë]¼é3C‚*u€&Z‡‹ŽŠý÷öö¸ví[[›|õŸ»Ÿ‹‡;lvé2¥/kg Í jÔߥ‚èHžŒ‰he—V碈®V#βž£R}w8Fâ†Z5/o5ÁSA$ƒ=³AÜ![BœÏxîs·h–Ý«^|QËrÐt’±Km±ÛØjø³°Õ§VíÕEÕj[á%Þ˜zÌáq–AÌ’ȩ̈žÀ¢i “1²u múø;—aãÍeH}3ÀMÕHf§bf ðÍáþN‡ÍÍM67¯Óétáì«Îryƒþ€®N'1±Í’à ‰©à8 $UDA믑٠9*êÔJ±ŒòO´N„,“m‹àá‰ÁHH('ðÄ£m¾Ò‰¹«qÿÿί56ûœqœð{Ÿh²¼$¤Þx-Ôkg4 ñjÞõˆ"×Üïä êx‘ùüVçÕÜBxc]» j ÛEö°ý)Kã‹5oôx7~]´£;Êp8dooÍÍM¶¶¶¯þsoÅÞÒé&t4fߌî€Í ÆE «ÕÀ¤z2R©Â»t©²Fµ†7‹.¾œÉù{7)6Y¶o– Y¡éÄìR%Þw“Þ/¥‡ˆÁ/ƒ,ÓiDbÖèEkÒÇ“*â5²[o§5òtçSãþT¨ÒÔB›ÚŽSßìœ0¶ôRç-åßÚ¡Ñ€NsØ¡¹ÿ8ÑêKÑܓ݌¨'Áªe`å-º±î½ÍÖÖ&‡‡ˆ_ö§_Ã'/³;ÐKFŒÓ˜8Çû3ØØ:©–õËÿpl`ÇÕd9ÿŸ‘ªÇ®o@*þ‹8T-©ù# |i`0¢Ú§-_ƒMmñIÓôK&ô©?Œ1yb˜Ý·Ï>æóÚ;”¨¿‚g5m$Öh²Ï´ƒÒ%PiÍKè‚äY†\RÀO—æmࣣ1º>ª5ÂóüJ8tã$¸ê L…·tƒäw4qppÀÎλ»»€°¼ÞfsÒaoÚ§›Œ¥Ý¡¬)äɯ–Q·–éŽ89 $^tÉÅ¥¦nÛ s·Df}…™©8›‹K—”òzEhÖñM Ñ¥äø¥úh¥Z\ˆ¬MùÌç‡xÁJH*í¬±2ÖhY§ )ónÚ­TÐÏZÕw¡³ês`9!ïé¨!­®P±9ktÐ…ñ ±6dmû÷ð}¯Lþ PäæB n. . Ï"ùÝÙÙáààø¾Ÿú.>¶‰Ýé€^4f’Æ9Ë<'c14µ`3ª–ŒN5…pô< 'ª³&ç".bBÔÁ\/2_IÈ®ºQcŒ4Ì:-¹'÷Œ‹aÏ/ÅGáŒÉ¼@§DÚšLˆÐ$¯v´`ßÑy–g°?¢¨¥Zå‰.®Ö;,Ñ9ßïä*ý7ЍAÇÓ,¼1›W Ü$Y_æÓ"%ÁþQ0—ö0™Lèt:ìîî²··G’Ĉׇ]ö¢Ý8[ü‘͸>ÆÔr)AËYµÕÁڔʼnòÂTI]¿K…˜ÕÜTçìY;¥ˆAŒ‡/ BYÅò:R-vü£CÄ/­<@ð}Ï39cR9è°AF8#Π8œ=:„Q9¨S´†I=î—*´vgÍzÊ|öPËÒbF´ÓE‡ÇóÝ_çá§çé¬U—îîtI’08<<äðð^¯ Àÿø3ÿ/¶'}¢ÃxÊ´(´äx¼ºA†VQÉ_0ƒ„]ˆM«Qe…x5«F΀R™_Ú³V®ª‡P÷þH¥¯@0²Œ!¤iNáËR﯇?_ªÏ"*êijù¿³øMm  a¾™Z›$ ’Üÿ–Eñú¢Ðé¨â™›]P)ÖY~P¾6³|/Ia:E{]è<þëFÁQŒZ·^S¢“Zö,âJÌП~¿Ïáá!Î!Ãáº2å0Ñ&LlB’³ Ëþ\—ÜàpÌËXÕÔ¨ãRµ{9ŠEë_fÐlº€­(e¼+³ÖH-x?‚'>,Ñ4·JŒ–á>/Â÷¤©Í¥h¦ììtP9Å$iàÑÄHjgYS½9h¡ä,Hvkt©Ã© þFª †Ö€uz´Î¶(Š¥q‚L§è4E#›s¢ªiÈQµ¬zŸ†?ÿ‹Ù"-Ÿ^¯Çáá!Ýn—(Šyõý÷°;p82H¦e£ N]Oœê¡Ô¸ZcujáŒ.ôª*óA¾Ö³Ò;•ÖFuŒ@]Ú„Tw-1ŸU|³œ‰›È±|×OH’„8Nèõz ƒ/éHUéõzloo±µµÍîî.N彑à‡V³l¯Öv|Ë<Š ‹Qåh%… eWk!м! 
í½²â¥Â,m´Ð8BÒÚ«à·ÐƉ*\΋¹u‹ª@}—+hσAŸÁ Ïp8BÞûW¿–\’ñhÊD²¢WX¢4E²;½f^¡lö’Y3–ÌâA*,NÍwë òãzŽBÝAó¡ GH­µR3)Mì!Ö4‰ô*+éû‰Ò˜8ŽÙÙÙå©§.±³³Íx<æK}¶òp8bww—ÃúÝ.£ÑÕWÒð#Þ”8š:Å,;~ÈÆèBªm$§ÕÂW…î  3U(¢`¨檃NÉt’i y º³®-#;—á®·Qe3aÎbÔÚ™‡ðëíî3Š"†Ã!Ýn~¿Ïx<`:¤OÄTS¦šjFy3‹*"•lÞÉ;«×ÇYÐÅ’—‚ê ÿ½æ2Ëc8êü«.À¥ÅåÙ*á“¢¨x¤Ä$dóþþ>O>y‹/qåÊeˆ¢¸¶›|é=F£1Î!ƒÁ £E!ˆõvL4TÚÒ#¦MÀd>ä­…ŽzD×X´X¯ÏhÌ©LTY¹R„¹R%ñj2ESÍE ãñˆ×¿û•LG ’)C/É!¨—‡£3 ¸lm”*j¦: ˆ©Æ âØex2_a.4ÄA‚p…°jXô¬5SIu š’ê˜'/<Éc=Á£>ÆSO=ŵk›ôû½²åóùð˜õ €¦C|3ÅÈ4ÅK™ðÖÂJí‚4`ÎÀœueÔŠ-(Í»,'¿“êï\þ*:¡‡™ÄØáoÿ2rê•õ»( Zô;' žçþL&†ÃÃáÉdDš¦ü©ï}¿óÐCŒdÂÔÆ$6Íwö¼áÄâÙ9IEò¤ÐÛQµqtfÝZÎ 1Nƒ¸q¢D­J™¸-®®ÄbWTU3`5"ÖSÛ`·÷iþðŸä‘Gcss“EâL_rò¥ P8¾ÑdÜïÐÒ!žLAŒ¦3þ}%‘Õ ŠVi¦gQ>ì$uÊò\ás WÐLjuÝִцt”㉴ßEâãÐæÈ—C¤<‚Ò£5Õ‹J?€Ëu‰¢ˆÑhD¯×§ßï3eáÏa4f¬ #“0"É]ÈyÜõž]ª¨tN¡F¨‡$Žòaå5±«B'h‘4¢K¿5³×hE›¦0^‹•”XÇL“ç^óþËV±Óûñ¥ÁÒRƒM“ò}ªR‹þq^WT¦…yVäÑÅŽE_u~qÕÐ’ÐSH§ˆa']&½0$€ÍG6Ø#‡:Š6Ÿ/Àþ¥–(ëñmŽ\ÊìÞižˆÍdr*O)ùòÙÇIRt!“íôÐÁ¨¦.’=ëˆÝLT€º˜—F)ŸN§Àÿ{¬l,q81± “É!†ºœn ˆÙlDçê$RgoŠdý†eZ ª”<"©)z¸P‘ÊQ.¸€e)%-1)B¢}ÆqŒõÇøAp8ö·ÇÜzÛRpH“-Äý]£0Nñ¯ø¹q*زhñkãþÞ:nTt^¤Á©ÆÚ8¥{8¡&LhzC$gMª½º‹bt׋Î3kõ̵JêâðÊÝ|˜oV-ªÏ™xBîé'£YîeÉê“ L&èh‚§Q®»êî7ò:CêrEÛã`Ð/¡¿(Šø+ÿÏóàh‘Ù)©jºZ³xC%— ] @¥Ñ¨T ÎZ_{5‘Õêî/²˜dE%’Jb&®†¥ÑÒƒYMImB,4,cOðñ™N„áz£~¹Ð š?«³}\©†û·âò•*š9,ÜÕ)n~‘!á¡Êwu#ß &‰$Êš`ÔÇ ‹šÕkø~éEíÌ#Ì¥õy­ñQšÛåþÛ‘»$WŒÈ„"²&ùòíÒcd2ñÿ‡àÖ¯X@{Ð#Èp•¦x§4íôýCF£“ɇ–nwÈD-S›àáÍØ­Å‚–º·®—¿g;Ÿ[Ó•<ƒÕšjs±pTœ‹ï‚ âú‰Y׌Pjk¼—ÌŸJ%+ V ©ñ3&Q°êᵄӷ x¶\Ȧ4<µô&—Xw ǤÞTv|P›·ºðY‘Ø^ÂæÌØârX%±I¦ÀVÄüjó¿·UÌþ(‘\ÕyR•ê 8AÌ¿¦ŽÈ©T °²þjásáHÄ£ˆÍÕ"4…8(AÇè ·Í’þ ?*púêña£ÑˆÁ`Àh4b44B£1ÝÁ¡‰§ ˦I?:´­êm:¤~­ÁœsWíˆaCy®h¥TaSMºdNz{F–›W³”™7RÅŠà¢+Œ±¤#d¬Vñ§X–áW•±¢µüEò^è<¦¥FÕ¨`nL¯õØ^]*'ôqÇ êl÷'£Sb»†g‡›dq¿Ëâ/7Š#EÕߺ·¨cúuk®÷h­ç£¢óš#|Nx;Û"õ}ˆã<¶NËù6Ž1ÓL+9€Û»½*ÉpTŠY7QÌxV£B%Zí}‘¨Uä¶È¬µŠ–V+ZG@º‹ŒéétÇs[\µš[(YN@šyI£*Ê$²Ð¸¹~A…ža¤Y4N‰ã˜4M²Y_IÌ8K¡«4O£m9>H«d³²6à>f@eQe\ç›X\6³“Ì–›¸™¯â»»D)£^zˆª«Wg«u«ÙZ“Ó¬‰G¥Þ{6ÇZ¯m¡àæþtQ¥Þùœóñ´Îu±9ÝÎ2ÿÚë›–üO~÷÷._µ G ày!¾bL@¢!JƒG/®òëÿO“þ f³ú‚¶;¨(R»\U[J¤‹Ã$Ua!ïÇ¥5,$ˆ9‹ßԼʣ+ŸþÃU¡ÑhÐj®°º¶ÎÊÊ «++,¯¬>ÑtJ§{HšŒ¯Áç/®Ñ ?Íç&¤K°¤CP Óú"%‡¥: *Ïz„—˜ƒføt®xçÆ¬öˆëCí:9-­h‰ÙYõp8*åˆXÉ“d-Á'· lieýúÔVvÿÉdœ?3DSbM‰RKbíBó§¡¥*®Z$¾Ty<¨¬ÞHQ 
EËö9n§2¿óªÃ†¬P0ÜÝÊe滌”\íŕ˙š'¨j…™›J’,L#øO¿•ðé?šÍ Xfii•cl;Ʊ VWWi·Ûø¾O¯×áêµ+Äñfk…ǯ½ÃÇ9:ë#-0¡Á+‰ou=þ£8®BCMí¹^ä²GLî¬_#‘Å?«‡iGLŒ,5A‚„ºZ¡sáT–é ¹nªJ4›ÓP×nU!AeTUŽ™L¦ŒÇ&“1qs0’.yÄɘ±$ea‡ZEWt&sXÁGD@{5ªtù­V¡8î×E`XÌ*wè®ö­Lfz5ZBé9ë´"ÇX«2×–¿°19z(Ühdñ íÖ2ëëk¬¯¯sâøqŽ;ÆÊÊ Fƒ0 8vìí¥®^¹@’Nñ›wqq»úœJ²á¼zò¤õDvASŠ. WÜ{VŸó%µ„ɹl˜¯W«›—5  T¢tqÐ4wžIìŒ]š…­GißÖf„áP §L&ã2 ‰—ñ™ôb›æ ¯ÐòŒÓ âŒâT7>1óžÂ]ˆŠÌ Lp·k« ¹ê•‹Q1©íØ 7êC›Ëœ ¥dV{¨#<‹—º,\ö®gZ^¾îk ŽÇÎn‹¥¥6«««ÙîìX91 ÃRÙayy™À7\º|ÕÓ|#Wvû´Ã'ð$›^/êúÙ{aÑn.ó»†=‚¾ 7Ø^ä&ªÃ¢7ü¥"Gᆵ:Ýg‹!×+”9ž…2Í<ÕtXÙéÝ$Ø ÜúB%©Œâ8&Ž£Myß÷ÿIñ”Q’ ‰Írµø…¾$³„Rb—.ª¼Êåtã^‡W'i¹MÓîDBw×g¾i~!H©ñJÝyËöpëÔœß#sÉo½:<;¢ï+÷¼–—…ƒNƒv«ÅòÒ2«++¬¬¬°´´D»Ý&L^Én5›Üqû$IÌÕk—QÒà>žÜá™M<“sšâIYe®„:õÉ0 §?ê¼a°ˆÖ ó -óðÚüV°°å’ZO´ç{‡ç§_jN±ˆ­÷¶Ïçn”±äµÚ“Má›ImŸ½û4C3J#†d 5*`…,Æ—ÙiªƒÃ]|K×WňfùC…!¢³PCê"ºª5•ÉEƒÞôY{q6HѳÕYµZtQ1´Þ   Œ­úØÞQ¢È§Ý h4›´Û-šÍfö„xž—‰[™l}'4[-n»íN:K§øÁSy—¶–9ìÆ F)“È’$ZrÝæk2ŸØÞˆÐVO¨EªaŽ; rNZPTt:•ÅÀ¬¸^IŽøs .Nâjõu#'SUÎ4pŠ1F¾ïAÛgšÆ¤IŠUKªšó®k‹¢Ä ¥zʺ8“D7ŸÔºó~)ûDÝA sWp®“ÉAgçöouT´hJ4Oç„Iç–?µö7;Ð:¬…]šM} ðñ}o¶ðóêeœ¸²¼Âm·ÞÎêʓɄ y†nr?{‡ ^Â`”%J’.ZɵB7h¼©(ÀéÂüy.ö‘# f ‹[¼æïלÈÊ¢YÆ*•~­—*p'ujm–Cuâf)ŒåþÁ,^ÒRal0M3ÝŸ¸ÈžÅå<æ;¤™QŒ©ló £rTAF¥¶ˆkù–ê,ÄBJRÚl}j-TÕÅ®*޲8zFy¹8¾qZìfÅ™jð% Lè^#0 žfÓb|‹ˆ-+Æ ¤…útÞka­%M’4emmÛn»ƒF£Ét2!hÝÊåÎ[Øï$tû)ÃaJœ¦¤é J¬Æøõ¤xö¯ZC‰Ý æõA*ŠÍuq¤VuŒu.©câ¥ÄqI|t²ŽDz•Q]FÙ]E%2IcLv€D•X³1GåÍÊg¼«dœõ¯I*l>÷bëâ´ªÓª®ú‚8 i­Ú,UòÖ¬3#âÍÙVÁS*H‘ÚëË×7¼âïóRUDË*yÑ+Ÿí&ÎûHþš¢Pé íšb|VÉyÚ¹Ë."dªÓmMi8ƒ%­§X“Ý(—|ØòúRLj]ºrÝæ‚¡ªº³:œðr¡ÎAÅâ”»×£†½iµ-±¾I‰#ÀdrªF±X–#³s59·¥E¢@R!IãxŒìŸ;#¼ù SÂ0EÄ¢š 6Ʀ jgóomšfOçÆÅq‚1'Nžæô©³ÙL°$!m%W÷–9ì&ô)“‰%I3Ræ{Rï1pnHyZùf`¤šÈÆRù9åâ«H&š\翞 5ƒ©W—¥vÅjdJ­EM®ñžÆ,D€É#–Tˆ§›¬‘ZE…”27(‰H–Üb æ^¡´å.öY¹]‹ÌáZ‘>©Ï[0 þ(o[¹Æ3š…Ö$Xª )RNН ûâÒ·ëÏ"XZ<ÍRæSrðŒpútÊ_»ÇöîḉÕ$ID’ÄÙèOgÈ…»hš[‹ïûœ¿õ6&“1;{[„}óå\ÙùÏ^Œï žãtÜÅ*S8¢u%¶º€’:E1©¥æ:Ý(X«»žÛ<íÒLF?Wú–j²\õÞ³ˆ#™¡>ôqQO€ûÕ¯&3 übVs£Eº«¤&§@[›s8Œ`,j19õ¶Eww¬°Ckø–ËǙۡ«’êe|/²˜Z«³]­Œ]ÞCeHF6¾toÑ0+@Œ§ŠÅ09Ÿã(û¹GѲ©eÝ«Qv)_Ć·Ýû<ñÔ“DÑ-L§>ÍfÏ÷0ÖÃÏiàžáµªŠäÓÏÝr+“iD·»G®±;~ ýñ<‹çyˆ1ˆïá‹T^f²€õ™/jOæªùs‰tá1Rʹ{br»7ûyV¡uþmr(Ó(j²®.Í¿—ü8øÌ:ÝÒìX’ã÷R¨¹XÏCš-h6¡ÑÄ®ž©ˆ;ÔÃ7(ŒÀ¯{vÝCÎòT%Ís€|–z¾X E¬à™,Oðܦ3¿3Î*yµ‹o\÷Vkœ­„ ´áœDÈÙ­ 
dÌá(UÔ¨…ÊŒAP/!¡‡—Ç¡FfFAFQQf¼#­¨?÷wyr]Q€ðß74ÊR[9¾~•݃?dyéˉ“™Ž ü&žïðjˆ—MSÄKKKœ?ž8žÐïÒl¾”kÝ>Ë' }Å7&›w˜Ì#X·Ïb~vÚú“¾S(¼@Þ¦6Ï™R Htfp&ï8³:3Œ´p‡y¿³Í:6Ï'm.Wêå¯O Ø3û·¨A‡4<$ô`i¹qu#¨Õó]­ÄYæžx Ä¥!Ìžy1,ßP«ÍðÎ`&õ(jñKØLXZ™²±1a8ºÎÖæ‡ð÷Nrî–×Ñn¯âkˆç…•’~ÜŒ³ü`uu³gÏ3™Løæ¥<¸sŠ‹û»¼ãUCL Oèy™QÙ<{)û‰¨J¸½Æå÷ÎBΪ,|ó}êöwîkŠcHá œ§ÚÙûd`ùZ³”‚Æ,-!Ç70m?Û¤ËäwÅÔ=€µZ5€‚WÌÁ*®HÑ´¬ê’\Y“bçwVUIÕi8k`qûCeOk®Ý¥;Tj³–ÇRR£V¯™+Tº C ž ñ¤A(-Ó$”&´ðÄ ’–´cÁæà™$ •…NågÕ\A1’m4­VBšF$ɘ$±½½ÍöÎeZí“ïB›„ÙѤ6®˜§•¤ žç±¾¶ÎéÓg¹rù“iB£y’ñ ^¹Â+îô l A@˜{‚ÊLeGJ%û*µûZ~ùµ¶àÝï+ÿ®…ó½äyD‘¨‹óUUËï cÈ:Àcð<¤Ý†cëȉUüþ5tãÜÜÌ»º(>XN…Ð…¢¡³‹=£&Ï‹ë\':™¿æu©sΠåKe…J!l÷>gµhlwšZ*3£¤¦Ö]é5xâãÓ )k,{·âKLh–Í ^>4{VXj!Pµ×Wj²/R# ªQQĤ 1b¦¨ 3Àø=övÂóœu†ñxÄÎÎ6žç±´ÔæâÅ'‰ãˆS§NçýYç™çyÏ[({̵sË  óÉMâr¤į́«BTˆç‡À¹U:–̘…!3¶¦hUÅ9›ò'7èMu\™ë¿-SÚºp±“n×ù^³Ý}ÞIåõ‚#«¢I“(J‰ã$gÇÆs c"OÓïw“†0¼ ¾Ðl¶YZŠˆ¢˜ƒƒ>úÀ§ð}W½òå,/¯’Ú´„ãÓ$Í ¢ª)B£ÑäÜÙ[™N¦ôúÂ0 Õò¹téQ”rìØ˜•••ëÔ˜ç±P.ìÌâ\Ò§hæJæB —áÌӫèl*|©ë”Ìê(ÆÕöbшø¹yluÚ¥ÔB#)eÔk%§šLbÅShu"M9¢3¿q!ÀÊ“É1Æã˜é4f:ˆ¢é‰#Ïźqg1ÇqŒµ†ßh4i6[Ðéthµ¼üå÷bÓLÕÍZ™q¸ò¤¯èÖ[^ZâÜ-牞Šéõû„a€ç —.]`8<ÍÆÆ:ËËË4›Íç}(”íè6§¤å†•$ÉB] w};!P]FÂå¿,ЇÉ«QƒŠ­°˜äFMÕGôÌF-n7ÑœëïªNTµúUŠ«ÞsT ™ üÁ’F«¹.ê˜ápÂh”5ýQÇ«äIBÅX«ã† &ŸûÜ£ÏÝw¿$¿Gé,¶3N¼µÊD-++«œ=sŽkׯÐét8¶±ŽïÃåK—è÷eëeÑ|ãòžozþz£su€º2tÁ–«”kñwmlc…ÿSßá«]q2ÏØtÿ¶Ö³«óìÒñˆTg ºS…N«óõ†j 00‹%…ð*iz‚(š2NGŒÇ“?òénœš¦)"B4›Í’÷ÐCsxØamm…Ûn» Á/™¤¶ ­X‹M,arüøq¢8bsó Ý^Ÿ5¢(bgg‡(ŠXYY¡Õj½`B¡› •ލÏ(Ñij+ qW-»Vý*sºÞÒçÇ‘7Ôº%Ì2çº(U‰äTÖ GLf‚TK«,Dªóg³äÒb5ÁjŠõºs2×™ÔÊóÅzcð}ŸF£Qƒ1†~¿Ïã_,Ç­Ól­äÉ Éõ2‹~Ç„aȱãÄqÄööµL}ny™ñxJ¯×«,z·á…lu~_'x/š€X2)]m¼¸%5ížEŠjóU_*”r|¼e¹d­Ú@;ðïÝQ©%Æ]¶Xju4UM­  yXR,)ž—7üTú¾8ñ %9c aÇq…žîy½^?øÈ'¹ÿþ7°±±A’XÒ4™)çêñxLø¬­m0™Œ9<ÜCNž\çÊ•]ú}¯Œÿ‹PèùœÜl¾å†¾;VÒ핬ÄV*R#ýÙ`°Ø¹—Ö0úE-#3%`uÚFen`ö,ó–£!Ôú¢vã/9JYyVS(“þŒh2Ïïü"{€]h·Ûù|)  ×ëñ¹Ï=©SYCýòÒŠÃÑlØŸU¦QLàœsú,W®L9<ìr옰¾Þ¢Ûí•Æ¥ªAð‚5—T*ÃÕ;ègü™l·ñȸåžx˜ìòbD3ò—PUp0:7§:î¨:ʨÂt J'–zÑlA}`q;Þ¢d]™×)T§ùÉðmkÇþã3€ÂŒ1¥„aX1c ×®íòÙÏ=Ì=÷ÜU~^Ϭk„8Ž1bÊjñµëW3dèØ“É”~¿W.z÷Ø/4p{ßkº@Zr(f»¿‡ïû%8§ÕzÆÌ÷¹ÇàÕÊúÚîÀì‚>[ËÒÊÓhJ¯€ç{¬¯7ÙÛëUû -¨öhQ¨&ÀÆžçãû>ªÇViÇ-Ï'4¡õÙIzXl6TNt^~d¼žÛ—ëŠ8ˆˆ[Jí•ÌFÉœL¤3/WªM4ùïf:óº ¯zÖœ§¯$¥¬¶ ÿqÀ"#pbÏ›åENÐëõxô‘§xÙËï¤ÑópÈC=ARK[Œç±ºvŒ$Mèõº¬¯-³¾Þ 
ÛTô‰^HE²#ë.”¹Xƒï¨¦–Z V£»Æ'C”÷Ugrâ°”g$#q{}n´ÈmVg2I}°Ï|¢]£VÌ%½ZÅUëjÒÙâÏê~QŸ¶¨´þÇiî‚÷< h·Ûåï†Ã!û؃¬®´xíë^‘·f‚1Y(™ÄÆóX_Û #ºÝ>++K4)ƒÁ ²à_(ù@•íhƒVopV*.ìgÿÓç9ûõ/a™0ƒECbS<`ªII7¶©¼pËXr’é"P,žÕ %b&“QÕûw»ÈÄÕ!2‹¸ünž`ó3Ih®bLXM¨ÿó€§3‚"$RUZ­V¹X‡Ã!{û=žxâçÏŸ&Ï„›&ÏgeuƒÃÄÁ`Èúú ››}©_ù@Ís[‘‘p…ƒÒÔr°Õá.ßge˜Í®òsŠgd3õ²r•rœÁÜXÒ’¶P‡6KP­ˆ VÆÕ’Vw7w'‡dôk©&"U¹vÕ0\õ±Ò(4« jÚ(ó"úã̞ΊÈ5„¢+NDxê©-‚ÀçܹSAPzç©f¼°ÑduíÛt:=ΜYáêÕ~%ôy!äÙý«ÎógÌ8ëˆcÍ´U’$a¥ÑÀ R¶—÷ÿf 'µ¶ØVtn•I§Vã”E;ÿL…OËL¹ƒóêu,qZ"k¹C±¸g 5:óL"³WŠ‹§¨d³/³ÅcpG«~)x€Ì8DÙ} i·g;ö¥KÛ\¹²ÅwžãŽ;Î{™Ç›L,iÓh4YYÙ ÛÝc0°q¬Açp¾HVäÏG#p›¾*“â]mЪ‚nÆ1Yj6«xY@ ±¦-3l0/ù Ã2?x>Á, žÓ­+çR¢Ažçñà‡aùeëø¦ƒÁ桸ü~3/OëÖ«Ê!×õ¹[2#ÖÕ‡/«Ô‹bNû£#¡X"A_HÍp_8ƒXв)cRbR†4šYõ´«ò1Ï#È`ÌÀ邚yƒ½½>/^å®»ÎcŒ†>6IñX^Y£¯ šŽX_öûsÐèó-)žo÷­°A©(f¹8ó¥Ï]徯¾“p¿z‚I„Q>£uÖVT•#RZñÉ‘².QêŒ5Á\­*NWjóáÑÜ:¯ô ×Gt$X‘0aýÔ€ë× ž7+Ø}©†AOg}¢ ÔµÛ³þ‚­­Æî¾ûV|ßÃÚ€É$ÁÄ)kë'èv÷ìˆõUå°7¬$Å…—¹‘<݆ñÅ4ž…lPW}Vžy"–lû!MÐð<|L¦]Ljg$D…–¯N{J5®/C‘Ê$HT²æ˜y<5Gjñ»+*‹k óSyªÆ¡ÔILªIš Í=šÍ“WÒ¿Ôà›1‚0 +Í>Ævw{L&O²²Ò¤Ý^bcc0D|VV6-!caÄh4šë8*rÁƒbÖÛðSÔš\NÓ«ì„@,”‘..’1™ŠØ²²J@Ëhx>“$K+¯ΔF•êâÀ*鲘åv‰i©ïãNz—R‹ÇIxʼnkÌbÙ­Q¡ç´œe¦´fmB*‰ôHtV«A¥dSÈ•çËc‘¸õ‚bÑÇÂþþ€Ë—·F¼çOÜÏ™3§èvˆX]g©°SGÏ(ɼ€™Øˆcç·¹vµ‰ï›ü¦PÙ$ž¯‚òR$±õ`4ñÙÏ>Á—}ÙKññ ´ZK“Q—“Ç-»ûU¾çy•ãÅqÌd2¡ßïÓëuèv» ‡CT-AÐjµX^^"Mcâ¸I£ãû!AT á²{€ª\DUn†nw¸;lÐÃ2!VRF1ÒÔ‘EÌB© ¤«;ÀZt.¦—ºè¨Ô'jmú–VûŠuÂó²p5>Q QRU¬I±‘jLʘfs•0ô<@ž÷°¸z¬•ø¼ÛóÛ¿ýiŽ_á­o{ q”"¦ (ž×g40 *Ðh‘gÚFSƒ>Î{{»ìï ßûwñS¿ð{L§cT¬ÍĬ¢hJ6‚ßò¯^ÉK{.ÿÐ^ò’ó4!F„¡Â™ÓÊå+}F£Q…*!’uŸF#z½.ûûûìííáûßûwñÝßôv¾çϽ“á(âO~ÿ?'Š&X›E¾?! 
³p(S©¨z„/4Q>²'xV Ð…HP5½€å•­^À§˜Ô™…èL\©êúËlβƒøTÕk‘ÔFWÕÖqE6Ý™C-ü¡˜ZR™íXŸ í~“+¬Õ˜”)«ënùyTm–¡A‘Ôº "ÂOlÒnœ=wß÷iµš$ œ9c¹r¥Ïh$•0*Š"ºÝ.ìïïáy† ðùîo¼ö·™ Ǵ€ßþÉoå±Ã˜ø_þ=Óišç “<9nÐh4‚F™0#_PhT—E¬x€ú`áúDmP>òóãüû_Æò Äx_„©8еR›$>_¶­þÈP3•ÙïD¨Nä­ý|ádaÉ¡‚-Þ=f&Q(áE¤¦Õ56Îí^˜¡@³yeöy…=“yFÿÈ£×yìñëÜ{ï­Üyç- ÂúZ¦Ê|ùÊ€$Éš‰F£‡‡ìçüþ?¾®_eº»ËÎþµvH¸´„—„üêÿô^¶"á¿ûû¿B'¤iöL’˜0ŒK}¤qÊ@‰gnUàÀ nßk½"ì äN†SÖƒ6í AÃó %`L”Í s I]XkšMlœ¿ð bxÚ,Q3·uårèÎì—Þ\²QZ¨:Ë0Œ>Øk,©UR;¥ÕZÅ÷0n.Wza •ñXÇ<öØUÖÖ–X^n‘xKœ< ãÉ››C‚ MSF£N‡ÉdB£ð3ÿð;é]¾DsrˆìwXÇLÇ>QoH϶؊&èÒÿöo}ÿ×y‚ŸýŸÎ‰˜1IEA eâ]¾_@§Ï,GX$üì»ÌÏy„ld—Õ–c ¯Áš×¤ë¤Y G ÛŸ[¬¹Dº8ð‹]q›UÞPþ³¼.PAYß2R¢ˆh­SLÜåçëłшEÈ&²ØÔÒï¬óòׯ\¾LQéx!>Üæû¢z\ü|4ñ©O=Îý÷¿œv»Eân9—Òï_cw·G’Dôû}i4|‚Àçk´ÓG‡=ÂIÄŠÆÁ·1ë½ÑtHw<ä§Û|ËüYvá/ýÃ_!Ž'DÑÏó1Æ/ ËÂÜÌMÁ§uÂgÞ<ßS¦ iª“Q%>ù ŸâÄ7¿„“ƒ>{^ŸÐ5@:SƒXddÅ®šèm¥zëRáªT9:©H¶e~_E)BÝ9ÁÌ´HŒÓ®FlÖÄ$L‰S6NÙœWÎc.7 ˜C‰\#PU&“ ùÈçi6^ûÚ—²²²ÆÝwÅìï?ÉÁÁ>Ýn/o«õøÈ¿øo™\ºHcÐAF=t<8ARhM§4ývr¦œi Æô8sæ¿ú7¿‰_|àI~æ?|‚(šæ›±— 64ÁϽÃÌ#ÜØØÅ!Ð"„bÉüî4ŸE_úé? ”È÷ý<)ͼƒ^àÚµ-Ö×WxéÝçé÷Çü™·ÝÉ£\äVÑdŒEkIÂâ™N‘8†Ôæš ¢B#±¨ 50¸µ¹Ì·¬7ˆ¿Hº´Ã7œ]áÛä[Ù1†ïø»¿@œ2Ì*ÊQ ¡Z›âû!ž§eþ:/Œµ@nQrWLLK'tv{¼¬ý Î^=äZÐ`च§J*)•©¨n·¤>ϤÉqñþb.±ÔØžµJ­ F9ÉTb«VC0Jš¶–X~E¥”QÏÙ‹&›œš©çaÂwܳÌg>Ùb<ž2™<ÐÿjŒ  È6Æ„ƒƒC.\¸Èòr‹Uî¹÷v¾¸Ã}í1^:$J§x6%LSüxˆ¤6S·ùt¡”|Ö‘`â *ˆA'>ÁpÊZЀnšmh´˜tºœÜXå7àùýí>ÿàç>DšÆe£‹x^–—fµ¡:+j\ÅÃé8Z³¨g“0Æì|ü*Kç¬K‹ƒ(¤ŸLr¤SÊIìZ‹ùÝÛíЕJú[«òŠRS|pXCutGÄ»hÄ™5Ü,Q©ù¬uÄ…,–D ‰ˆl—@9{n™~HÆc·(ö_‡Èfoe,Ïb Ýx<¢Ñ±V¹ï®e¦CÂQŸqwÀˆ„¦IÄâ!˜|r¼Øb.°–\2TŠ¡_¨M$›¥<™bñðüË6éþ!º×å­'6ø·åOñ+\ç>øY|_Czˆ˜ŠÐà ] Ž{Tó·*$I†ÅqÊ凮óŠ×–‚½Ô'NÒÜ M…'Tûk©H¢8}‰ó£–JÄh6!F*htVH¨«@ׄ¸ªä·ªÙH)›’å6”¬FY“ŒöˆµÃË_»Êµ« ú}ß—ÍBÃú#›Æ˜‘ÜF£ƒA?W–ö ÃßøK‚ã: ?9¤ I“ Oˆ$!ö…@ŠYo‡;ì‘æ˜¸UÄæ ý6Ívp+ Ôø¨7ÁtÐ1ÞïÂn‡oV,ib°¾ÌÖyTD¶D­3 >,¦H)A› 2EýF‚þŒI{Cƽ /;Šøïæ`󋟼F¯7a0ˆ'su€¼)ž¹Ý‘ˆ@šf1`&“˜‡~ûÎó=\—8˜I4&Õ$›ƒÅ”(M† ZÖ ÎÙ+Bš¿AÉžä^¡"¢ȉ³Œ:Cõ`¾mÍ9–‘yCÅ’ê„T¿Ëúj›åå&½Þ°B’+ò§š®~6Ð;›™< év³ÅŸ¦)að÷~ð½¼ãÎe†W/± Y‘ß³H ¡Æ™—4ù0Nc«ÞÞÝ–\o­VË"©–¤¯Ü;$H±E£¢„P<’•%â“Ê©å×Üu Ÿ¿|ôH’´¬âW¨Åî_çqe!M%ŸÇšyÚæds…½iŸI:el""Q5ómé´Ò®X¨BÞAµ&´;ßÜîBž³8žJr]%Ï1§WgœÎ<‚®åjb’yÐtéõ[¼ïÛîàŸÿ¯}†Cɤ*%/ IñÙ ï,ÞGôz½|ñ'„aÀË_z–¯í):—žb9p&Œ‰›)Y4# M£ø"³¦=­ n)ÓÉîBœM®p6_ Óõ|ÌpŒC³ÛƒñÛX& 
×X^N#DåH—ä»h½+’{´+Ìbª$±L§ Oü棜ûÚóìM{tã1¡dbØR/¨bâ5œ¿5HM0Ë8©²SvqÔ¢¶P/²ÍMÜ“j‚ìÎ)XD’™|uVå&Æêkú4š!‰mòÒ{NÐï=¢(&M]õˆç·8š¦yW×Á ÇáaÕlçÿ3_û~ôÏßOÿòS4†è¸Ïš‰ˆ=KV¬¨(x„-ZY :•”kÁ|qÔþLiSFχÀÇ„>7î& ÷Kv«[Éž’WßýoTÜ)Ú£(e:ô'¼”%ΙUöÆvB¢ ±5³Ç0?ž¨\ó ªÀn#ñœ¢\¡ êÎ+ ]õù¬:°çj”Vú MÓìæhÍk”LR«XceH¢ ãïøŠ³\¹²Ìp8"#Qj>¦´(¼<%‹ÉõQ”µ4öû=ºÝ."»ó¿ÿ®¯ä­÷çúO²"“>¾£x–ÔU/׌R<+xª¥zHôÍåv˜mr*R@´¹´‹L“¬vPÜ=ÉfuI«‰‚l¬¢++9ôirCv.iš.J‚‹Ø•#ëu4(ãÇa0ñÄo>Êé÷ܶöÆcâ4f¨)™xœuvs­¤Õˆf¦è<#Ôi±¯Ž&¨, ¯íú•"Y¹Ã¸¿VõÕEyóÁÚˆÍhDLdž•¥C.]øÎ¿x/ÿàGð}±ˆdÕFkn‡ç‰¸ÞÚ3¤ßïÓívŸF#`mu‰ÿö^ÎÅÏ?Ɔ Ç=$"I„¦)FÀ7 ~Ç“fà‰¤1Ф³{©,Ø‹ß1“&Xñf€¢ž¡4pî4zϽ˜ãM®­½šÉ^‡ñxÊx6-ç¨;œ<$ã>P‘¨”u½hŒR¤DÕìÚ DÆS¬bÃ3#~ˆ¶ZÈê ¬¶ÑsgÙ;û Ýýþ€N' Ùz½>ÓétÈ  àGÜŒˆH‰yO§1“IÄ`0æá_3_{û:`G{4SŸ”¤¼"Z…#ËáÕ™‚Î%º"3é•Y^ì†;õt[*zAâò*Ä8­Îê£YÝÝ)C«Œ BHmLï0À;!üµÿáüýûâ8ÁÚØ +«ð¥hÅ=O’„8ŽÇ ‡úýišÐhd‹ÿÇÿÆ7ñžW­’î_¥59@Fd2„x ©CkÀf÷Ö.‚ûÌn²¤¶~䛪b¢8ÛÏÒFi¶±ù‚´°±‚9s–úã„^oÀÁA‡ÝÝ=ö÷è÷L§Ó 1êÊ~fó°Tmî"F£)½î˜W‡ktLŸÄKûSRM˜äSË…Ê# ¤SÜΠ˯7ÌFåf­ó ©£þTy•?¯å*âð²%_üéôÒØÃÊ«BÐ ÙÙNX_µüóËùÑ¿õÿ”!„jJšâÄ«_Zzš.ý½ˆ÷ÇãƒÁ€~¿ï›\I:à“¿ö×í\e¼}™ñÁ>“ÁˆU"–Ò([ü¶h4·TùzýÅÕºŸ{ e.§Z¯ÊPàÛÙõód© 'Ž!gNáXáúò= ö»t»}ö÷Øßß§Óé2ò ¶uªöNàbØ7oB'L&† ˜Òë ùì¯~Ž[¾öF“)›aµ–X“JtB‘y¸evâŽû,ŠT'ÇÏu޹?Wµ½yVùD•¢±TC#©„F3a¼!" 
ªË„F»KÆx¾â‹ò÷þá{øþúÌwS[鯩òÕÿ¸ Áõ³d7b2™0³Ý2 y|^ÿš;øé¿û^›OÂà€­Ö½ˆUÑNrz³–Õ«*Wiê°Î«B …]Xgm• hÎÃóAÓŒc íº±9¾wrƒÁo ßÑíöØß?`w7S¢ Äq¼P"—Ft'Å?3|U‹1ÙXÕÑhJ¯7â•¶ÉÚê'†Ëx1ôí0×Ý·˜ÊÌ0æ&ek•À9S‚ÐYXS‘f»IQo/ùv˜ú\¿R­ÚmÝÁ¢¥¯øýl–˜z¹Æ/LP›¢bI­åÇòOðW¾ÿßEqžØ<HóÁ¦¢®üÅ2„ºTaGDQÄt:f4ÊÂß÷h6³]ÿ'~ø}¼çµÇHwŸ¢5> u9Ž ’ ’&&ó ßÅ'•*õDÈ’â9à¢Î:w¶¬34±ÙÌæl¬!Çב3'ÛβŸ.ÑïrxØeggÝݬ1g2™ÔZ~sUr)}ŠV`¡*;ôègQA.ê™ yè?=Ì-í ΄ë e¿I˜Ú.F …Àê¦!U$³¬ ËÌje4%Ô”ßTjês,²¡ûQĨ¶«”}‚xÆ4•BïrŒx]¬=Äø;ô‡Wøß~êk¹õÖu½^¶øë×v¶Æ¡F‡~v3qE`2Q›¿}‰[Þ}’®2J'Ù@íÔ:Ä·œ0ç ãÖ&ÁÃUa¬2‘™ëõ-“(SŸ-& x@u@E@+¯ó:9J ’"äjqª)b”ÔÆ4š)?>æoÿØý|æÓ#~â'þQ”ä…2J„È•q›»¿ÐÉíZ,€‚ÁYtR•Ý4Mñ}/çÑû|ߟÿ ¾û½/c¼{íí€ÂxÓ D1Ø$ÛÅ5BÐZÍ¥r‹u!óÖÍãJa4¬t”³r«,FŒµh³Km8}s|…î]o¡ßÑéôØß?d{{—ýýCƒaú¸×#?Ë +cRŸIPÜ $É.°1 µÕqpÐçÍ{趆Œ’ ‘&¤¤$©`%Ém8‰HUçÖ­k…çïæZãü”2‹Žç`‘:E9rsf©Ís`TªÈT=lš ž0þTòÊw6mÞHÊíw¬ À—½v_úÅoå¯ýõÿÈ… »¤i³Û\’>`L‰¹É²«ÏSG’Üꦻ˹ñnáa x³XüIç?!AàsïÝgù…ò-DÒƒ‹„£ClÜ;†h’Åúiš …‚5© ’ó%tæUù¤–ü‰›[¹{–T‰"Ù„ ¤ÝF6V‘SÇ0çNrù¥ÛÞÞaww—n·Ëx<&I’ «Á жa¿F/qÏTí e ÓiÄxìÑïh·<þ›rûWÝF<™ÚO•¡ŒÚ,žnˆ!•ˆj÷®ÃüPgÒ»K“(ZÅ1©¦êµßº-OùÎEì™WƒÕõ"¢Îë³qJÆO2²v6éRmæ$%ÕOƈŒIíüÞãÏ~óÿÁx¡Z$É3Ä#Ëftᙘò«»ðg m]Ö&­àú…JsM±Vñ}C³Ù }þô×¾Žû«_ ã´{‘`ÜÁLz0ÒÖ Ä$In©E ¯˜L:š ÛªÕn*ñ¿»˜­Î€’f÷ÓѬ­€ÓjÏ ^í&f¹Izï+N,q}ã ôö:vØÙÙc{{‡ƒƒCƒQÍQzÜ$Xó EÏA$ù ãñ4…|ÚíŸ]ãÜ+7èËH #Cš¤¤š”¢ê°¼êp}ycÇI²j(Q‘רŸêÄÕ¡|.ϤN©˜W’˜…DUB—jŒ’–Uc‘)è˜ÔN12äÿþ¥÷³·ïó½ß÷¯¹ví —·¥(r£Yו+õ!ЦN#Ó"#°åÂÏÀ ƒç¬Qä½_÷:~ä¯~5frˆv.¤ã&Át Qq±ë;‹¼¬rÙjìvù9töJQѽmÎtQE°A/VE2ä /€¥6z|{×ÝK†Ý;ÞE÷ _&½[[ÛìîîÑëõ‰¢h¡nSñ3cj•`ו>ÓW÷µÓiŒ1B·;¢Ùìqõâ6¯:·®Ǧ)»V˜jD/’’æs,Õþ0u ã|Ö-2ÿœ/gÖpe@ŸV«Àâ´LÎ5ä—Å*­„Tå”IѲp$¥:_Jš·&™, b#T&XóË¿|o|ï«ùÓúMüøÿ{~íß~¢¬Ï®©Ì!EîÈVÕ(vý‚Œç†Maèáya.Oèñ;ÿöàpo—Ïþ)¦{hØ>õ`2€h Ñ4WnHœÅÖ2&ëp—•ym]@Oqµ-µüûlß³x“¡KˆÏ6cP/À./á-¯ ·Ýœ<Žž;Mo0.ñþ­­¶·w8<ìäìé‘kØ¥Aäõ<ÛÄk:M'iµº\øäeîù†—2Mc¬Z|„˜ˆiª9ùi†õkMß§Ò© Þ\J­³£‰i•1ÛÑ Uê òïÏgœÎ1OÍp»Œ-¨Í=EJb-I‰xÃëš\¾rñ»²(%tÖ$`Ÿq<Ÿ Êß÷ØÝ5„¡Oë·Cîx÷9bÛˆema5%QKZ¤BºXåYÅ©lU‹¹3I”J*Ålö€Î¦vHåêJ9W Žý¸â]ÅR6®·Y û"‹v¿òk&æÉ8›9LÌ«_ÙæË^Ý á÷èö<òhŒM gϬóÿ÷·±ººÌÒòárá©.¿ô+ŸäsŸ»Âõë‡L£Ä¹?ÂÆFkáÎ;NðöûïáO¾çœ>ÑÄcãˆN§Ç¥G?Ãþ~ã«Âm' §Z#®v‡0‰!ž”°&iBE<¿‚ç/Ðmª\R¯È×›‹Ô(°Ž¨YQç)^æe@ƒ4h»Y[Bï~)Þéc\;ñzÝrñonn³³“%¾G…>ó!ÐŒ(àMÝuáÙ<Š|”ÁÀäÝùYÌ~ÄçÖ·œÈ˜¢LQ,ƒt˜5G‹`DË‚9 
‡¿_Á˜u~VÆ&Õ*Àåü`*Ǫȴ‘¡JŸ^°ç‹ã¤Æu¡â72f¬'hJÁ<ÃÚrÀËïöOë«pê$æØ*[·½›Þa—ýý››Û\½zÍÍm:n‰ù߸—Å•ýÏ>§_ÄDEÙ§/Œ\•Å£i^)ô<ð<áe« ÎÝ{œX“Œ0+ãtLJ–±g€Ž­’áj“䥶—ýNî•)nr\ ú•yztÆÕûj3ˆYXG>²Ú3ó6y±ŠÒnr Ý Bß#XòP|¢©amÙ djb ¤>©õ/7Ç‚mŠÅÆ1¤ÚRËtšÒ’„FÃrÌÏÚ4e½‘²qZ‘$É Y)sšL¶¶Ûy~ÌK{×ÃuŒA]:¢`]ª‰/3ôvÖVÑs·àÝršþoÊcþ[[Û\½z+W®²»»K¿ß'I’§]—Ù|2-ûÛUÁO’i®æ{ÀööƒÁ€$‰o@´¿¹‡ïO™N}â8SøÍÈH ïzí[™¤Sb±¤`lÉŒ`n´©:Tä*ä¾°TsèÌæB¬G…/â(캢Rsï:çØë¨úU[ò=’8ÀóRŒƒfräIìƒ ~Þ{í‹fm¥*1Í¿ÚËŽo3/`lUÚ$ÍU²O‰SÔZÄZH-bs8SÓ¼QWX\b—Å šÅ=Ú3øn>5oâ1s ðРñt¹ž9ƒwr{û\ï+[[Û\ºt•Ç{‚Ç{‚¦Óø&#-éОA`ð<Á‡ˆÑ¨G·{Èd2&ŠÆÄqò4»ÚÓCDQþDÑ„8ž$ö-?ý^ýWÓGLÒ íÔg‹˜©¦$v&ZU†)RÛó]xM¨.żå.ÛM¨ t©óÊ™HW 2Y^ë“ꥺ¯»!‘Á€ÑüûÔšÿÛóÇÙï­dUeU/AEˆ¢F&De¥USòî&!мlͦú3ˆÒg¶ð‹Ý{š¬áI4“})Ôæð{]ÜýTåuav̽¶–—_^é)Õବ K-º÷¼™mÛæÊ•K<þø>ûÙÏsùò5:“É´¬y<Ýz!§ª§y!,€è‹XâxJ’D¤iD’DÄq<q>sCÈÔ¢È$SâxšsaR&ÿr­_'ëQ‹›ìßÒ!¶1¦ÔŸ%¸R› ì*Pk^©u™œnUQ G„-,˜\\h b´š÷QÍÁµb3³2ùr7èÜü™Ì(ªð‚uT¯³.µ†?ª† …÷Ƚƒ8ˆIN¦¶–¬fF°ìw3cRÀÚœN•õ¬:$µ±S•¸ý¨š´ö‚b2Ÿ›Up~—ȘÉo”teøÜm«Ëx›×.óÄòàƒqùòu:ŒF’$-Õàn´‹G’d4u¡ÙÌF0ù››»yËØ˜étÂt:©y€/ì‘q…& ‡#Æãq>=¤Çµk;ÜþÞ;yèŽ.$NáD딆VïÌ“ڥ꬀E!S•GT¹%¥aU›htNAt¶ÛëQ—È8}P²°jZ_U–ª^ŒÎ ‹ÊÆmkÉ’uv^;c`*|um> ½~è yà+©;«ê…ò¸Q¬ª ‘™‹BÞüæ[*«õc»ª¸bbUâÍo>/õÔ¾wÇÇ£ ×›´ƒÀ÷NœhGQdƒáж76ün-ϘgÏŽ_¼—ýÄ'®êÍ.áo<_ÆYY5/·V“ÊúûèÇ?~EŸûÅoxão)>‹q>‹Wß!ÍÁ~üãWí³1ü§9÷úykö^‹Ïû&Žçâ¢îµL³nÆz}ó›o“‡Þñ Oîºëxº½Ý÷NŸ^Ñ­­¾×ïOWNZ­­5‚ü:ÏÀ¹^š_§˜æÏb§O{éÒ¾ C_Ÿzj¯a­ÕÕÕöX¬ý;Ui‹ÐÁ³ߘò"hmñkýͬej £é4 oâœp½Pn€ ŽmËi#MUÆ”nQLçÞ˜$O’§ù×âÄÒüÊÓ›ß|›ôû‘·²z@h­üBzšÕàçøãü=À~ìc—õ¹Üùßô¦[%¿yÅÍl%‰,û¾U¼¼8:U%RÕ±12Îonò±]¶Ïìý<Þô¦[¼(²çIèyÒP¥­J£˜%B”Ÿ÷˜ŽÓV¼è¼E ozÓyðÓTž'a~-ÖfÇ̯e±ÇÎ=KŠcº÷ì¾ûn¯+2y£QÚô}„@X¡ ,åß7Èôm‹q0Å9 jÏ‘s/Ó«W;pþüz*ª¿óV`]•eBf‚¹‹vü²¦pô¬ÕCc¤ ó|ô£O)À[Þr‡äÇmËÖ²!Â1–ñŽ0wñTé=zî‰}üãWìQFà,¸âF-ëÀ°’.ò÷è«Òá0MíÐó̈xà’>W1z± óÏÓVUå°&¢íüç6?¿žµÚ1Fóë=qÑÍào~ó­’ï–õs_vÞkœ_ÛŽÝ$±£é4™.-…sïõæ7ß6w?)¯e;¿—iþy»9âÒÍ×Çô.%õ¶N)Žën íü˜ëÀ1à°æÜ·b3ù™æïÙ}`O•Ã|½ Ý“ü$^œ)\»?+ÿ*ÛÀeÇUÇùÈ…ÄóL™$âWõo!0QåØá’³qÆÖÚ|†—«ØQת¬ˆp 8 œÎgÓÃá±ûz½æ]3cd‹ï ·Þ:ùÍ ÐKÀ&pE„kÀN¾^ ¸ õwvÎýŒ1>ij™N§¹„ÜÑ÷:czNñ¼k'ŒÇ‡¼ñ·|0IS;ñ<3vOйh^~c·Î¿o„µÉÜ\±xžÒj%»·ÜbY„§€‹ùÚÊ_>~ó›o·<ð”a»b­†ÆÈªgF£ö׌FÇ¿„ÉdÂdÒÇÚ!ãq‡3g‚zúôÊoä;ʘdÿæ9 Þð†sÅŽ¼œ/š—ôûÇ¿;ŠVÞž¦ 
£Ñˆ(Ê6ûɤÇk^³þ—󓈊ØÖZkoÎ#•eá0_T'¦Óð­½Þ©ÿIÄÇS¦Ó!I2beeú¡Ó§ƒ+BKUHÖ®÷¦7Ý=ðÀSi ùó%ŽO&á[úý“G$“‰Äñ€4ºë£¨Ó4eÏ!U6€N·ww÷zçÞ?™“Ù„š)ãñ.“É„(Šò&˜”¢åêUïkÚ퀕•†Þ{¯ÿ/ǧ€KŽë§iÌp8a4š2é÷Äqä ®†›™œFÕ í¶e<žðÆ7ÞrØÍ-̤©%æîDáÖ€3G¿ï,Ú*dì2ÄËœ¼z5øž—¾tùcÇÂßšÖªgŒÔs…E» 1FŠEwb4ò¿*Ц Sz½>ûû[ŒÇû¬¬(»»éWž>ýꌑ½|¡šç Ê?K±Ë­‰p¸k2i¾=›»5¢Óépx¸ƒµÖ×}†ÃÖë––ý|÷ꣷ¼åÎt±±éý|·?1_™¦1ƒÁ”áp”!;,/›wŸûû{ôû}â8*©Ì™‹É ÈñxD¿ß! ³kÏ3|ào[.’(JÌÝ#2ò}æ Ã1à–ÍÍÕo<8è²·—]‹K—.sñⓌÇNœX¡ÛÝ<¾l­nw˜%­OÏsrçÜ“ÖÁA—íí.‡‡vvvÙÙÙĘˆNg(š~çŸúS¯ŸˆÈØEÃî»ïvuò¬â~¶¬µ«ûlmuØÞ>à±ÇçâÅGh6} ŸÜë6 ĦX2“÷­e9O¤oîI’ðí»»þkÙÙÙã©§.ñ¹Ï}–‡úÃáˆÉ$ʈ³™ÀÙî¿G£âû§Nfuu…åå;;-vüñ¸ÿ]o}ëKÿ©12,<©ßïwØÚÚâ©§®óÄùìg,‘ϬO8-Í2iô¬a; ýR6;O‹b–>Í ð€Ð}ßÇ¿Àƒ>”_”¬7˜¬É;úì#â±¶vŒFçÙ:uºÝþú»Þõò;‚À»¬æÙÿÈZÍ% |ÿÀÚÄÛßßåêÕ-žxâ)}ô1@i4|úý‘ç`ÌRx€/<ùõ¼0”°Å€sûûÃå½½}®\ÙäÂ…Ë<öØ“\½z™0„(š¢j™NãÛà `-Mí’ç™á[ÞrWòÑ^Ðg`}^æ“`Ÿk×¶ÙÚÚgkk—ÝÝ]Œ‰ zX›Ðlßþž÷¼ºï {  o~óíÓ´íƒ ºÝC677¹xñ>øý~'¿WÔ<ƒ»³Íô¾ûîpCŸÜÈïŽãÆ;®__ýûû®_ßæ‘Gá£ý(—.]¢ß0gíž7Î4M¨*¾ŸrõêU<ÏÐj5X__fwwÍÍý¯~õù{VVZ×såÐã)£QŸNç€k×®’$q®"ZÒG3T'û¼Æ˜ògùc3‡¹" ­óZÁ“ÅûîsíÚUâ8Ê[3“|Ì}vB++‚ˆ‡jÊîîv9“v<3OøÉŸüÕoûëýË   xõ$kÑû[›Èd2¦Ûí²¿¿Çd2Í•Ô<¦ÓØ­_”³õ¾ª°ÅHÔº§¢È{i¯7àð°ÃÖÖ—/_áʕ˰´Ô ‡‡yèœäÉiýÚWˆƒß?‘'½÷nn.íÞÞ!››Û<ôÐçùÝßý ׯoÑíö‰ã˜0 hµB––šéÏýÜßø$ÀOüÄ/ÞõÀœð<0F±6!ŽSF£ ƒÁn·ÇÁA‡ú¡õþòO¾çS…‡/Õ¡Ó4%Š¢¼W2뀺pág7 }Í/@AWvaÐý…¹œc½# ½´Ä•cL’$7¦¬°|õêÿõ¡âu<ðÈúOþ䯼v:évÇ ‡&“¼¿V-q5òx}©YŽHÂç>B¶n\u6½ôû\%¿¡*+9ÜyöâE냃CöööéõúùýÈÊÛÀ99ÞlËy~2V%y†å -D›£p£Q¦ZáyÙ”õF#ä_þËßúÚ¿ø¿ºð£é4I ?ÍáÈC»sxí—Ç^©™og€»®_?ùÝ™Æç=ö8ðáÚµMúýiš°´Ôä×ýÇ>vï½·^Éwñ> ÿâ_üµ€ÓÛÛ‡ç¿ã;~ü­ãqÄh”äþÒÒvwùžïù'ÿýÿþ¿ÿ÷‘,ØpE±l®æÓnµ!®þø½üù‘ügŸžÈ ›_›…º €¼øb¥i’+Hd±\þûî{Ùá÷ÿ{;þ$ëëK4qœ0Mò¤½‹SL ²$+½‰÷ÇQPgzÈÖͳ}âºû–«ùŽw¶ßåMÙÙÙekk«­ÍFÑFù.6æŸþÓßøàt^³Y‚·½ínq5ïzÿú¹¸* ™jtÌh4¡×±³ÓáÚµ].\Øäßü›3ð U^Òhø's¯á„‡•cΤuŽÜT‘¢òíü|ΪÊ݇‡=öö¸råŸûÜç¸|ù2£Ñ˜8Žiµ<üð¿üÝ{ï½µXþ ðÑüëœ>½ñÑÿôŸþþX[k†$‰'å9ôø±ûΟ´ÖN€ÔŸ¹vÁ÷ýÜÖÖÚŇß®×sk+ª³±ƒ“œ8Ѻc) Afs ŠÚØ2©žªêÅ< ÛxûÛ_!ýè“÷@H ½Þ€(R&“)½ž`­6Œ‘¢$.Å<ا{ÿºüŒZ¬G² ¿€Ý¿@Á–óZÈé«W›_ÓïoÓéôØÙÉŽ^¯‡1R cÍÆPxüñk«À)q=ÞDU“gÍA1 5 [²dß÷M™çýÒ/}ä»Þ÷¾·ºù€u8LR4«Í_£ÅïƹÇóO=µô¾ƒƒ]vvv¹pá"ŸùÌg™L&L§S~ð}ÄóéápòpxO†a°—'é ´†ÃÉ©V«q`ŒôþÝ¿û±îW|ż¿ß±²ÒÂ÷=~í×~ô—n¿ýô§¬µ—z½Ñ`i©9ñ]—U@³²±±r³ô\}¶´ÞbÑe9EfxËË €¡ˆ¨Ž¤gÏž§Ûµììô2 
LÓ"Vw)¯N3ä3 þèXŸyHhŠ"Ožüž=8˜¬f¡Ïׯ_g{{»lØÎ¦É(ij™LbÆã)ƒÁUÎäÞc5ƒ†Ï" *Ï»M¢H’„ÉdJ§3 šÍ€V+”ííÎÛNŸ^/è&œÝîªþþQáÏ‚ëQÀ™bºËõë[<òÈ£L&“<…f3àܹãŒFÓÇã8y|i©ù”“wKKÍîx Z­pt_ñŠÛ¿|2™žyÏ{ÞðÉù‘ïü|¿¤ÊõååÖ¾µ:õg»ŸàûYàÜh„;¶Z|ÐÓùïå_q¸9C «ª""i'iøé¬QõèÀÝyƒ ާ´Z!'OÃ!ªiþÕ[Y9Æòò!žç—èÔtZ@“»ŒÉyªôQJaî|4}CxvF"b¸ï¾;‹Ý)G;N¥©Ü1 étzlmmqéÒS¨&xž°¼ÜL§ÓÔ+DÆ’$@2Žù;çç¾î‡øŸÏÓ·¿ýžè#yLorª„ij‰¢˜bX¢1ÙûÇS{9’Óà—ù#oüÞïýºË™Ái ÒÍÃß™é¼9‚½G½ž`ØØß÷ÞÖédr‡››[\¼x$Éf*„aÀç>÷/þ3p¡Ýn\h·W¦Óø°ÑÆ bý 0ˆZ­°SðÙþ×ÿõûþ§ü)0Ì9k»žgúQ”$aè§¾{cÃ0(§œçÍ3ócN’‹C:Ú/ø€Ÿä8|>¨Ø 7/(!@ð<ååVÓßžÙZšŠïû-Œ ˜N'¥èVšæS83¦d”‡ez3} ³$M+ÞèH…gå;³W Î>ñ„ÿU½ÞN§Ãõë׉¢i©éù©Oýo¿ß}ßÿ®4µÄqеä1ú”ííÃÐáP­ä¤¯ñQaÐâð¯ÚûÇÓÜëëŒf9\'ôzCvwX^nðÓ?ý;æ;¿ó+yˆºâ‰dÈ›ÛvYÆrä¥)àÏ%`mkkzw¯×ãàà€íí\Þ<ƒÅÃÐGD®åkmè7Aœç 0­¼ÀY¯™ßà‘ÍO‘(§x„aè‡@ÓŸµÊŒññ¼€,ƒ¶üØýú»ï¾û%œ>}šÕÕ5šÍVžèD£:&Iz;¯ýíÙ¥½¦éÍðffUõ Ë.çïÖ‹|úÓòöñ8¦ÛísíÚ5Ò4Kh<ϰ´Ô$§ÌPëB 7zÿºÚòsßû"ÜÿÝ.m`-çþœ+’ßíí._¾”K|hmëêj» ¢(ƒôŠ„8MíYÏ3'ók´ ßö¶{Ó?øƒGŸÑdñ¿ÍÑ?rVWÛI§~¦¥“ÒíŽØÞ>¤ÑhðK¿ô‘ïxßûÞ:"L3#”†H&¿îÂÅG‡@%"FP@•‡‡öövËá‚"ÂÉ“kQQÏóÐiîAZ9‚´‘WÕ—œBfAÔ\‘%`)güîù³„Ôæxnîîvi··P ètF¬¬¬Òjµò9²}¬±º £ÑäÔë_ûikuWU÷=ÏÜ, Yª|ãaŒÏd’0¥üôOôµ§OŸµ¶çôûC®^½ÆÃ?œÅb’$¥Õ ùÅ_üá_ËA/¿0é͓֔YÇ‘¨Å}Ö“ß3W®4¿®×ëpxØåÚµë¥À“µ–_ûµ¿ý1€S§ÖÓñ8ò†Ã ½Þ˜(Š"†Ã)?ôCÿêkÿÁ?øó;LÚ.©júLò¯"°6É /ÛÁÿðê÷ßð†ïywd”$I8<âû»´Z!>ºùu÷Þ{öÿÉ KƘFÖ ej0èMæ ‘Í&å|Ÿ~T úÐÿüÁ|ñ]šÃÉë9—ê¶œ)z<÷Š æJº{ÎøÝ|gBL‡ ‡CÒ4ÃO»ÝC®_ßdmmåå6FƒÉdÌhÔ¥Ùôi6³¹Rßù_¹ „žg¼ …Io²În!ıe0ˆxðÁ+묬ôPU:<øàƒ<ñÄãL&sti©I³r÷Ý·ê†"2p¨"émåðém[[§¶`gëcâèÛfŸ!5ÅóT'ŒFÜÿK¾Í/\¿µ6ßeFxža2ÉŠD£Ñ˜0ÜÂó²Êoq`ßÏ Ë¼Ô­9#ÓQJÒ›H‚gnÒZe2‰é÷Ç\¿¾GKnp677yøá‡˜N§DQœ³ýüÖoýÄOòž„²aÌֲ͈`2&ÏY,"¼õ­w¼¥ÜEŸˆ"sw6ˆ:Ãþww÷QÂ0d}}É%O™—¾ô|¿ê“IÌ`Ð-‹b?÷süŠ|àËQÕ UÝ5Æ ÞñŽ—Ç¿ÿûëÓ'Áîlh-ÇÝzžW:øÕ_ý‘ßÿ á~{’¤ ‡Súý¾ïqùò.¿ð Ÿz×·|Ëë?0ÚV1O£€oyºü¯¬›é4]J’l‚eÅ9·?#B¶ZÍv·ÌºMÞͶœ‰ã£QÂp8e01 saf)'í”÷$²´”’$Sî¿ÿ%güf™L&xl<ÎèÎ.´im‘˜¥pš âgCd?IR&“iÞÜ L&{{‡ôû}67¯åšîÙù7›!¿õ[?ñëǯ>\><œ8±:(žgÚ¹õGÿ;D¯0oã;œ¹pAÞÝë 9<Ì ¿é4" =Úí&ßñïyª`ŠW¿úž½^Ïžèõ"Z­{{{ŒF†Ã1ûØ£§?ð/?%"ëy\[ÔÒ›?ÿQ" z‹µ:^Yi{wß}Ëhss¿­ªôûúýÛÛVWwøùŸèÝïÿ+?”¦‰&L§q®Ñ¯<iõ}™f Fœo°â@ˆÃ#*{ŽEfÀÂ`°ÏåË»ììtèt2ÊtÆW ñ<Ãp8àà`‡é´ÃÊJH³+§DàyRâÁÙˆ(ÛY%¯Øz#¼ÿý_ñyvÞyf1ø †K’˜Á`Èx<¢Ûí²³³K¥W*aؤÝ^¥Ñhây>N—k×.Ñnû¬®¶YZj´ýY2dI’ßÏHoql¹~ý 
NÎÐAÕM&€ëI’^õ}ï°ˆÁo† çâîçe\†ª)“IRçû†V+䳟ýöEص–}Uí^¹²›ÜzëÉ(ÅæÞïÆïO­P. }¶9€ˆpÿýwtá%U6D85y/Ï;FôûúýaR6yYy4›+´Zí¼Fã1™LsjĈöÏþý»ÿ»ÿî>Ÿ£AË@ÿïxEü{¿÷yûô9Àüàó,îö ^ðlöüÇ¿ûßüÀüóoÍÄe³Át¾ß$I”Á`²zÇ+ÇwvvØß?¤×ë“$ižO,sÄÏ:Œ‚Èó,ÖÆ¤iŒˆ!Mmmüèþë·ÿèþ7ŸÎÜ¥YBÇ_ºt‰kײaÙ”È1iÚe<Î:IU…½½ýœJßf8žE|÷b–›¹A%Š Cÿ2ðdÎ÷äg3ë¾ïä‹¿ÿlbðŒó—)c²ü¢€èŠãy>KK-~ù—?|îÏþÙwîˆHäyY³ÆÚÚRØ}èsZ 7÷þTæ¢áô™æNòÛÌoÚIàì… éƒ½^¶ˆ²Æ#Ëd1'üàþü»_þò—qâÄi°Éxœ%½pJš&¥ ‡c|ðâ1à´ª‘¢)i¢ê¶K}î3Ê»–#rˆr tDd+7âø'~â»í/ÿåÿí½QÑé ¹~ý:ƒÁˆ½½?õS»/ŸNã|JûQ”†Þ <@Z@A§ú>¨f±rm ÿîîöÚÀ)U=a­®yž3ë™Þ¼re«»³s¸E“µÝn”c¯7¢Ûæe¯dýæ4¿¾»yžG–±×(_üW æóá+REps8üŒ„E“²&{ÿ,5)†pgû­ív€çi†Ç¥ü»÷çßóC?ôþëy(žxžÙu(9û92äçðèy€{ï}9axÃÃ!Ãá(R²cÿÀü¹O]ãÊÎû~€1)íĉ5òÒòr¾»¬æ‹a=®åîÝmw«QdŸ®€£MÊÄ:<<Ïèòr“ÕÕËË-šÍ D‡²pÌææÁë€;rùzÑ óÎw¾ê¦! bô<¯¼0aè³¾¾”„5à–|w¹5ÿ÷©ZÁÅ{ç;_ifɯç–ù×òðç\¯7¢Óé“u~]egg'G]2èÑÚ´¤x_»¶ÅիרÞÞe:æÌÌ€ Èr<óÎfr*…Æ‘÷ôùŠVX¹5ȲØä¦¹×ß¹¨ªÿÐ}ëÏœ<¹Îúú2++­¼ƒ.£0ôz½I’–íÃÃ>£ÑäàÀ½ÀíªœQeµXw"™„ À/ÿò#ï>vìÍf‹étR¢˜I’>¯ýKŸ:f¶f”„4UŒÉZÉò„ã%ÀË€W¯ÎŸ¯^¼ÄZ½5Mí `)ŽÓ²‡öf!¸"(ˆÁùð‡ÿñﮯ¯°¾¾L»ÝBF£)=¶·ùÙŸý­·÷ˆÈQ”Ü’/¶&UiÁ›¨†’pf!`øäÕÅ—æ¼8ç—©òÒÜŽÅqZè(™Â€Þö¶{Šð§$¾oëõt:]67·xüñÇsØQÊ"M±£öûCz½ýþ(W³®¼¥¥f¾0‚¼bŸñ܃1>øÔ«rC[Ï=Wð®w½ZžŽW¯ƒ,˜ÔQA×Ù‘'­ÕÏÿø÷ÏŸ>½Áñ㫬¬´iµBTSgL©½™jpiªº¯ÊµoþæwþÛcÇVX__by¹‰ç²F¨!»»þÙ?ûÍ·ªò*àõª¼N„—‹p—S > œ t% — Çya-óŸF.—F:m@Óè™[–ƒJÅÆ¹sUìÞýÑMŸ>ýSI ãÉëüÀ« …äšãÇq­ë˜&…Ðp!WÚidäå¿0htY‘¼‰i:0M[¶ümÝå—K—.C.—E"a0ß}‹…”mXV ƒƒWÜK‰$54’“;ûæiû¡EþK/ýúÀC½¸NQ$Ð@‹‹VËG¹\ÇÄÄ 2årýýý™†Š´x,âÆW“÷ß?B:Bïü¯$~Ôþ‰3ܽ·ß>´jt´¹jåÊ•Èç i…@UCqÏ¿€Í0$¼*¾òŠBÕ(ꉠÀ·ÑÑÑhÿ‰D Gîú¨¯/í³8 ¥ˆ^¤~É}û.Ù¹sÏ•år•Š ÏóaÛ.L³ÅÓ%—1µÃöM7}/ìb KâÔîKærõÁ(.I’¤±Ë!yû탫ÿúסËhÌcðe"™YË퀪ãxEM‹>öØí¯=ýô+÷W*uüƒiÒ7@*•A"1ßWûÏŸo®ïïÏ£¿?iªªÏSRž²j˜žžÁèèi:t…ÂdTÈÁ04îÜ9íyþd,¦VTßwX2s…BÅb †¡1¯KA ¢\¶Ëå ë:|߇eÕ†R)ª¿^±”¡õf°Àœ\Ïs`Y ÌÌáû>³Î‰$aH¦dYR·l¹çÓíÛßX[¯ÓB†…rÙc*“Š;öܰuëý§Ù³«˜]ÿ‘Ì `Ûͨ=l¡P€çÑx„ªÆ!ËqX–|¾ U¥Ôé4ày¯‹=),¸áþiKN ~<==3gÎàÈ‘#°m;2æu]C__º†äKY–x^µ#0ªÎ˜zÕm·]‡={>¼’Ûvá8šMÕª‰b±Š^øû†'ž¸ó˜"s^xäBk”„a@Z­&jµ*Ã"ùð<ÚE‘æ ›*ähȲ4Í2ðŒ ®}gxxä‘f³Åªµ9Ñï¹ÃqÜùô`€¥(ò4Wo½õº‘™™Ú÷M“v,m4lLOÓ–Eí¤b±†<ÒétJÓ þ”JŒŽŽâàÁƒ˜œœŠ€“†¡Á08tèOôý`, É”çù Õ÷›¨×‹(—‹pŠ"Á÷Ôj ;vçÎâE‹‘ÍÒ€L«e£^/Á0èkÛ04lÚtK €V¯7™Œ! 
Yÿ7÷¹A`£V+¢Ñ¨°ÈsȈE (ËÒyኋììÚR©ŽjÕ„m;ð<•JƒE£gžùó½Ï=·y@_†%Y–MnÍ»®ú<×mÁ¶k(•¦P«U£ê­–‹Ó§Ï¢X,âÔ©¯ÑßßÇ>7‘N'Jéì¹…«ª…ÕÊaøŸôää”qöì78|øß˜˜ФR:|ðÖÓ¾eéÚ…DŽáÐ -[–_gY-Ôj&šÍ][½^lj_btôôõe‘Ïç‘ÍfkÌ“VÃøø8jµ:šMŠ+"$„ahH¥t>üÊ«Ž«ª2¦ªÊ4SÅÚÞD"ß"õÄqT*LÓŠj°A ¤ëqn,2Ãïþê#0Ù-¦ª$°Ä^Š$2@y² Àܺõþ7ydÇ]¥R –å0&¢ö€ªROÊÄDiñ%—ä Z«åÆ=/y®³°œ€âI’Ôô¼–Cˆu Ô´8d™Â?h–Ba ÓÓÅHEÓ´Ù{C"u‰y¨„ &IP[-?mšELNžE­V‹Ö­ë CÃSOÝ5 à$hasì ÆQœü<ñÇ¿sï–-»6ÎÌÔ`š-„!¿!›(—ë XVKO&ð¼@Å^ÌK¼Å›–e…@yüE†ëúÌöR@Žð*ß¡$ÍbV‡­i‚©hÆÎ¾¹yóï~nš6óN‘¨o{¾¨jx„Õ’$ û÷×­»Æcon^Š2vß}^WUùŽ7ߺÜq¼¨zµQi’{¹\‰¼hRÀué[Ìq\¨ªŠD‚ÚTGîz ÀaÐ:¡ŒÖ¶J GX´Uf“¸®ß Ë^G¤èõ«Ðõ84-ÌS‹ DtJ•Š)da…s1 À¬t›[îïϸ©”O$¨O¼Õr£$I’ððÃ/>üÖ[¿ùPU€œNëpòÚ¢Ô “$©  zölQ¥0%‚ˆU <Ïgû–Øs¤Î=7dYŠöÌö(|?Pc1%üôÓ“›êõ¶ßq<$“ 躆x<§æD:mðL'ó7†U¥‡zÇ7ªŒibR¹\j$“InL¥thZ,Ò‘ÇCµÚÀÍ7?þ«¡¡ßß @ŠÅ €,ànøåSŸœ,³‹®Ý›Œ 6Ãq•ƒ€X±˜â€9nlŸëíÌ(Ö%IŠ=ùä϶lÙõ£j•îU–é…ÊçTÓne+¹`Õ¹!¹ûîõÎààêkxàùŸ´íŒ½]¼(ƒº‘E䀌d’Ò(‘ˆã“Oþð2€/óŸ`=¾<>>c—JuضIR)Ùl Ùlé´dR‡a$ ëôื‚ZéQC™€šçüUwAÚ 5“cc”Ju4› ¼Íšoœ @! Éy'Ÿ}vóvŽÇUæ-Ñ«¨Õ,LLÌàãå\×ÔÞ¶7ëù&€âªU‹ß:{v¦iÃq\hZ ™LÙl ™L©T{ï”&í=˲bv)˜@ô¡Ë²ÔP?~| Ÿ}v3¨Õ,¤Ó²YZÚåÝw»À7º®xð @¸dI‹e10åkn±¼ë)__}õeCžçÃ÷}Vû’Þp„•JAòÀ$¿$/ I“R^»ö»¯Ðh4áº>2™däuÉf“5›%„ p€ÅŒâ3A~uÕU+ÿ•ÏgÊŠ"ƒúóé|ñxÌé U؉>™%  %Õ¿¶mç‹K/]<ôÞ{ÏïX¿~Íõ–Q54•J0¾ÔHPý>“¡ôÍå’ÈåRØ»wÛž‘‘—·y^ðY„ÇA+ŠÙÉ}çO‰¡{:rLã p¡w+Œ5)¤¬u³|f%… Ý Ͼs;æ›aQ™?}9hxÞ$!΂ƒòxnò”àòºä òàÔrÐR|‹ÙgÆ<1„¹ö\bŸ»hW`Î1Ÿô aͳz0!?ÏœÜx÷/B·>F¯Ý’Œ¹;çœê˜SP©lM+Юçc·z™ÑQ.HÚ-‹f7_L¸!-ö;‘ñI—çóg”ØŒaÄN#óíÙbÄ-wÞj¢Wƒ­5dŒ(6±TØwyèÆ]…¬õ;ìŽó…³&¨" ˆ°÷2ÚÝ[&ØÞ—³Àó „H1á:!BN„ó. 9#á<Á1±ü¢+ ‘Ç…Hw|>åû䪶á1í|2.2´œ-˜Oº…ñ‡ñåsÈå5êãÜÚG»?–,¨(â|ð;^*E`.èE%®£“ØŠ°Í)Î7ŸðCéö,"ä¹vÛ£"Э[²`Îä!w–­™3ALXÛ|gÁ;ä𵉴é/öôòpñ‚Çb^¯x>J‡,t¾Y™bÂYÅ…KJ,´Û-Ú¼ét5ƒNžÒ;³mˆ$ѽK$™gcb7Dñ¹ÝºN†¡;# ëèÚݱËZ.ÖÙp!e,Èö<ß/ÓÍs+Š ! ëU²À³;h©`vWÇo{žè §Üq‘ü7óu›»sþ‹ñé·áíÞøÿr½Ñ€ÞèžôFoô 7z£'½Ñ=èÞè @oôFOz£7zнрÞèÿ±ñud¿¿KE³„IEND®B`‚cl-cl-1.2.3/doc/overview.edoc000066400000000000000000000005231301041406700157520ustar00rootroot00000000000000@author Tony Rogvall @version 1.0 @title Erlang binding to OpenCL 1.0. @doc This is a binding to OpenCL. You can find the PDF specification at http://www.khronos.org/opencl. 
This API will conform as close as possible to the C specification. In cases where the API has special quirks or features, this will be noted. cl-cl-1.2.3/ebin/000077500000000000000000000000001301041406700134205ustar00rootroot00000000000000cl-cl-1.2.3/ebin/.gitignore000066400000000000000000000000151301041406700154040ustar00rootroot00000000000000*.beam *.app cl-cl-1.2.3/examples/000077500000000000000000000000001301041406700143215ustar00rootroot00000000000000cl-cl-1.2.3/examples/Makefile000066400000000000000000000011261301041406700157610ustar00rootroot00000000000000 MODULES = \ cl_basic \ cl_square_float \ cl_map \ cl_binary_test \ cl_bandwidth \ cl_mul \ cl_test \ cl_buffer \ cl_image \ cl_compile EBIN = . ERLC = erlc override ERLC_FLAGS = -W -pa ../../cl/ebin OBJS = $(MODULES:%=$(EBIN)/%.beam) TARGET_FILES = $(OBJS) debug: ERLC_FLAGS += -Ddebug all: $(TARGET_FILES) debug: all release: all depend: edep -MM -o . $(ERLC_FLAGS) $(MODULES:%=%.erl) > depend.mk dialyze: dialyzer --src -o dia.out $(ERLC_FLAGS) -c $(MODULES:%=%.erl) clean: rm -f $(OBJS) -include depend.mk $(EBIN)/%.beam: %.erl $(ERLC) $(ERLC_FLAGS) -o $(EBIN) $< cl-cl-1.2.3/examples/cc_subdiv.cl000066400000000000000000000235121301041406700166050ustar00rootroot00000000000000// -*- c++ -*- // @Author: // @copyright (C) 2010 // @doc Catmull Clark subdivision typedef struct { int start; int len; } FaceIndex; typedef struct { int start; int len; int vab; } VabIndex; void find_faces(int V0, int V1, FaceIndex Fi, __global int *Fs, int * F1, int *F2, int *CCW); __kernel void gen_faces( __global float4 *VsIn, __global int *FsIn, __global FaceIndex *FiIn, __global float4 *VsOut, __global int4 *FsOut, //__global int *locks, const uint noFs, const uint noVs ) { int i; const int face_id = get_global_id(0); if (face_id >= noFs) return; const FaceIndex fi = FiIn[face_id]; float4 center = {0.0,0.0,0.0,0.0}; for(i=0; i < fi.len; i++) { center.xyz += VsIn[FsIn[fi.start+i]].xyz; } center /= (float) i; // Create new center 
vertex const uint ov_id = noVs + face_id; center.w = fi.len*4.0; // Valance = faceVs and hard_edge count = 0 (Valance << 2) VsOut[ov_id] = center; center.w = 0.0; for(i=0; i < fi.len; i++) { int id = fi.start+i; int v_id = FsIn[id]; // Add center to all face verts //lock(v_id, locks); VsOut[v_id] += center; unlock(v_id, locks); // locking doesn't work (for me) do it in a separate pass // single threaded // Create Faces FsOut[id].x = v_id; FsOut[id].y = -5; FsOut[id].z = ov_id; FsOut[id].w = -5; } } __kernel void add_center( __global int *FsIn, __global FaceIndex *FiIn, __global float4 *VsOut, const uint noFs, const uint noVs ) { int i, face_id; const int id = get_global_id(0); if (id >= 1) return; // Should only run by one "thread" FaceIndex fi; uint v_id, ov_id; float4 center; float4 zero = {0.0,0.0,0.0,0.0}; for(face_id=0; face_id < noFs; face_id++) { FaceIndex fi = FiIn[face_id]; ov_id = noVs + face_id; center = VsOut[ov_id]; center.w = 0.0; for(i=0; i < fi.len; i++) { int v_id = FsIn[fi.start+i]; float4 v = VsOut[v_id]; uint he_c = trunc(v.w); he_c = he_c % 4; if(he_c < 2) { VsOut[v_id] = v + center; } else if(he_c == 2) { zero.w = v.w; VsOut[v_id] = zero; }; } } } __kernel void gen_edges(__global float4 *VsIn, __global int *FsIn, __global int4 *EsIn, __global FaceIndex *FiIn, __global float4 *VsOut, __global int *FsOut, __global int4 *EsOut, const uint noFs, const uint noVs, const uint noEs) { int i; const int edge_id = get_global_id(0); if (edge_id >= noEs) return; float4 center = {0.0,0.0,0.0,0.0}; int4 edge = EsIn[edge_id]; int hard = 0; int ov_id = noVs+noFs+edge_id; int hov_id = ov_id; const int4 hole_edge = {-1,-1,-1,-1}; if(edge.y < 0) { // Indicates edge in hole const int oe_id = edge_id*4; EsOut[oe_id+0] = hole_edge; EsOut[oe_id+1] = hole_edge; EsOut[oe_id+2] = hole_edge; EsOut[oe_id+3] = hole_edge; return; } if(edge.x < 0) { // Indicates hard edge hard = 1; edge.x = -1-edge.x; hov_id = -1-ov_id; } center += VsIn[edge.x]; // V0 center += VsIn[edge.y]; 
// V1 if(hard) { center /= 2.0; center.w = 18.0; // Valance 4 and 2 hard edges ((4 << 2) | 2) } else { center += VsOut[noVs+edge.z]; // F1 Center center += VsOut[noVs+edge.w]; // F2 Center center /= 4.0; center.w = 16.0; // Valance 4 and 0 hard edges ((4 << 2) | 2) } // New vertex at edge center position VsOut[ov_id] = center; // Complete faces int F11=-1,F12=-1,F21=-1,F22=-1, CCW1,CCW2; const int oe_id = edge_id*4; // Be sure to create faces with the correct order if(edge.z >= 0) { // Edge is not a border FaceIndex IF1 = FiIn[edge.z]; find_faces(edge.x,edge.y,IF1,FsIn,&F11,&F12,&CCW1); const int4 e0 = {ov_id,noVs+edge.z,F11,F12}; EsOut[oe_id+0] = e0; if(CCW1) { FsOut[F11*4+1] = ov_id; FsOut[F12*4+3] = ov_id; } else { FsOut[F11*4+3] = ov_id; FsOut[F12*4+1] = ov_id; } } else { EsOut[oe_id+0] = hole_edge; } if(edge.w >= 0) { // Edge is not a border FaceIndex IF2 = FiIn[edge.w]; find_faces(edge.x,edge.y,IF2,FsIn,&F21,&F22,&CCW2); const int4 e1 = {ov_id,noVs+edge.w,F21,F22}; EsOut[oe_id+1] = e1; if(CCW2) { FsOut[F21*4+1] = ov_id; FsOut[F22*4+3] = ov_id; } else { FsOut[F21*4+3] = ov_id; FsOut[F22*4+1] = ov_id; } } else { EsOut[oe_id+1] = hole_edge; } // Hmm init only when declaring var on nvidia? 
const int4 e2 = {hov_id,edge.x,F11,F21}; EsOut[oe_id+2] = e2; const int4 e3 = {hov_id,edge.y,F12,F22}; EsOut[oe_id+3] = e3; } __kernel void add_edge_verts( __global float4 *VsIn, __global float4 *VsOut, __global int4 *EsIn, const uint noEs ) { const int thread = get_global_id(0); if (thread >= 1) return; // Should only run by one "thread" int id; int4 edge; float4 v0,v1; int hard_v0=0, hard_v1=0; for(id=0; id < noEs; id++) { edge = EsIn[id]; if(edge.y >= 0) { if(edge.x < 0) { // Hard edge edge.x = -1-edge.x; v0 = VsIn[edge.x]; v0.w = 0.0; VsOut[edge.y] += v0; v1 = VsIn[edge.y]; v1.w = 0.0; VsOut[edge.x] += v1; } else { // Only add soft edges if vertex have <2 hardedges v0 = VsIn[edge.x]; v1 = VsIn[edge.y]; hard_v0 = trunc(v0.w); hard_v1 = trunc(v1.w); hard_v0 = hard_v0 % 4; hard_v1 = hard_v1 % 4; if(hard_v1 < 2) { v0.w = 0.0; VsOut[edge.y] += v0; } if(hard_v0 < 2) { v1.w = 0.0; VsOut[edge.x] += v1; } } } } } __kernel void move_verts( __global float4 *VsIn, __global float4 *VsOut, const uint noInVs, const uint noOutVs ) { const int v_id = get_global_id(0); if(v_id >= noOutVs) return; if(v_id >= noInVs) { // Copy buffer VsIn and VsOut should be equal // after this pass VsIn[v_id] = VsOut[v_id]; return; } float4 v_in = VsIn[v_id]; float4 v_out = VsOut[v_id]; uint hc = trunc(v_in.w); uint vc = hc; hc = hc % 4; vc = vc / 4; if(hc < 2) { float a = 1.0/(vc*vc); float b = (vc-2.0)/vc; // We started with Inpos remove it v_out -= v_in; v_out *= a; v_out += (v_in * b); v_out.w = v_in.w; VsOut[v_id] = v_out; VsIn[v_id] = v_out; } else if(hc == 2) { v_out += v_in * 6.0; v_out *= 1.0f/8.0f; v_out.w = v_in.w; VsOut[v_id] = v_out; VsIn[v_id] = v_out; } else { VsOut[v_id] = v_in; } } __kernel void create_vab( __global float4 *VsIn, __global int4 *FsIn, __global VabIndex *FiIn, __global float *Vab, const int noFs ) { const int id = get_global_id(0); if(id >= noFs) return; VabIndex fi = FiIn[id]; const int f_sz; int4 face; float4 v1, v2, v3, v4, normal; int vab, out = fi.vab*24; 
for(int i=0; i < fi.len; i++) { face = FsIn[fi.start+i]; vab = out+i*24; v1 = VsIn[face.x]; v2 = VsIn[face.y]; v3 = VsIn[face.z]; v4 = VsIn[face.w]; normal = normalize(cross(v3-v1,v4-v2)); // Output V1 Vab[vab+0] = v1.x; Vab[vab+3] = normal.x; Vab[vab+1] = v1.y; Vab[vab+4] = normal.y; Vab[vab+2] = v1.z; Vab[vab+5] = normal.z; // Output V2 Vab[vab+6] = v2.x; Vab[vab+9] = normal.x; Vab[vab+7] = v2.y; Vab[vab+10] = normal.y; Vab[vab+8] = v2.z; Vab[vab+11] = normal.z; // Output V3 Vab[vab+12] = v3.x; Vab[vab+15] = normal.x; Vab[vab+13] = v3.y; Vab[vab+16] = normal.y; Vab[vab+14] = v3.z; Vab[vab+17] = normal.z; // Output V4 Vab[vab+18] = v4.x; Vab[vab+21] = normal.x; Vab[vab+19] = v4.y; Vab[vab+22] = normal.y; Vab[vab+20] = v4.z; Vab[vab+23] = normal.z; } } __kernel void collect_face_info( __global float4 *Vs, __global int4 *Fs, __global float *Vab, const uint noFs ) { const int id = get_global_id(0); if(id >= noFs) return; const int f_sz = 4*6; int4 face = Fs[id]; float4 v1, v2, v3, v4, normal; v1 = Vs[face.x]; v2 = Vs[face.y]; v3 = Vs[face.z]; v4 = Vs[face.w]; normal = normalize(cross(v3-v1,v4-v2)); // Output V1 Vab[id*f_sz+0] = v1.x; Vab[id*f_sz+3] = normal.x; Vab[id*f_sz+1] = v1.y; Vab[id*f_sz+4] = normal.y; Vab[id*f_sz+2] = v1.z; Vab[id*f_sz+5] = normal.z; // Output V2 Vab[id*f_sz+6] = v2.x; Vab[id*f_sz+9] = normal.x; Vab[id*f_sz+7] = v2.y; Vab[id*f_sz+10] = normal.y; Vab[id*f_sz+8] = v2.z; Vab[id*f_sz+11] = normal.z; // Output V3 Vab[id*f_sz+12] = v3.x; Vab[id*f_sz+15] = normal.x; Vab[id*f_sz+13] = v3.y; Vab[id*f_sz+16] = normal.y; Vab[id*f_sz+14] = v3.z; Vab[id*f_sz+17] = normal.z; // Output V4 Vab[id*f_sz+18] = v4.x; Vab[id*f_sz+21] = normal.x; Vab[id*f_sz+19] = v4.y; Vab[id*f_sz+22] = normal.y; Vab[id*f_sz+20] = v4.z; Vab[id*f_sz+23] = normal.z; } // Helpers // Find the order of faces so that vertices for a face // comes in the ccw order void find_faces(int V0, int V1, FaceIndex Fi, __global int *Fs, int * F1, int *F2, int *CCW) { int fva,fvb; fva = 
Fs[Fi.start]; for(int i=Fi.start; i < (Fi.start+Fi.len); i++) { fvb = Fs[i+1]; if(V0==fva) { *F1 = i; if(V1==fvb) { *F2 = i+1; *CCW = 1; } else { *F2 = i+Fi.len-1; *CCW = 0; } return; } if(V1==fva) { *F2 = i; if(V0==fvb) { *F1 = i+1; *CCW = 0; } else { *F1 = i+Fi.len-1; *CCW = 1; } return; } fva = fvb; }; *F1 = -1; *F2 = -2; *CCW = 1; } // void lock(int v_id, __global int *locks) { // int pos = v_id % LOCK_SZ; // __global int * semaphor = &(locks[pos]); // int occupied = atom_xchg(semaphor, 1); // while(occupied > 0) { // occupied = atom_xchg(semaphor, 1); // } // } // void unlock(int v_id, __global int *locks) { // int pos = v_id % LOCK_SZ; // __global int * semaphor = &(locks[pos]); // atom_xchg(semaphor, 0); // } cl-cl-1.2.3/examples/cc_subdiv.erl000066400000000000000000000556611301041406700170030ustar00rootroot00000000000000%%%------------------------------------------------------------------- %%% File : cc_subdiv.erl %%% Author : Dan Gudmundsson %%% Description : Catmull Clark subdivision in OpenCL %%% The example is the same as I will use in wings3D %%% Created : 8 Feb 2011 %%%------------------------------------------------------------------- -module(cc_subdiv). -compile(export_all). -include_lib("wx/include/wx.hrl"). -include_lib("wx/include/gl.hrl"). -include_lib("cl/include/cl.hrl"). -record(cli, {context, kernels, q, cl, device, %% CL temp buffers and respective sizes vab, vab_sz=0, fl, fl_sz=0, fi, fi_sz=0}). -record(cl_mem, {v, v_no, f, fs_no, e, e_no, fi, fi0}). -record(kernel, {name, id, wg}). -record(base, {v, %% array of {x,y,z, {Valance, HardEdges}} nv f, %% array of [v0,v1..,vn] nf e, %% array of v0,v1,f1,f2 ne level %% Subdiv levels }). -define(I32, 32/signed-native). -record(state, {f, % wxFrame cl, % CL record above gl, % wxGLCanvas orig, % Orig Mesh sd % Sub Mesh }). 
%% Entry point of the demo: opens a wx frame with an OpenGL canvas, sets up
%% OpenCL, uploads the base mesh, builds one vertex buffer for the original
%% mesh and one for the subdivided mesh, and then enters the event loop.
%% Returns whatever loop/2 returns (the atom quit) after wx is destroyed.
start() ->
    WX = wx:new(),
    Frame = wxFrame:new(WX,1,"OpenCL does CC subdivision",[{size, {800,600}}]),
    ok = wxFrame:connect(Frame, close_window),
    wxFrame:createStatusBar(Frame,[]),
    setup_menus(Frame),

    GLAttrs = [?WX_GL_RGBA,?WX_GL_DOUBLEBUFFER,0],
    Canvas = wxGLCanvas:new(Frame, [{attribList, GLAttrs},{size, {800,600}}]),
    Self = self(),
    %% Paint callback: only forwards a 'repaint' message to this process;
    %% the actual drawing is done from loop/2.
    Redraw = fun(_Ev,_) ->
                     DC = wxPaintDC:new(Canvas),
                     Self ! repaint,
                     wxPaintDC:destroy(DC)
             end,
    wxFrame:connect(Canvas, paint, [{callback, Redraw}]),

    wxWindow:show(Frame),          %% Must show to initialize context.
    wxGLCanvas:setCurrent(Canvas), %% Init context

    Base = #base{v=verts(), f=faces(), e=edges(), level=4},
    initGL(Canvas),
    CL0 = initCL(),
    {In, Out, CL} = cl_allocate(Base, CL0),

    %% Original mesh: 16 bytes per face entry in the face binary.
    %% Reuse the binary already stored in Base instead of rebuilding it.
    Wait0 = cl_write_input(Base, In, Out, CL),
    NoFs = byte_size(Base#base.f) div 16,
    OrigMesh = setup_gl_buff(gen_va(NoFs, In, Wait0, CL)),

    %% Subdivided mesh: the depth must be the same value that cl_allocate/2
    %% was given through Base, so take it from the record rather than
    %% repeating the literal.
    Wait1 = cl_write_input(Base, In, Out, CL),
    SDMesh = setup_gl_buff(subdiv(Base#base.level, In, Out, Wait1, CL)),

    gl:clear(?GL_COLOR_BUFFER_BIT bor ?GL_DEPTH_BUFFER_BIT),
    draw_buff(OrigMesh),
    wxGLCanvas:swapBuffers(Canvas),
    R = loop(0, #state{f=Frame, cl=CL, gl=Canvas, orig=OrigMesh, sd=SDMesh}),
    wx:destroy(),
    R.

%% Event loop. R is a counter passed to draw/2 (used there as the rotation
%% angle); it is incremented only when the 10 ms receive timeout fires.
%% Returns quit on window close or on the Quit menu entry.
loop(R, S = #state{f=Frame, cl=CL}) ->
    receive
        #wx{event=#wxClose{}} ->
            quit;
        #wx{id=?wxID_EXIT} ->
            quit;
        #wx{id=?wxID_ABOUT} ->
            about_box(Frame, CL),
            loop(R, S);
        _Msg ->
            %% Any other message (e.g. repaint from the paint callback)
            %% triggers a redraw without advancing the counter.
            draw(R, S),
            loop(R, S)
    after 10 ->
            draw(R, S),
            %% NOTE(review): presumably a synchronous wx call used to keep
            %% this process in step with the wx server - confirm.
            _ = wxWindow:getSize(Frame),
            loop(R+1, S)
    end.

%% Render one frame: the rotating box, the subdivided mesh opaque in yellow
%% and the original mesh blended in half-transparent grey, then swap buffers.
draw(R, #state{gl=Canvas, orig=OrigMesh, sd=SDMesh}) ->
    gl:clear(?GL_COLOR_BUFFER_BIT bor ?GL_DEPTH_BUFFER_BIT),
    gl:matrixMode(?GL_MODELVIEW),
    gl:loadIdentity(),
    glu:lookAt(15,15,15, 0,0,0, 0,1,0),
    drawBox(R),
    gl:disable(?GL_BLEND),
    gl:color4f(1.0,1.0,0.0,1.0),
    draw_buff(SDMesh),
    gl:enable(?GL_BLEND),
    gl:color4f(0.5,0.5,0.5,0.5),
    draw_buff(OrigMesh),
    wxGLCanvas:swapBuffers(Canvas).
%% Run the collect_face_info kernel over FaceCount faces and read the
%% resulting vertex/normal array back from the device as a binary.
gen_va(FaceCount, #cl_mem{v=VertBuf, f=FaceBuf}, Deps, CL=#cli{q=Queue, vab=VabBuf}) ->
    KernelArgs = [VertBuf,FaceBuf,VabBuf,FaceCount],
    KernelDone = cl_apply(collect_face_info, KernelArgs, FaceCount, Deps, CL),
    %% Bytes read back: 4 * 6 * 4 per face.
    ByteCount = FaceCount*4*6*4,
    {ok, ReadDone} = cl:enqueue_read_buffer(Queue, VabBuf, 0, ByteCount, [KernelDone]),
    {ok, Result} = cl:wait(ReadDone),
    Result.

%% Upload Data into a freshly generated GL array buffer.
%% Returns {BufferId, DataAfterFirst12Bytes, ByteSize div 24}.
setup_gl_buff(Data) ->
    [BufId] = gl:genBuffers(1),
    gl:bindBuffer(?GL_ARRAY_BUFFER, BufId),
    Bytes = size(Data),
    gl:bufferData(?GL_ARRAY_BUFFER, Bytes, Data, ?GL_STATIC_DRAW),
    %% Skip the first 3 * 32 bits; the remainder is returned as second element.
    <<_:96, Rest/binary>> = Data,
    {BufId, Rest, Bytes div (6*4)}.

%% Draw a buffer created by setup_gl_buff/1 as quads. Each element has a
%% stride of 24 bytes: the vertex pointer starts at offset 0 and the normal
%% pointer at offset 12. Returns its argument unchanged.
draw_buff({BufId, _Rest, VertexCount} = Mesh) ->
    Stride = 6*4,
    NormalOffset = 3*4,
    gl:bindBuffer(?GL_ARRAY_BUFFER, BufId),
    gl:vertexPointer(3, ?GL_FLOAT, Stride, 0),
    gl:normalPointer(?GL_FLOAT, Stride, NormalOffset),
    gl:enableClientState(?GL_VERTEX_ARRAY),
    gl:enableClientState(?GL_NORMAL_ARRAY),
    gl:drawArrays(?GL_QUADS, 0, VertexCount),
    Mesh.

%% Subdivide N times and return the vertex array binary of the result.
subdiv(N, In, Out, Wait0, CL) ->
    {Final, Pending} = subdiv_1(N, In, Out, CL, Wait0),
    gen_va(Final#cl_mem.fs_no, Final, Pending, CL).

%% One subdivision pass per recursion step. The kernels are chained through
%% events: gen_faces -> add_center -> gen_edges -> add_edge_verts ->
%% move_verts. After each pass the In and Out buffer sets swap roles and the
%% element counts of the new output set are scaled for the next level.
%% Returns {ResultBuffers, WaitList} once N reaches 0.
subdiv_1(N,
         In = #cl_mem{v=VsIn, f=FsIn, fi=FiIn, e=EsIn,
                      v_no=NoVs, fs_no=NoFs, e_no=NoEs},
         Out = #cl_mem{v=VsOut, f=FsOut, e=EsOut, fi=FiOut,
                       v_no=NoVs1, fs_no=NoFs1, e_no=NoEs1},
         CL, Wait0) when N > 0 ->
    FacesDone = cl_apply(gen_faces,
                         [VsIn, FsIn, FiIn, VsOut, FsOut, NoFs, NoVs],
                         NoFs, Wait0, CL),
    lists:foreach(fun cl:release_event/1, Wait0),
    %% add_center and add_edge_verts are launched with a single work item.
    CentersDone = cl_apply(add_center,
                           [FsIn, FiIn, VsOut, NoFs, NoVs],
                           1, [FacesDone], CL),
    EdgesDone = cl_apply(gen_edges,
                         [VsIn, FsIn, EsIn, FiIn, VsOut, FsOut, EsOut, NoFs, NoVs, NoEs],
                         NoEs, [CentersDone], CL),
    EdgeVertsDone = cl_apply(add_edge_verts,
                             [VsIn, VsOut, EsIn, NoEs],
                             1, [EdgesDone], CL),
    MoveDone = cl_apply(move_verts,
                        [VsIn,VsOut,NoVs,NoVs1],
                        NoVs1, [EdgeVertsDone], CL),
    %% cl_vs("cvs_out3", N, VsOut, NoVs1, CL, Wait),
    lists:foreach(fun cl:release_event/1,
                  [FacesDone, CentersDone, EdgesDone, EdgeVertsDone]),
    NextOut = In#cl_mem{fi=FiOut,
                        v_no=NoVs1+NoFs1+NoEs1,
                        fs_no=NoFs1*4,
                        e_no=NoEs1*4},
    subdiv_1(N-1, Out, NextOut, CL, [MoveDone]);
subdiv_1(_Level, Result, _Unused, _CL, Pending) ->
    {Result, Pending}.
%% Set up OpenCL: pick a device (preferred type first, the other type as a
%% fallback), create a command queue on the first device, build
%% cc_subdiv.cl from the examples directory of the cl application and wrap
%% every kernel of the program in a #kernel{} record.
%% Returns a #cli{} record; exits with {no_opencl_device, Reason},
%% {file_not_found, _} or {error, build_program_failure} on failure.
initCL() ->
    Opts = [],
    Prefered = proplists:get_value(cl_type, Opts, cpu),
    Other = [gpu,cpu] -- [Prefered],
    CL = case clu:setup(Prefered) of
             {error, _} ->
                 case clu:setup(Other) of
                     {error, R} ->
                         exit({no_opencl_device, R});
                     Fallback ->
                         Fallback
                 end;
             First ->
                 First
         end,
    [Device|_] = CL#cl.devices,
    {ok,Queue} = cl:create_queue(CL#cl.context,Device,[]),
    %%% Compile
    %% code:lib_dir/1 returns {error, bad_name} when the cl application is
    %% not found in the code path; report that with the same hint as a
    %% missing source file instead of crashing inside filename:join/2.
    Dir = case code:lib_dir(cl) of
              {error, bad_name} ->
                  io:format("OpenCL code not found run: erl -pa ABS_PATH/cl/ebin~n", []),
                  exit({file_not_found, "cc_subdiv.cl"});
              LibDir ->
                  filename:join(LibDir, "examples")
          end,
    Bin = case file:read_file(filename:join([Dir, "cc_subdiv.cl"])) of
              {ok, B} ->
                  B;
              {error, _} ->
                  io:format("OpenCL code not found run: erl -pa ABS_PATH/cl/ebin~n", []),
                  exit({file_not_found, Dir})
          end,
    case clu:build_source(CL, Bin) of
        {error, {Err={error,build_program_failure}, _}} ->
            %% io:format("~s", [Str]),
            exit(Err);
        {ok, Program} ->
            {ok, MaxWGS} = cl:get_device_info(Device, max_work_group_size),
            {ok, Kernels0} = cl:create_kernels_in_program(Program),
            Kernels = [kernel_info(K,Device, MaxWGS) || K <- Kernels0],
            %% io:format("Kernels ~p~n",[Kernels]),
            CLI = #cli{context=CL#cl.context,kernels=Kernels,
                       q=Queue, device=Device, cl=CL},
            cl:release_program(Program),
            CLI
    end.

%% Build a #kernel{} record for kernel K: its function name as an atom and a
%% work-group size capped by the device maximum MaxWGS.
kernel_info(K,Device, MaxWGS) ->
    {ok, WG} = cl:get_kernel_workgroup_info(K, Device, work_group_size),
    {ok, Name} = cl:get_kernel_info(K, function_name),
    #kernel{name=list_to_atom(Name), wg=min(WG,MaxWGS), id=K}.

%% Set Args on the kernel called Name and enqueue it for No work items,
%% waiting on the events in Wait. Returns the event of the enqueued kernel.
cl_apply(Name, Args, No, Wait, #cli{q=Q, kernels=Ks}) ->
    #kernel{id=K, wg=WG0} = lists:keyfind(Name, #kernel.name, Ks),
    try clu:apply_kernel_args(K, Args) of
        ok -> ok
    catch error:Reason ->
            io:format("Bad args ~p: ~p~n",[Name, Args]),
            %% NOTE(review): erlang:get_stacktrace/0 is deprecated in newer
            %% OTP releases; kept because the README targets R16B.
            erlang:raise(error,Reason, erlang:get_stacktrace())
    end,
    %% The global size is rounded up to the nearest multiple of the
    %% work-group size. Rounding with a ceiling division avoids launching a
    %% whole surplus work-group when No is already an exact multiple.
    {GWG,WG} = if No > WG0 -> {((No + WG0 - 1) div WG0) * WG0, WG0};
                  true -> {No,No}
               end,
    {ok, Event} = cl:enqueue_nd_range_kernel(Q,K,[GWG],[WG],Wait),
    Event.
%% OpenCL Memory allocation

%% Allocate the input and output buffer sets (faces, edges, vertices; 16
%% bytes per element, sized by verify_size/2) plus the temp buffers.
%% Returns {InMem, OutMem, CLI}; both #cl_mem{} records share the same
%% face-index buffer.
cl_allocate(Base, CL0=#cli{context=Ctxt}) ->
    {NoFs,NoEs,NoVs,NoFs1,MaxFs,MaxEs,MaxVs} = verify_size(Base, CL0),
    Alloc = fun(Elements) ->
                    {ok, Mem} = cl:create_buffer(Ctxt, [], Elements*16),
                    Mem
            end,
    FsIn  = Alloc(MaxFs),
    EsIn  = Alloc(MaxEs),
    VsIn  = Alloc(MaxVs),
    FsOut = Alloc(MaxFs),
    EsOut = Alloc(MaxEs),
    VsOut = Alloc(MaxVs),
    CL = check_temp_buffs(CL0, MaxFs),
    #cli{fi=Fi} = CL,
    InMem = #cl_mem{v=VsIn, f=FsIn, e=EsIn, fi=Fi, fi0=Fi,
                    v_no=NoVs, fs_no=NoFs, e_no=NoEs},
    OutMem = #cl_mem{v=VsOut, f=FsOut, e=EsOut, fi=Fi, fi0=Fi,
                     v_no=NoVs+NoFs+NoEs, fs_no=NoFs1, e_no=NoEs*4},
    {InMem, OutMem, CL}.

%% Enqueue writes of the base mesh to the device: the vertices go to both
%% the input and the output vertex buffer, faces and edges to the input
%% buffers only. Returns the four write events.
cl_write_input(#base{f=Fs,e=Es,v=Vs},
               #cl_mem{v=VsIn,f=FsIn,e=EsIn}, #cl_mem{v=VsOut},
               #cli{q=Q}) ->
    Write = fun(Mem, Bin) ->
                    {ok, Ev} = cl:enqueue_write_buffer(Q, Mem, 0, byte_size(Bin), Bin, []),
                    Ev
            end,
    VsInEv  = Write(VsIn, Vs),
    VsOutEv = Write(VsOut, Vs),
    FsEv    = Write(FsIn, Fs),
    EsEv    = Write(EsIn, Es),
    [VsInEv, VsOutEv, FsEv, EsEv].

%% Release the vertex, face and edge buffers that are set and, when All is
%% true, the face-index buffer fi0 as well.
cl_release(#cl_mem{v=Vs,f=Fs,e=Es, fi0=Fi0}, All) ->
    Release = fun(undefined) -> false;
                 (Mem) -> cl:release_mem_object(Mem)
              end,
    Release(Vs),
    Release(Fs),
    Release(Es),
    All andalso cl:release_mem_object(Fi0).

%% Make sure the vab, fl and fi temp buffers are large enough for MaxFs0
%% faces, (re)allocating through check_temp/6 when they are not. The updated
%% #cli{} is also stored in the process dictionary under {?MODULE, cl}.
check_temp_buffs(CL=#cli{context=Ctxt,
                         vab=Vab0, vab_sz=VabSz0,
                         fl=FL0, fl_sz=FLSz0,
                         fi=Fi0, fi_sz=FiSz0}, MaxFs0) ->
    %% Overallocate so we don't need new buffers all the time
    MaxFs = trunc(MaxFs0*1.5),
    %% Initial face-index contents: one {C*4, 4} pair of 32-bit ints per face.
    InitFi = fun() ->
                     << <<(Idx*4):?I32, 4:?I32>> || Idx <- lists:seq(0, MaxFs-1) >>
             end,
    {Vab,VabSz} = check_temp(Vab0, VabSz0, MaxFs*(3+3)*4*4, Ctxt, [write_only], none),
    {FL,FLSz}   = check_temp(FL0, FLSz0, MaxFs*3*4, Ctxt, [read_only], none),
    {Fi,FiSz}   = check_temp(Fi0, FiSz0, MaxFs*2*4, Ctxt, [read_only], InitFi),
    Updated = CL#cli{vab=Vab, vab_sz=VabSz,
                     fl=FL, fl_sz=FLSz,
                     fi=Fi, fi_sz=FiSz},
    put({?MODULE, cl}, Updated),
    Updated.
%% check_temp(Buff, CurrentSize, RequiredSize, Context, Opts, Init)
%% Returns {Buffer, Size}. The existing buffer is kept when it is already
%% large enough; otherwise it is released (if there is one) and a new buffer
%% of RequiredSize bytes is created. Init is either none or a fun returning
%% the initial contents.
check_temp(Buff, Current, Req, _, _, _)
  when Current >= Req ->
    {Buff, Current};
check_temp(undefined, _, Req, Ctxt, Opt, none) ->
    {ok, Buff} = cl:create_buffer(Ctxt, Opt, Req),
    {Buff, Req};
check_temp(undefined, _, Req, Ctxt, Opt, Fun) ->
    {ok,Buff} = cl:create_buffer(Ctxt, Opt, Req, Fun()),
    {Buff, Req};
check_temp(Buff0, _, Req, Ctxt, Opt, Data) ->
    cl:release_mem_object(Buff0),
    check_temp(undefined, 0, Req, Ctxt, Opt, Data).

%% Compute element counts for the base mesh (16 bytes per element) and the
%% maximum counts needed for the requested number of levels, checked
%% against the device's max_mem_alloc_size.
%% Returns {NoFs, NoEs, NoVs, NoFs*4, MaxFs, MaxEs, MaxVs} or exits with
%% out_of_memory when not even the first level fits.
verify_size(#base{f=Fs, e=Es, v=Vs, level=N}, #cli{device=Device}) ->
    NoFs = size(Fs) div 16,
    NoEs = size(Es) div 16,
    NoVs = size(Vs) div 16,

    {ok, DevTotal} = cl:get_device_info(Device, max_mem_alloc_size),
    Res = verify_size_1(N-1, N, NoFs*4, NoEs*4, NoVs+NoEs+NoFs, DevTotal),
    case Res of
        false ->
            io:format("Can not subdivide, out of memory~n",[]),
            exit(out_of_memory);
        {MaxFs, MaxEs, MaxVs} ->
            {NoFs, NoEs, NoVs, NoFs*4, MaxFs, MaxEs, MaxVs}
    end.

%% verify_size_1(LevelsLeft, RequestedLevels, Fs, Es, Vs, CardMax)
%% Fs/Es/Vs are the element counts after the current level. Returns
%% {Fs, Es, Vs} for the deepest level whose estimated total stays below
%% CardMax, or false when the current level itself does not fit. When a
%% deeper level does not fit, a message is printed and the counts of the
%% current level are returned instead.
verify_size_1(N, No, Fs, Es, Vs, CardMax) ->
    VertexSz = (3+3)*4*4,
    Total = Fs*VertexSz+2*(Fs*16+Es*16+Vs*16),
    case Total < CardMax of
        true when N == 0 ->
            {Fs,Es,Vs};
        true ->
            case verify_size_1(N-1, No, Fs*4, Es*4, Vs+Fs+Es, CardMax) of
                false ->
                    %% The two adjacent literals are concatenated by the
                    %% compiler, so the separating space must be explicit.
                    io:format("Out of memory, does not meet the number of sub-division "
                              "levels ~p(~p)~n",[No-N,No]),
                    {Fs,Es,Vs};
                Other ->
                    Other
            end;
        false ->
            false
    end.

%%%%% OpenGL

%% One-time GL state setup for the canvas: viewport, orthographic
%% projection scaled by the canvas aspect ratio, depth test, one light and
%% alpha blending.
initGL(Canvas) ->
    {W,H} = wxWindow:getClientSize(Canvas),
    io:format("Size ~p ~n",[{W,H}]),
    gl:viewport(0,0,W,H),
    gl:matrixMode(?GL_PROJECTION),
    gl:loadIdentity(),
    gl:ortho( -10.0, 10.0, -10.0*H/W, 10.0*H/W, -100.0, 100.0),
    gl:enable(?GL_DEPTH_TEST),
    gl:depthFunc(?GL_LESS),
    gl:clearColor(0.8,0.8,0.8,1.0),
    gl:shadeModel(?GL_SMOOTH),
    gl:disable(?GL_CULL_FACE),
    %% Nowadays you should really use a shader to do the lighting but I'm lazy.
    gl:enable(?GL_COLOR_MATERIAL),
    gl:enable(?GL_LIGHTING),
    gl:lightfv(?GL_LIGHT0, ?GL_DIFFUSE, {1,1,1,1}),
    gl:lightfv(?GL_LIGHT0, ?GL_SPECULAR, {0.5,0.5,0.5,1}),
    gl:lightfv(?GL_LIGHT0, ?GL_POSITION, {0.71,0.71,0.0,0.0}),
    gl:enable(?GL_LIGHT0),
    gl:enable(?GL_BLEND),
    gl:blendFunc(?GL_SRC_ALPHA, ?GL_ONE_MINUS_SRC_ALPHA),
    ok.

%% Corner positions of the small box drawn by drawBox/1 (1-based, looked up
%% with element/2).
-define(VS, {{ 0.5,  0.5, -0.5},  %1
             { 0.5, -0.5, -0.5},  %2
             {-0.5, -0.5, -0.5},
             {-0.5,  0.5, -0.5},  %4
             {-0.5,  0.5,  0.5},
             { 0.5,  0.5,  0.5},  %6
             { 0.5, -0.5,  0.5},
             {-0.5, -0.5,  0.5}}).%8

%% Box faces as {CornerIndices, Normal}.
-define(FS,
        %% Faces    Normal
        [{{1,2,3,4},{0,0,-1} },  %
         {{3,8,5,4},{-1,0,0}},   %
         {{1,6,7,2},{1,0,0} },   %
         {{6,5,8,7},{0,0,1} },   %
         {{6,1,4,5},{0,1,0} },   %
         {{7,8,3,2},{0,-1,0}}]).

%% Draw the box in immediate mode, rotated by Deg degrees. Note that the
%% modelview matrix is reset here before the rotations are applied.
drawBox(Deg) ->
    gl:matrixMode(?GL_MODELVIEW),
    gl:loadIdentity(),
    gl:rotatef(Deg, 0.0, 1.0, 0.3),
    gl:rotatef(20, 1.0, 0.0, 1.0),
    gl:'begin'(?GL_QUADS),
    lists:foreach(fun(Face) -> drawFace(Face,?VS) end, ?FS),
    gl:'end'().

%% Emit one quad: the face normal also selects the colour (its absolute
%% components), and each corner gets a texture coordinate.
drawFace({{V1,V2,V3,V4},N={N1,N2,N3}}, Cube) ->
    gl:normal3fv(N),
    gl:color3f(abs(N1),abs(N2),abs(N3)),
    gl:texCoord2f(0.0, 1.0), gl:vertex3fv(element(V1, Cube)),
    gl:texCoord2f(0.0, 0.0), gl:vertex3fv(element(V2, Cube)),
    gl:texCoord2f(1.0, 0.0), gl:vertex3fv(element(V3, Cube)),
    gl:texCoord2f(1.0, 1.0), gl:vertex3fv(element(V4, Cube)).

%% Create the "File" menu with About and Quit entries and subscribe the
%% frame to menu selection events.
setup_menus(Frame) ->
    MenuBar = wxMenuBar:new(),
    Menu = wxMenu:new([]),
    true = wxMenuBar:append(MenuBar, Menu, "File"),
    wxMenu:append(Menu, ?wxID_ABOUT,"About"),
    wxMenu:append(Menu, ?wxID_EXIT, "Quit"),
    ok = wxFrame:connect(Frame, command_menu_selected),
    ok = wxFrame:setMenuBar(Frame,MenuBar).
about_box(Frame, #cli{device=Device}) -> Env = wx:get_env(), OsInfo = [wx_misc:getOsDescription(),gl:getString(?GL_VENDOR), gl:getString(?GL_RENDERER),gl:getString(?GL_VERSION)], DeviceInfo = [{Type, cl:get_device_info(Device, Type)} || Type <- [name, vendor, version]], spawn(fun() -> wx:set_env(Env), Str = "An OpenGL demo showing how to combine " " OpenCL and OpenGL, Catmull-Clark subdivision is done in OpenCL\n" " The transparent \"box\" is the original mesh and the subdivided" " yellow pipes is the result of the subdivision\n\n", Info = io_lib:format("Os: ~s~n~nGL Vendor: ~s~n" "GL Renderer: ~s~nGL Version: ~s~n", OsInfo), CLInfo = [io_lib:format("~-25.w ~s~n",[Type,I]) || {Type, {ok, I}} <- DeviceInfo], MD = wxMessageDialog:new(Frame, [Str, Info, "\nOpenCL info:\n",CLInfo], [{style, ?wxOK}, {caption, "Opengl Example"}]), wxDialog:showModal(MD), wxDialog:destroy(MD) end), ok. faces() -> <<1,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,4,0,0,0,5,0,0,0,3,0,0,0,1,0,0,0,6,0,0,0, 7,0,0,0,5,0,0,0,4,0,0,0,0,0,0,0,2,0,0,0,7,0,0,0,6,0,0,0,4,0,0,0,9,0,0,0, 8,0,0,0,6,0,0,0,10,0,0,0,11,0,0,0,9,0,0,0,4,0,0,0,12,0,0,0,13,0,0,0,11, 0,0,0,10,0,0,0,6,0,0,0,8,0,0,0,13,0,0,0,12,0,0,0,1,0,0,0,15,0,0,0,14,0, 0,0,4,0,0,0,16,0,0,0,17,0,0,0,15,0,0,0,1,0,0,0,10,0,0,0,18,0,0,0,17,0,0, 0,16,0,0,0,4,0,0,0,14,0,0,0,18,0,0,0,10,0,0,0,6,0,0,0,20,0,0,0,19,0,0,0, 0,0,0,0,12,0,0,0,21,0,0,0,20,0,0,0,6,0,0,0,22,0,0,0,23,0,0,0,21,0,0,0,12, 0,0,0,0,0,0,0,19,0,0,0,23,0,0,0,22,0,0,0,22,0,0,0,25,0,0,0,24,0,0,0,16,0, 0,0,12,0,0,0,26,0,0,0,25,0,0,0,22,0,0,0,10,0,0,0,27,0,0,0,26,0,0,0,12,0, 0,0,16,0,0,0,24,0,0,0,27,0,0,0,10,0,0,0,0,0,0,0,29,0,0,0,28,0,0,0,1,0,0, 0,22,0,0,0,30,0,0,0,29,0,0,0,0,0,0,0,16,0,0,0,31,0,0,0,30,0,0,0,22,0,0, 0,1,0,0,0,28,0,0,0,31,0,0,0,16,0,0,0,29,0,0,0,33,0,0,0,32,0,0,0,28,0,0, 0,30,0,0,0,34,0,0,0,33,0,0,0,29,0,0,0,31,0,0,0,35,0,0,0,34,0,0,0,30,0,0, 0,28,0,0,0,32,0,0,0,35,0,0,0,31,0,0,0,25,0,0,0,37,0,0,0,36,0,0,0,24,0,0, 0,26,0,0,0,38,0,0,0,37,0,0,0,25,0,0,0,27,0,0,0,39,0,0,0,38,0,0,0,26,0,0, 
0,24,0,0,0,36,0,0,0,39,0,0,0,27,0,0,0,20,0,0,0,41,0,0,0,40,0,0,0,19,0,0, 0,21,0,0,0,42,0,0,0,41,0,0,0,20,0,0,0,23,0,0,0,43,0,0,0,42,0,0,0,21,0,0, 0,19,0,0,0,40,0,0,0,43,0,0,0,23,0,0,0,15,0,0,0,45,0,0,0,44,0,0,0,14,0,0, 0,17,0,0,0,46,0,0,0,45,0,0,0,15,0,0,0,18,0,0,0,47,0,0,0,46,0,0,0,17,0,0, 0,14,0,0,0,44,0,0,0,47,0,0,0,18,0,0,0,9,0,0,0,49,0,0,0,48,0,0,0,8,0,0,0, 11,0,0,0,50,0,0,0,49,0,0,0,9,0,0,0,13,0,0,0,51,0,0,0,50,0,0,0,11,0,0,0, 8,0,0,0,48,0,0,0,51,0,0,0,13,0,0,0,3,0,0,0,53,0,0,0,52,0,0,0,2,0,0,0,5, 0,0,0,54,0,0,0,53,0,0,0,3,0,0,0,7,0,0,0,55,0,0,0,54,0,0,0,5,0,0,0,2,0, 0,0,52,0,0,0,55,0,0,0,7,0,0,0>>. edges() -> <<1,0,0,0,0,0,0,0,20,0,0,0,0,0,0,0,1,0,0,0,4,0,0,0,1,0,0,0,8,0,0,0,1,0,0, 0,16,0,0,0,9,0,0,0,23,0,0,0,0,0,0,0,6,0,0,0,12,0,0,0,3,0,0,0,0,0,0,0,22, 0,0,0,21,0,0,0,15,0,0,0,6,0,0,0,4,0,0,0,4,0,0,0,2,0,0,0,6,0,0,0,12,0,0, 0,13,0,0,0,7,0,0,0,4,0,0,0,10,0,0,0,5,0,0,0,11,0,0,0,16,0,0,0,22,0,0,0, 16,0,0,0,22,0,0,0,16,0,0,0,10,0,0,0,10,0,0,0,19,0,0,0,22,0,0,0,12,0,0,0, 17,0,0,0,14,0,0,0,12,0,0,0,10,0,0,0,18,0,0,0,6,0,0,0,3,0,0,0,2,0,0,0,0, 0,0,0,44,0,0,0,0,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,5,0,0,0,3,0,0,0,1,0,0,0, 45,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,7,0,0,0,5,0,0,0,2,0,0,0,46,0,0, 0,4,0,0,0,5,0,0,0,1,0,0,0,2,0,0,0,2,0,0,0,7,0,0,0,3,0,0,0,47,0,0,0,6,0,0, 0,7,0,0,0,2,0,0,0,3,0,0,0,9,0,0,0,8,0,0,0,4,0,0,0,40,0,0,0,6,0,0,0,8,0,0, 0,7,0,0,0,4,0,0,0,11,0,0,0,9,0,0,0,5,0,0,0,41,0,0,0,4,0,0,0,9,0,0,0,4,0, 0,0,5,0,0,0,13,0,0,0,11,0,0,0,6,0,0,0,42,0,0,0,10,0,0,0,11,0,0,0,5,0,0,0, 6,0,0,0,8,0,0,0,13,0,0,0,7,0,0,0,43,0,0,0,12,0,0,0,13,0,0,0,6,0,0,0,7,0, 0,0,15,0,0,0,14,0,0,0,8,0,0,0,36,0,0,0,4,0,0,0,14,0,0,0,11,0,0,0,8,0,0, 0,17,0,0,0,15,0,0,0,9,0,0,0,37,0,0,0,1,0,0,0,15,0,0,0,8,0,0,0,9,0,0,0, 18,0,0,0,17,0,0,0,10,0,0,0,38,0,0,0,16,0,0,0,17,0,0,0,9,0,0,0,10,0,0,0, 14,0,0,0,18,0,0,0,11,0,0,0,39,0,0,0,10,0,0,0,18,0,0,0,10,0,0,0,11,0,0,0, 20,0,0,0,19,0,0,0,12,0,0,0,32,0,0,0,0,0,0,0,19,0,0,0,15,0,0,0,12,0,0,0, 
21,0,0,0,20,0,0,0,13,0,0,0,33,0,0,0,6,0,0,0,20,0,0,0,12,0,0,0,13,0,0,0, 23,0,0,0,21,0,0,0,14,0,0,0,34,0,0,0,12,0,0,0,21,0,0,0,13,0,0,0,14,0,0,0, 19,0,0,0,23,0,0,0,15,0,0,0,35,0,0,0,22,0,0,0,23,0,0,0,14,0,0,0,15,0,0,0, 25,0,0,0,24,0,0,0,16,0,0,0,28,0,0,0,16,0,0,0,24,0,0,0,19,0,0,0,16,0,0,0, 26,0,0,0,25,0,0,0,17,0,0,0,29,0,0,0,22,0,0,0,25,0,0,0,16,0,0,0,17,0,0,0, 27,0,0,0,26,0,0,0,18,0,0,0,30,0,0,0,12,0,0,0,26,0,0,0,17,0,0,0,18,0,0,0, 24,0,0,0,27,0,0,0,19,0,0,0,31,0,0,0,10,0,0,0,27,0,0,0,18,0,0,0,19,0,0,0, 29,0,0,0,28,0,0,0,20,0,0,0,24,0,0,0,1,0,0,0,28,0,0,0,23,0,0,0,20,0,0,0, 30,0,0,0,29,0,0,0,21,0,0,0,25,0,0,0,0,0,0,0,29,0,0,0,20,0,0,0,21,0,0,0, 31,0,0,0,30,0,0,0,22,0,0,0,26,0,0,0,22,0,0,0,30,0,0,0,21,0,0,0,22,0,0,0, 28,0,0,0,31,0,0,0,23,0,0,0,27,0,0,0,16,0,0,0,31,0,0,0,22,0,0,0,23,0,0,0, 222,255,255,255,32,0,0,0,24,0,0,0,255,255,255,255,28,0,0,0,32,0,0,0,27,0, 0,0,24,0,0,0,221,255,255,255,33,0,0,0,25,0,0,0,255,255,255,255,29,0,0,0, 33,0,0,0,24,0,0,0,25,0,0,0,220,255,255,255,34,0,0,0,26,0,0,0,255,255,255, 255,30,0,0,0,34,0,0,0,25,0,0,0,26,0,0,0,223,255,255,255,35,0,0,0,27,0,0, 0,255,255,255,255,31,0,0,0,35,0,0,0,26,0,0,0,27,0,0,0,218,255,255,255, 36,0,0,0,28,0,0,0,255,255,255,255,24,0,0,0,36,0,0,0,31,0,0,0,28,0,0,0, 217,255,255,255,37,0,0,0,29,0,0,0,255,255,255,255,25,0,0,0,37,0,0,0,28, 0,0,0,29,0,0,0,216,255,255,255,38,0,0,0,30,0,0,0,255,255,255,255,26,0,0, 0,38,0,0,0,29,0,0,0,30,0,0,0,219,255,255,255,39,0,0,0,31,0,0,0,255,255, 255,255,27,0,0,0,39,0,0,0,30,0,0,0,31,0,0,0,214,255,255,255,40,0,0,0,32, 0,0,0,255,255,255,255,19,0,0,0,40,0,0,0,35,0,0,0,32,0,0,0,213,255,255, 255,41,0,0,0,33,0,0,0,255,255,255,255,20,0,0,0,41,0,0,0,32,0,0,0,33,0, 0,0,212,255,255,255,42,0,0,0,34,0,0,0,255,255,255,255,21,0,0,0,42,0,0, 0,33,0,0,0,34,0,0,0,215,255,255,255,43,0,0,0,35,0,0,0,255,255,255,255, 23,0,0,0,43,0,0,0,34,0,0,0,35,0,0,0,210,255,255,255,44,0,0,0,36,0,0,0, 255,255,255,255,14,0,0,0,44,0,0,0,39,0,0,0,36,0,0,0,209,255,255,255,45, 
0,0,0,37,0,0,0,255,255,255,255,15,0,0,0,45,0,0,0,36,0,0,0,37,0,0,0,208, 255,255,255,46,0,0,0,38,0,0,0,255,255,255,255,17,0,0,0,46,0,0,0,37,0,0, 0,38,0,0,0,211,255,255,255,47,0,0,0,39,0,0,0,255,255,255,255,18,0,0,0, 47,0,0,0,38,0,0,0,39,0,0,0,206,255,255,255,48,0,0,0,40,0,0,0,255,255, 255,255,8,0,0,0,48,0,0,0,43,0,0,0,40,0,0,0,205,255,255,255,49,0,0,0,41, 0,0,0,255,255,255,255,9,0,0,0,49,0,0,0,40,0,0,0,41,0,0,0,204,255,255, 255,50,0,0,0,42,0,0,0,255,255,255,255,11,0,0,0,50,0,0,0,41,0,0,0,42,0, 0,0,207,255,255,255,51,0,0,0,43,0,0,0,255,255,255,255,13,0,0,0,51,0,0, 0,42,0,0,0,43,0,0,0,202,255,255,255,52,0,0,0,44,0,0,0,255,255,255,255, 2,0,0,0,52,0,0,0,47,0,0,0,44,0,0,0,201,255,255,255,53,0,0,0,45,0,0,0, 255,255,255,255,3,0,0,0,53,0,0,0,44,0,0,0,45,0,0,0,200,255,255,255,54, 0,0,0,46,0,0,0,255,255,255,255,5,0,0,0,54,0,0,0,45,0,0,0,46,0,0,0,203, 255,255,255,55,0,0,0,47,0,0,0,255,255,255,255,7,0,0,0,55,0,0,0,46,0,0, 0,47,0,0,0>>. verts() -> <<0,0,128,191,0,0,128,63,0,0,128,63,0,0,192,65,0,0,128,191,0,0,128,191,0, 0,128,63,0,0,192,65,0,0,128,191,0,0,128,63,205,204,140,63,0,0,128,65,0, 0,128,191,0,0,128,191,205,204,140,63,0,0,128,65,0,0,128,63,0,0,128,191, 0,0,128,63,0,0,192,65,0,0,128,63,0,0,128,191,205,204,140,63,0,0,128,65, 0,0,128,63,0,0,128,63,0,0,128,63,0,0,192,65,0,0,128,63,0,0,128,63,205, 204,140,63,0,0,128,65,205,204,140,63,0,0,128,63,0,0,128,63,0,0,128,65, 205,204,140,63,0,0,128,191,0,0,128,63,0,0,128,65,0,0,128,63,0,0,128,191, 0,0,128,191,0,0,192,65,205,204,140,63,0,0,128,191,0,0,128,191,0,0,128, 65,0,0,128,63,0,0,128,63,0,0,128,191,0,0,192,65,205,204,140,63,0,0,128, 63,0,0,128,191,0,0,128,65,0,0,128,63,205,204,140,191,0,0,128,63,0,0,128, 65,0,0,128,191,205,204,140,191,0,0,128,63,0,0,128,65,0,0,128,191,0,0, 128,191,0,0,128,191,0,0,192,65,0,0,128,191,205,204,140,191,0,0,128,191, 0,0,128,65,0,0,128,63,205,204,140,191,0,0,128,191,0,0,128,65,0,0,128, 191,205,204,140,63,0,0,128,63,0,0,128,65,0,0,128,63,205,204,140,63,0,0, 
128,63,0,0,128,65,0,0,128,63,205,204,140,63,0,0,128,191,0,0,128,65,0,0, 128,191,0,0,128,63,0,0,128,191,0,0,192,65,0,0,128,191,205,204,140,63,0, 0,128,191,0,0,128,65,0,0,128,191,0,0,128,191,205,204,140,191,0,0,128,65, 0,0,128,191,0,0,128,63,205,204,140,191,0,0,128,65,0,0,128,63,0,0,128,63, 205,204,140,191,0,0,128,65,0,0,128,63,0,0,128,191,205,204,140,191,0,0, 128,65,205,204,140,191,0,0,128,191,0,0,128,63,0,0,128,65,205,204,140, 191,0,0,128,63,0,0,128,63,0,0,128,65,205,204,140,191,0,0,128,63,0,0,128, 191,0,0,128,65,205,204,140,191,0,0,128,191,0,0,128,191,0,0,128,65,51,51, 163,192,0,0,128,191,0,0,128,63,0,0,96,65,51,51,163,192,0,0,128,63,0,0, 128,63,0,0,96,65,51,51,163,192,0,0,128,63,0,0,128,191,0,0,96,65,51,51, 163,192,0,0,128,191,0,0,128,191,0,0,96,65,0,0,128,191,0,0,128,191,51, 51,163,192,0,0,96,65,0,0,128,191,0,0,128,63,51,51,163,192,0,0,96,65,0, 0,128,63,0,0,128,63,51,51,163,192,0,0,96,65,0,0,128,63,0,0,128,191,51, 51,163,192,0,0,96,65,0,0,128,191,51,51,163,64,0,0,128,63,0,0,96,65,0, 0,128,63,51,51,163,64,0,0,128,63,0,0,96,65,0,0,128,63,51,51,163,64,0, 0,128,191,0,0,96,65,0,0,128,191,51,51,163,64,0,0,128,191,0,0,96,65,0, 0,128,63,51,51,163,192,0,0,128,63,0,0,96,65,0,0,128,191,51,51,163,192, 0,0,128,63,0,0,96,65,0,0,128,191,51,51,163,192,0,0,128,191,0,0,96,65, 0,0,128,63,51,51,163,192,0,0,128,191,0,0,96,65,51,51,163,64,0,0,128, 63,0,0,128,63,0,0,96,65,51,51,163,64,0,0,128,191,0,0,128,63,0,0,96, 65,51,51,163,64,0,0,128,191,0,0,128,191,0,0,96,65,51,51,163,64,0,0, 128,63,0,0,128,191,0,0,96,65,0,0,128,191,0,0,128,63,51,51,163,64,0, 0,96,65,0,0,128,191,0,0,128,191,51,51,163,64,0,0,96,65,0,0,128,63, 0,0,128,191,51,51,163,64,0,0,96,65,0,0,128,63,0,0,128,63,51,51,163, 64,0,0,96,65>>. cl-cl-1.2.3/examples/cl_bandwidth.erl000066400000000000000000000051301301041406700174460ustar00rootroot00000000000000%% %% SquareFloat program adpoted from "Hello World" OpenCL examples by apple %% -module(cl_bandwidth). -compile(export_all). -import(lists, [map/2]). 
-include("../include/cl.hrl"). -define(DATA_SIZE, 1*1024*1024). test_data(Length) -> << <> || X <- lists:duplicate(Length, 1) >>. test() -> test(all). test(DevType) -> %% Create binary with floating points 1.0 ... 1024.0 Data = test_data(?DATA_SIZE), run(Data, DevType). test(Length, DevType) when is_number(Length) -> Data = test_data(Length), run(Data, DevType). %% %% execute a kernel that squares floating point numbers %% now only one device is used (We run on cpu for debugging) %% run(Data, DevType) -> E = clu:setup(DevType), io:format("platform created\n"), N = byte_size(Data), %% number of bytes in indata io:format("Testing with byte size: ~p \n", [N]), %% Create input data memory (implicit copy_host_ptr) {ok,Input} = cl:create_buffer(E#cl.context,[read_only],N), io:format("input memory created\n"), %% Create the command queue for the first device {ok,Queue} = cl:create_queue(E#cl.context,hd(E#cl.devices),[]), io:format("queue created\n"), %% run benchmark on data messuring best write time {WriteTotal, WriteQueueTotal} = write_loop(1000, Queue, Input, Data, N), io:format("Bandwidth tested with write size: ~p bytes\n\n", [N]), io:format("Write total milliseconds: ~p\n", [WriteTotal]), io:format("Bandwidth rate: ~p KB per second\n\n", [trunc((N / (WriteTotal/1000))/1024)]), io:format("Queue total milliseconds: ~p\n", [WriteQueueTotal]), io:format("Bandwidth rate: ~p KB per second\n\n", [trunc((N / (WriteQueueTotal/1000))/1024)]), %% cl:release_mem_object(Input), cl:release_queue(Queue), clu:teardown(E). write_loop(Max, Queue, Mem, Data, N) -> write_loop(Max, Queue, Mem, Data, N, undefined, 0.0). 
write_loop(0, _Queue, _Mem, _Data, _N, TBest, TQBest) -> {TBest, TQBest}; write_loop(I, Queue, Mem, Data, N, TBest, TQBest) -> WriteQueueStart = erlang:now(), {ok,E1} = cl:enqueue_write_buffer(Queue, Mem, 0, N, Data, []), WriteQueueEnd = erlang:now(), WQT = timer:now_diff(WriteQueueEnd, WriteQueueStart)/1000, WriteStart = erlang:now(), ok = cl:flush(Queue), {ok,completed} = cl:wait(E1), WriteEnd = erlang:now(), WT = timer:now_diff(WriteEnd, WriteStart)/1000, if TBest =:= undefined; WT < TBest -> write_loop(I-1, Queue, Mem, Data, N, WT, WQT); true -> write_loop(I-1, Queue, Mem, Data, N, TBest, TQBest) end. cl-cl-1.2.3/examples/cl_compile.erl000066400000000000000000000125401301041406700171350ustar00rootroot00000000000000%%% @author Tony Rogvall %%% @copyright (C) 2014, Tony Rogvall %%% @doc %%% A opencl compiler wrapper %%% @end %%% Created : 9 May 2014 by Tony Rogvall -module(cl_compile). -compile(export_all). %% compile File into binary, file(File) -> file(File,all). file(File,Type) -> case lists:member({1,2}, cl:versions()) of true -> file(File,Type,"-cl-kernel-arg-info"); false -> file(File,Type,"") end. file(File,Type,Options) -> Clu = clu:setup(Type), case clu:build_source_file(Clu, File, Options) of Err = {error,_} -> Err; {ok,Program} -> info(Program) end. info(Program) -> {ok,Ds} = cl:get_program_info(Program, devices), {ok,Bs} = cl:get_program_info(Program, binaries), lists:foreach(fun(Device) -> build_info(Program, Device) end, Ds), program_info(Program), {ok,Kernels} = cl:create_kernels_in_program(Program), lists:foreach( fun(Kernel) -> {ok,KernelInfo} = cl:get_kernel_info(Kernel), io:format("KernelInfo: ~p\n", [KernelInfo]), lists:foreach( fun(Device) -> {ok,I}=cl:get_kernel_workgroup_info(Kernel,Device), io:format("KernelWorkGroupInfo: ~p\n", [I]) end, Ds), case lists:member({1,2}, cl:versions()) of true -> {ok,ArgInfo} = cl:get_kernel_arg_info(Kernel), io:format("arg_info: ~p\n", [ArgInfo]); false -> ok end end, Kernels), {ok,Bs}. 
program_info(Program) -> io:format("ProgramInfo:\n", []), lists:foreach( fun(Attr) -> case cl:get_program_info(Program,Attr) of {ok,Value} -> io:format(" ~s: ~p\n", [Attr,Value]); {error,Reason} -> io:format("InfoError: ~s [~p]\n", [Attr,Reason]) end end, cl:program_info()). build_info(Program, Device) -> io:format("BuildInfo @ ~w\n", [Device]), {ok,BuildInfo} = cl:get_program_build_info(Program,Device), lists:foreach( fun({Attr,Value}) -> io:format(" ~s: ~p\n", [Attr,Value]) end, BuildInfo), case lists:member({1,2}, cl:versions()) of true -> %% fixme: version handle program_build_info case cl:get_program_build_info(Program,Device,binary_type) of {ok,BinaryInfo} -> io:format(" ~s: ~p\n", [binary_type,BinaryInfo]); {error,Reason} -> io:format("InfoError: ~s [~p]\n", [binary_type,Reason]) end; false -> ok end. %% compile & link with openCL version 1.2 inc1() -> " #define FOO 5 ". inc2() -> " #define BAR 7 ". prog1() -> " #include \"inc1.h\"\n #include \"inc2.h\"\n __kernel void sum(int x, int y, __global int* z) { int i = get_global_id(0); z[i] = x + y + FOO + BAR + BAZ; } ". prog2() -> " #define FOO 5 #define BAR 7 __kernel void prod(int x, int y, __global int* z) { int i = get_global_id(0); z[i] = x*y*FOO*BAR + BAZ; } ". make_prog(Clu,prog1) -> {ok,Program} = cl:create_program_with_source(clu:context(Clu), prog1()), {ok,Inc1} = cl:create_program_with_source(clu:context(Clu), inc1()), {ok,Inc2} = cl:create_program_with_source(clu:context(Clu), inc2()), {Program, [Inc1,Inc2], ["inc1.h", "inc2.h"]}; make_prog(Clu,prog2) -> {ok,Program} = cl:create_program_with_source(clu:context(Clu), prog2()), {Program, [], []}. %% MackBookPro, mac os x 10.9 with GEForce 9400M test_12(gpu,prog1) %% fail with an error saying that the compiler can not find include %% files 'inc1.h' test_12() -> test_12(prog1, cpu). test_12(Prog, Type) -> true = lists:member({1,2}, cl:versions()), Clu = clu:setup(Type), compile_12(Clu, Prog). 
compile_12(Clu, Prog) -> {Program,Includes,IncludeNames} = make_prog(Clu,Prog), Ds = clu:device_list(Clu), case cl:compile_program(Program,Ds,"-DBAZ=11", Includes, IncludeNames) of ok -> Status = [get_build_status(Program, Dev) || Dev <- Ds], case lists:any(fun(success) -> true; (_) -> false end, Status) of true -> {ok,Program}; false -> Logs = get_program_logs(Program), io:format("Logs: ~s\n", [Logs]), {error,{Status,Logs}} end; Error -> Logs = get_program_logs(Program), io:format("Logs: ~s\n", [Logs]), cl:release_program(Program), {error,{Error,Logs}} end. link_12(Type) -> link_12(prog1,Type). link_12(Prog,Type) -> true = lists:member({1,2}, cl:versions()), Clu = clu:setup(Type), {ok,Prog1} = compile_12(Clu, Prog), io:format("Prog1 = ~p\n", [Prog1]), %% {ok,Prog2} = compile_12(Clu, prog2), %% io:format("Prog2 = ~p\n", [Prog2]), case cl:link_program(clu:context(Clu), clu:device_list(Clu), "", [Prog1]) of {ok, Program} -> %% check status & logs get_program_binaries(Program); Error -> Error end. get_build_status(Program, Device) -> {ok,Status} = cl:get_program_build_info(Program, Device, status), {ok,BinaryType} = cl:get_program_build_info(Program, Device, binary_type), io:format("status: ~p, binary_type=~p\n", [Status, BinaryType]), Status. get_program_logs(Program) -> {ok,DeviceList} = cl:get_program_info(Program, devices), lists:map( fun(Device) -> {ok,Log} = cl:get_program_build_info(Program,Device,log), Log end, DeviceList). get_program_binaries(Program) -> {ok,DeviceList} = cl:get_program_info(Program, devices), {ok,BinaryList} = cl:get_program_info(Program, binaries), {ok,{DeviceList, BinaryList}}. cl-cl-1.2.3/examples/cl_map.erl000066400000000000000000000212731301041406700162650ustar00rootroot00000000000000-module(cl_map). -include_lib("cl/include/cl.hrl"). -compile(export_all). -import(lists, [map/2, foreach/2, foldl/3]). 
-record(kwork, { queue, %% the queue local, %% kernel work_group_size freq, %% device max_clock_frequenct units, %% device max_compute_units weight, %% weight [0..1] e1,e2,e3, %% events (fixme) imem, %% input memory object omem, %% output memory object isize, %% item size idata %% input data }). test() -> Args = << <> || X <- lists:seq(1, 1024) >>, ResultList = run("fun(<>) -> X*X+1 end", Args), lists:flatmap( fun(Result) -> [ X || <> <= Result ] end, ResultList). %% %% Run a map operation over data %% Restrictions: the output must currently equal the size of %% %% run(Function, Data) -> E = clu:setup(all), %% gpu needs more work {_NArgs,ItemSize,Source} = p_program(Function), io:format("Program:\n~s\n", [Source]), {ok,Program} = clu:build_source(E, Source), {ok,Kernel} = cl:create_kernel(Program, "example"), Kws = map( fun(Device) -> {ok,Queue} = cl:create_queue(E#cl.context,Device,[]), {ok,Local} = cl:get_kernel_workgroup_info(Kernel,Device, work_group_size), {ok,Freq} = cl:get_device_info(Device,max_clock_frequency), {ok,K} = cl:get_device_info(Device, max_compute_units), #kwork{ queue=Queue, local=Local, freq=Freq, units=K, isize=ItemSize } end, E#cl.devices), io:format("Kws = ~p\n", [Kws]), %% Sum the weights and scale to [0..1] Tw = foldl(fun(K,Sum) -> Sum + K#kwork.freq*K#kwork.units end, 0, Kws), Kws1 = map(fun(K) -> K#kwork { weight = (K#kwork.freq*K#kwork.units)/Tw } end, Kws), io:format("Kws1 = ~p\n", [Kws1]), %% Split data according to Weights but start with data %% That have hard requirements on work_group_size Kws11 = lists:reverse(lists:keysort(#kwork.local,Kws1)), Kws2 = kwork_set_data(Kws11, Data), io:format("Kws2 = ~p\n", [Kws2]), %% Create memory objects Kws3 = map( fun(K) -> Nk = byte_size(K#kwork.idata), {ok,I} = cl:create_buffer(E#cl.context,[read_only],Nk), {ok,O} = cl:create_buffer(E#cl.context,[write_only],Nk), K#kwork { imem=I, omem=O } end, Kws2), io:format("Kws3 = ~p\n", [Kws3]), %% Enque input data Kws4 = map( fun(K) -> Nk = 
byte_size(K#kwork.idata), Count = Nk div K#kwork.isize, {ok,E1} = cl:enqueue_write_buffer(K#kwork.queue, K#kwork.imem, 0, Nk, K#kwork.idata, []), %% Set kernel arguments ok = cl:set_kernel_arg(Kernel, 0, K#kwork.imem), ok = cl:set_kernel_arg(Kernel, 1, K#kwork.omem), ok = cl:set_kernel_arg(Kernel, 2, Count), %% Enqueue the kernel Global = Count, io:format("Global=~w, Local=~w\n", [Global,K#kwork.local]), {ok,E2} = cl:enqueue_nd_range_kernel(K#kwork.queue, Kernel, [Global], [K#kwork.local], [E1]), %% Enqueue the read from device memory (wait for kernel to finish) {ok,E3} = cl:enqueue_read_buffer(K#kwork.queue, K#kwork.omem,0,Nk,[E2]), %% Now flush the queue to make things happend ok = cl:flush(K#kwork.queue), %% FIXME: here we should release E1,E2 K#kwork { e1=E1,e2=E2,e3=E3 } end, Kws3), io:format("Kws4 = ~p\n", [Kws4]), %% Wait for Result buffer to be written Bs = map( fun(K) -> io:format("E1 = ~p\n", [cl:wait(K#kwork.e1)]), io:format("E2 = ~p\n", [cl:wait(K#kwork.e2)]), {ok,Bin} = cl:wait(K#kwork.e3), cl:release_mem_object(K#kwork.imem), cl:release_mem_object(K#kwork.omem), cl:release_queue(K#kwork.queue), %% Release built into cl:wait! %% cl:release_event(K#kwork.e1), %% cl:release_event(K#kwork.e2), %% cl:release_event(K#kwork.e3), Bin end, Kws4), cl:release_kernel(Kernel), cl:release_program(Program), clu:teardown(E), Bs. %% %% Assume at least one kwork %% Data must be a multiple of local (work_group_size) %% FIXME: This must be reworked to handle all cases %% kwork_set_data([K], Data) -> [K#kwork { idata = Data }]; kwork_set_data([K|Ks], Data) -> N = byte_size(Data) div K#kwork.isize, M = trunc(K#kwork.weight * N), %% make a multiple of local L = K#kwork.local, R = ((L - (M rem L)) rem L), ML = M + R, io:format("N=~w, M=~w, L=~w, R=~w, ML=~w\n", [N,M,L,R,ML]), if ML =< N -> Md = ML*K#kwork.isize, <> = Data, [K#kwork { idata = Data1 } | kwork_set_data(Ks, Data2)]; true -> Rd = R*K#kwork.isize, [K#kwork { idata = <> } | Ks] end. 
%%
%% Function:
%%   fun(<<X/T0>>,P1,..,Pn) ->
%%      F(X,P1,...Pn)
%%
%% Translates to
%%   __kernel void example(__global T0* in, __global T0* out,
%%                         const uint n,
%%                         const T1 p1, const T2 p2 .. const Tn pn)
%%   {
%%      int i = get_global_id(0);
%%      if (i < n) {
%%         T0 X = in[i];
%%         out[i] = F(X,p1,..pn);
%%      }
%%   }
%%
%% p_program(Function) scans and parses the Erlang fun source text.
%% Returns {NArgs, ItemSize, KernelSource} on success, or the error
%% term from erl_scan / erl_parse unchanged.
%%
p_program(Function) ->
    case erl_scan:string(Function) of
        {ok,Ts,_Ln} ->
            case erl_parse:parse_exprs(add_dot(Ts)) of
                {ok, Exprs} ->
                    p_fun(Exprs);
                Error ->
                    Error
            end;
        Error ->
            Error
    end.

%% Make sure the token list ends with a dot token so it can be fed to
%% erl_parse:parse_exprs/1.  An empty token list (empty or
%% whitespace-only source) gets a lone dot, which lets the parser
%% report the syntax error instead of lists:last/1 crashing.
add_dot([]) ->
    [{dot,1}];
add_dot(Ts) ->
    case lists:last(Ts) of
        {dot,_} ->
            Ts;
        E ->
            Ts ++ [{dot,element(2,E)}]
    end.

%% Translate a single-clause, guard-less fun into kernel source.
%% Anything else is printed and rejected with error(not_supported).
p_fun([{'fun',_Ln1,{clauses,[{clause,_Ln3,H,[],B}]}}]) ->
    As = p_header(H),
    NArgs = length(As),
    %% the first parameter is the per-item input and fixes the item size
    {_MainVar,MainType} = hd(As),
    ItemSize = sizeof(MainType),
    {NArgs,ItemSize, lists:flatten([g_header(As), g_body(As,B)])};
p_fun(Fs) ->
    io:format("Fs=~p\n", [Fs]),
    erlang:error(not_supported).

%% Parse the fun's parameter patterns into [{Var,Type}].
p_header(Params) ->
    [p_arg(P) || P <- Params].

%% Emit the kernel signature and prologue; the first {Var,Type} is the
%% input item, the rest become extra const kernel arguments.
g_header([{V,T}|Ps]) ->
    ["__kernel void example(",
     "__global ", g_type(T), "*", "in", ",",
     "__global ", g_type(T), "*", "out",",",
     "const uint n",
     [[",", "const ", g_type(Tx), " ", atom_to_list(X)] || {X,Tx} <- Ps],
     ")\n",
     "{",
     " int i = get_global_id(0);\n",
     " if (i < n) {\n"
     " ", g_type(T), " ", atom_to_list(V), "= in[i];\n"
    ].

%% Emit the body: every expression becomes a statement, the value of
%% the last one is stored in out[i].
g_body(Vs,[E]) ->
    ["out[i] = ", p_expr(Vs, E),";\n",
     " }\n",
     "}\n"];
g_body(Vs,[E|Es]) ->
    [p_expr(Vs,E),";\n", g_body(Vs, Es)];
g_body(_Vs,[]) ->
    [" }\n",
     "}\n"].

%% A parameter must be a binary pattern with exactly one segment,
%% <<Var:Size/Type>>; Size selects the vector width.
p_arg({bin,_,[{bin_element,_,{var,_,V},Size,[Type]}]}) ->
    S = t_vector_size(Size),
    T = t_type(S,Type),
    {V,T}.
%% Translate an Erlang abstract-format expression into OpenCL C text
%% (an iolist).  Vs is the [{Var,Type}] list of known parameters.
%% Operator applications are always parenthesised: the abstract format
%% carries grouping only in its tree shape, so emitting "L op R"
%% verbatim would turn (X+1)*2 into X+1*2.
p_expr(Vs, {var,_,V}) ->
    true = lists:keymember(V, 1, Vs),
    [atom_to_list(V)];
p_expr(_Vs, {integer,_,I}) ->
    [integer_to_list(I)];
p_expr(_Vs, {float,_,F}) ->
    io_lib:format("~f", [F]);
p_expr(Vs, {op,_Ln,Op,L,R}) ->
    ["(", p_expr(Vs,L), g_op(Op), p_expr(Vs,R), ")"];
p_expr(Vs, {op,_Ln,Op,M}) ->
    ["(", g_op(Op), p_expr(Vs,M), ")"];
p_expr(Vs, {match,_Ln,L,R}) ->
    [p_expr(Vs,L),"=",p_expr(Vs,R)];
p_expr(Vs, {record_field,_Ln,{var,_,V},{atom,_,Selector}}) ->
    true = lists:keymember(V, 1, Vs),
    [atom_to_list(V),".",atom_to_list(Selector)];
p_expr(Vs, {record_field,_Ln,Expr,{atom,_,Selector}}) ->
    E = p_expr(Vs, Expr),
    %% fixme: normalize vector selector and check that
    %% the permutation is valid.
    [E,".",atom_to_list(Selector)];
p_expr(Vs, {call,_Ln,{atom,_,F},As}) ->
    Ps = [p_expr(Vs, A) || A <- As],
    [atom_to_list(F),"(", g_args(Ps), ")"].

%% Map an Erlang operator atom to its C spelling.  Operators that are
%% spelled the same in both languages fall through unchanged.
g_op('div')     -> "/";
g_op('rem')     -> "%";
g_op('band')    -> "&";
g_op('bor')     -> "|";
g_op('bxor')    -> "^";
g_op('bsl')     -> "<<";
g_op('bsr')     -> ">>";
g_op('bnot')    -> "~";
g_op('not')     -> "!";
g_op('and')     -> "&&";
g_op('andalso') -> "&&";
g_op('or')      -> "||";
g_op('orelse')  -> "||";
g_op('=<')      -> "<=";
g_op('/=')      -> "!=";
g_op('=:=')     -> "==";
g_op('=/=')     -> "!=";
g_op(Op)        -> atom_to_list(Op).

%% Size part of a binary segment: the atom 'default' or an integer.
t_vector_size(default) -> default;
t_vector_size({integer,_,Sz}) -> Sz.

%% Join already-translated arguments with commas.
g_args([]) -> [];
g_args([A]) -> [A];
g_args([A|As]) -> [A,"," | g_args(As)].

%% Type name as OpenCL C text: scalar 'float' -> "float",
%% vector {float,4} -> "float4".
g_type({T,S}) when is_atom(T), is_integer(S) ->
    [atom_to_list(T),integer_to_list(S)];
g_type(T) when is_atom(T) ->
    [atom_to_list(T)].

%% size scalar type
sizeof('char') -> 1;
sizeof('uchar') -> 1;
sizeof('short') -> 2;
sizeof('ushort') -> 2;
sizeof('int') -> 4;
sizeof('uint') -> 4;
sizeof('long') -> 8;
sizeof('ulong') -> 8;
sizeof('float') -> 4;
sizeof('half') -> 2;
sizeof({T,default}) -> sizeof(T);
sizeof({T,S}) -> S*sizeof(T).

%% scalar types (api -> opencl)
%% Size 'default' or 1 gives the scalar type; 2, 4, 8 and 16 give
%% {Scalar,Size}.  'half' vectors and every other size are rejected
%% with error({bad_vector_type,Scalar,Size}).
t_type(Size,Type) ->
    Scalar = t_type(Type),
    if Size == default -> Scalar;
       Size == 1 -> Scalar;
       Scalar == 'half' -> erlang:error({bad_vector_type,Scalar,Size});
       Size == 2 -> {Scalar,2};
       Size == 4 -> {Scalar,4};
       Size == 8 -> {Scalar,8};
       Size == 16 -> {Scalar,16};
       true -> erlang:error({bad_vector_type,Scalar,Size})
    end.
t_type(cl_char) -> 'char'; t_type(cl_uchar) -> 'uchar'; t_type(cl_short) -> 'short'; t_type(cl_ushort) -> 'ushort'; t_type(cl_int) -> 'int'; t_type(cl_uint) -> 'uint'; t_type(cl_long) -> 'long'; t_type(cl_ulong) -> 'ulong'; t_type(cl_float) -> 'float'; t_type(cl_half) -> 'half'; t_type(T) -> erlang:error({bad_type,T}). cl-cl-1.2.3/examples/cl_mul.erl000066400000000000000000000112221301041406700162760ustar00rootroot00000000000000%%% File : cl_mul.erl %%% Author : Tony Rogvall %%% Description : Multiply matrix with list of matrices %%% Created : 16 Nov 2009 by Tony Rogvall -module(cl_mul). -compile(export_all). -import(lists, [map/2]). -include("../include/cl.hrl"). -define(DATA_SIZE, 1024). -define(ITEM_SIZE, (16*4)). encode_matrix({float16,M}) -> encode_matrix(M); encode_matrix({ X1, X2, X3, X4 , X5, X6, X7, X8 , X9, X10,X11,X12 , X13,X14,X15,X16}) -> <>. decode_matrix(Data) -> case Data of << ?cl_float(A11), ?cl_float(A12), ?cl_float(A13), ?cl_float(A14), ?cl_float(A21), ?cl_float(A22), ?cl_float(A23), ?cl_float(A24), ?cl_float(A31), ?cl_float(A32), ?cl_float(A33), ?cl_float(A34), ?cl_float(A41), ?cl_float(A42), ?cl_float(A43), ?cl_float(A44), Rest/binary >> -> [{A11,A12,A13,A14, A21,A22,A23,A24, A31,A32,A33,A34, A41,A42,A43,A44} | decode_matrix(Rest)]; <<>> -> [] end. id_matrix() -> {float16,{1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1}}. zero_matrix() -> {float16,{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}}. r() -> random:uniform(). random_matrices(N) -> list_to_binary( lists:map( fun(_I) -> M = {r(),r(),r(),r(), r(),r(),r(),r(), r(),r(),r(),r(), r(),r(),r(),r()}, encode_matrix(M) end, lists:seq(1, N))). test_data() -> random_matrices(4). dump_data(Bin) -> io:format("data=~p\n", [decode_matrix(Bin)]). test() -> test(all). test(DevType) -> %% Create binary with floating points 1.0 ... 1024.0 Data = test_data(), run(Data, DevType). examples_dir() -> filename:join(code:lib_dir(cl), "examples"). 
%% %% execute a kernel that squares floating point numbers %% now only one device is used (We run on cpu for debugging) %% run(Data, DevType) -> E = clu:setup(DevType), io:format("platform created\n"), Filename = filename:join(examples_dir(),"mul4x4.cl"), io:format("build: ~s\n", [Filename]), {ok,Program} = clu:build_source_file(E, Filename), io:format("program built\n"), N = byte_size(Data), %% number of bytes in indata Count = N div ?ITEM_SIZE, %% number of matrices in indata %% Create input data memory (implicit copy_host_ptr) {ok,Input} = cl:create_buffer(E#cl.context,[read_only],N), io:format("input memory created\n"), %% Create the output memory {ok,Output} = cl:create_buffer(E#cl.context,[write_only],N), io:format("output memory created\n"), %% Create the command queue for the first device {ok,Queue} = cl:create_queue(E#cl.context,hd(E#cl.devices),[]), io:format("queue created\n"), %% Create the squre kernel object {ok,Kernel} = cl:create_kernel(Program, "mul4x4"), io:format("kernel created: ~p\n", [Kernel]), dump_data(Data), %% Write data into input array {ok,Event1} = cl:enqueue_write_buffer(Queue, Input, 0, N, Data, []), io:format("write data enqueued\n"), erlang:display_string("enqueu write\n"), %% Set kernel arguments clu:apply_kernel_args(Kernel, [Input,Output,encode_matrix(id_matrix()),{uint,Count}]), io:format("kernel args set\n"), Device = hd(E#cl.devices), {ok,Local} = cl:get_kernel_workgroup_info(Kernel, Device, work_group_size), io:format("work_group_size = ~p\n", [Local]), %% Enqueue the kernel Global = Count, if Local > Count -> LocalWork = Count; true -> LocalWork = Local end, {ok,Event2} = cl:enqueue_nd_range_kernel(Queue, Kernel, [Global], [LocalWork], [Event1]), io:format("nd range [~w, ~w] kernel enqueued\n", [[Global],[LocalWork]]), %% Enqueue the read from device memory (wait for kernel to finish) {ok,Event3} = cl:enqueue_read_buffer(Queue,Output,0,N,[Event2]), io:format("read buffer enqueued\n"), %% Now flush the queue to make things 
happend ok = cl:flush(Queue), io:format("flushed\n"), %% Wait for Result buffer to be written io:format("wait\n"), io:format("Event1 = ~p\n", [cl:wait(Event1,1000)]), io:format("Event2 = ~p\n", [cl:wait(Event2,1000)]), Event3Res = cl:wait(Event3,1000), io:format("Event3 = ~p\n", [Event3Res]), %% cl:release_mem_object(Input), cl:release_mem_object(Output), cl:release_queue(Queue), cl:release_kernel(Kernel), cl:release_program(Program), clu:teardown(E), case Event3Res of {ok,ResData} -> dump_data(ResData); _ -> ok end, Event3Res. cl-cl-1.2.3/examples/cl_square_float.erl000066400000000000000000000063041301041406700201730ustar00rootroot00000000000000%% %% SquareFloat program adpoted from "Hello World" OpenCL examples by apple %% -module(cl_square_float). -compile(export_all). -import(lists, [map/2]). -include("../include/cl.hrl"). -define(DATA_SIZE, 1024). source() -> " __kernel void square( __global float* input, __global float* output, const unsigned int count) { int i = get_global_id(0); if (i < count) output[i] = input[i]*input[i]; } ". test_data() -> << <> || X <- lists:seq(1,?DATA_SIZE) >>. dump_data(Bin) -> io:format("data=~p\n", [[ X || <> <= Bin ]]). test() -> test(all). test(DevType) -> %% Create binary with floating points 1.0 ... 1024.0 Data = test_data(), run(Data, DevType). 
%%
%% execute a kernel that squares floating point numbers
%% now only one device is used (We run on cpu for debugging)
%%
%% Data is a binary of 32-bit floats (Count = byte_size(Data) div 4).
%% Prints the squared values via dump_data/1; crashes with badmatch
%% if any OpenCL call fails.
%%
run(Data, DevType) ->
    E = clu:setup(DevType),
    io:format("platform created\n"),
    {ok,Program} = clu:build_source(E, source()),
    io:format("program built\n"),

    N = byte_size(Data),   %% number of bytes in indata
    Count = N div 4,       %% number of floats in indata

    %% Create input data memory (implicit copy_host_ptr)
    {ok,Input} = cl:create_buffer(E#cl.context,[read_only],N),
    io:format("input memory created\n"),

    %% Create the output memory
    {ok,Output} = cl:create_buffer(E#cl.context,[write_only],N),
    io:format("output memory created\n"),

    %% Create the command queue for the first device
    {ok,Queue} = cl:create_queue(E#cl.context,hd(E#cl.devices),[]),
    io:format("queue created\n"),

    %% Create the square kernel object
    {ok,Kernel} = cl:create_kernel(Program, "square"),
    io:format("kernel created: ~p\n", [Kernel]),

    clu:apply_kernel_args(Kernel, [Input, Output, Count]),
    io:format("kernel args set\n"),

    %% Write data into input array
    {ok,Event1} = cl:enqueue_write_buffer(Queue, Input, 0, N, Data, []),
    io:format("write data enqueued\n"),
    erlang:display_string("enqueu write\n"),

    Device = hd(E#cl.devices),
    {ok,Local} = cl:get_kernel_workgroup_info(Kernel, Device, work_group_size),
    io:format("work_group_size = ~p\n", [Local]),

    %% Enqueue the kernel.  The local size must not exceed the global
    %% size (same clamp as cl_mul:run/2).
    %% NOTE(review): the global size should also be a multiple of the
    %% local size; that holds for the default 1024-element test data
    %% with power-of-two work group sizes -- confirm for other inputs.
    Global = Count,
    LocalWork = min(Local, Global),
    {ok,Event2} = cl:enqueue_nd_range_kernel(Queue, Kernel,
                                             [Global], [LocalWork], [Event1]),
    io:format("nd range [~p, ~p] kernel enqueued\n",
              [[Global],[LocalWork]]),

    %% Enqueue the read from device memory (wait for kernel to finish)
    {ok,Event3} = cl:enqueue_read_buffer(Queue,Output,0,N,[Event2]),
    io:format("read buffer enqueued\n"),

    %% Now flush the queue to make things happen
    ok = cl:flush(Queue),
    io:format("flushed\n"),

    %% Wait for Result buffer to be written
    io:format("wait\n"),
    io:format("Event1 = ~p\n", [cl:wait(Event1)]),
    io:format("Event2 = ~p\n", [cl:wait(Event2)]),
    Event3Res = cl:wait(Event3),
    io:format("Event3 = ~p\n", [Event3Res]),

    %%
    cl:release_mem_object(Input),
    cl:release_mem_object(Output),
    cl:release_queue(Queue),
    cl:release_kernel(Kernel),
    cl:release_program(Program),

    clu:teardown(E),
    {ok,EventResData} = Event3Res,
    dump_data(EventResData).
cl-cl-1.2.3/examples/mul4x4.cl000066400000000000000000000011631301041406700157770ustar00rootroot00000000000000
//
// Multiply count 4x4 matrices with a constant matrix
//
// input/output hold count row-major 4x4 float matrices; aa is the
// constant left-hand matrix.  output[ix] = aa * input[ix].
//
__kernel void mul4x4(__global float* input,
                     __global float* output,
                     const float16 aa,
                     const unsigned int count)
{
    size_t ix;
    __global float* b;
    __global float* c;
    float *a = (float*)&aa;

    ix = get_global_id(0);
    if (ix < count) {
        int i,j,k;
        b = input + ix*16;
        c = output + ix*16;
        for (i=0; i<4; i++) {
            for (j=0; j<4; j++) {
                float s1 = 0.0f;
                for (k=0; k<4; k++) {
                    float t1 = a[4*i+k];
                    float t2 = b[4*k+j];
                    s1 += (t1*t2);
                }
                c[4*i+j] = s1;
            }
        }
    }
}
cl-cl-1.2.3/examples/z2.cl000066400000000000000000000010461301041406700151750ustar00rootroot00000000000000
//
// Calculate mandelbrot
//   f(0)   = 0
//   f(k+1) = f(k)^2 + c,   c = (x + i*xs) + (y + j*ys)i
//
// out[i*n + j] receives the number of iterations (at most n) before
// |f(k)|^2 reaches 4.  Must be enqueued as a 2-dimensional range:
// dimension 0 selects i, dimension 1 selects j.
//
__kernel void z2(const float x, const float y,
                 const float xs, const float ys,
                 const unsigned int n,
                 __global unsigned int* out)
{
    int i = get_global_id(0);
    int j = get_global_id(1);

    if ((i < n) && (j < n)) {
        int k = 0;
        float cx = x + i*xs;
        float cy = y + j*ys;
        float a = 0, b = 0;
        float a2 = 0, b2 = 0;

        while ((k < n) && ((a2 + b2) < 4)) {
            // new imaginary part needs the OLD real part, so compute
            // the new real part into a temporary first
            float t = a2 - b2 + cx;
            b = 2*a*b + cy;
            a = t;
            a2 = a*a;
            b2 = b*b;
            k++;
        }
        out[i*n + j] = k;
    }
}
cl-cl-1.2.3/include/000077500000000000000000000000001301041406700141265ustar00rootroot00000000000000cl-cl-1.2.3/include/cl.hrl000066400000000000000000000435171301041406700152450ustar00rootroot00000000000000
%%%---- BEGIN COPYRIGHT -------------------------------------------------------
%%%
%%% Copyright (C) 2007 - 2012, Rogvall Invest AB,
%%%
%%% This software is licensed as described in the file COPYRIGHT, which
%%% you should have received as part of this distribution. The terms
%%% are also available at http://www.rogvall.se/docs/copyright.txt.
%%% %%% You may opt to use, copy, modify, merge, publish, distribute and/or sell %%% copies of the Software, and permit persons to whom the Software is %%% furnished to do so, under the terms of the COPYRIGHT file. %%% %%% This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY %%% KIND, either express or implied. %%% %%%---- END COPYRIGHT --------------------------------------------------------- %% %% Definitions used here and there %% -ifndef(__CL_HRL__). -define(__CL_HRL__, true). -define(POINTER_SIZE, 64). %% casted by driver -define(SIZE_SIZE, 64). %% casted by driver %% transport types -define(u_int8_t(X), X:8/native-unsigned-integer). -define(u_int16_t(X), X:16/native-unsigned-integer). -define(u_int32_t(X), X:32/native-unsigned-integer). -define(u_int64_t(X), X:64/native-unsigned-integer). -define(int8_t(X), X:8/native-signed-integer). -define(int16_t(X), X:16/native-signed-integer). -define(int32_t(X), X:32/native-signed-integer). -define(int64_t(X), X:64/native-signed-integer). -define(float_t(X), X:32/native-float). -define(double_t(X), X:64/native-float). -define(pointer_t(X), X:?POINTER_SIZE/native-unsigned-integer). -define(size_t(X), X:?SIZE_SIZE/native-unsigned-integer). %% scalar types %% @type cl_char() = integer() %% @type cl_uchar() = non_neg_integer() %% @type cl_short() = integer() %% @type cl_ushort() = non_neg_integer() %% @type cl_int() = integer() %% @type cl_uint() = non_neg_integer() %% @type cl_long() = integer() %% @type cl_ulong() = non_neg_integer() %% @type cl_half() = float() %% @type cl_float() = float() %% @type cl_double() = float() -type cl_char() :: integer(). -type cl_uchar() :: non_neg_integer(). -type cl_short() :: integer(). -type cl_ushort() :: non_neg_integer(). -type cl_int() :: integer(). -type cl_uint() :: non_neg_integer(). -type cl_long() :: integer(). -type cl_ulong() :: non_neg_integer(). -type cl_half() :: float(). -type cl_float() :: float(). -type cl_double() :: float(). 
-define(cl_char(X), X:8/native-signed-integer). -define(cl_uchar(X), X:8/native-unsigned-integer). -define(cl_short(X), X:16/native-signed-integer). -define(cl_ushort(X), X:16/native-unsigned-integer). -define(cl_int(X), X:32/native-signed-integer). -define(cl_uint(X), X:32/native-unsigned-integer). -define(cl_long(X), X:64/native-signed-integer). -define(cl_ulong(X), X:64/native-unsigned-integer). -define(cl_half(X), X:16/native-unsigned-integer). -define(cl_float(X), X:32/native-float). -define(cl_double(X), X:64/native-float). -define(cl_pointer(X), X:?POINTER_SIZE/native-unsigned-integer). -define(cl_size(X), X:?SIZE_SIZE/native-unsigned-integer). %% vector types, OpenCL requires that all types be naturally aligned. -define(cl_char2(X1,X2), ?cl_char(X1), ?cl_char(X2)). -define(cl_char4(X1,X2,X3,X4), ?cl_char(X1), ?cl_char(X2), ?cl_char(X3), ?cl_char(X4)). -define(cl_char8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_char(X1), ?cl_char(X2), ?cl_char(X3), ?cl_char(X4), ?cl_char(X5), ?cl_char(X6), ?cl_char(X7), ?cl_char(X8)). -define(cl_char16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_char(X1), ?cl_char(X2), ?cl_char(X3), ?cl_char(X4), ?cl_char(X5), ?cl_char(X6), ?cl_char(X7), ?cl_char(X8), ?cl_char(X9), ?cl_char(X10), ?cl_char(X11), ?cl_char(X12), ?cl_char(X13), ?cl_char(X14), ?cl_char(X15), ?cl_char(X16)). -define(cl_uchar2(X1,X2), ?cl_uchar(X1), ?cl_uchar(X2)). -define(cl_uchar4(X1,X2,X3,X4), ?cl_uchar(X1), ?cl_uchar(X2), ?cl_uchar(X3), ?cl_uchar(X4)). -define(cl_uchar8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_uchar(X1), ?cl_uchar(X2), ?cl_uchar(X3), ?cl_uchar(X4), ?cl_uchar(X5), ?cl_uchar(X6), ?cl_uchar(X7), ?cl_uchar(X8)). -define(cl_uchar16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_uchar(X1), ?cl_uchar(X2), ?cl_uchar(X3), ?cl_uchar(X4), ?cl_uchar(X5), ?cl_uchar(X6), ?cl_uchar(X7), ?cl_uchar(X8), ?cl_uchar(X9), ?cl_uchar(X10), ?cl_uchar(X11), ?cl_uchar(X12), ?cl_uchar(X13), ?cl_uchar(X14), ?cl_uchar(X15), ?cl_uchar(X16)). 
-define(cl_short2(X1,X2), ?cl_short(X1), ?cl_short(X2)). -define(cl_short4(X1,X2,X3,X4), ?cl_short(X1), ?cl_short(X2), ?cl_short(X3), ?cl_short(X4)). -define(cl_short8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_short(X1), ?cl_short(X2), ?cl_short(X3), ?cl_short(X4), ?cl_short(X5), ?cl_short(X6), ?cl_short(X7), ?cl_short(X8)). -define(cl_short16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_short(X1), ?cl_short(X2), ?cl_short(X3), ?cl_short(X4), ?cl_short(X5), ?cl_short(X6), ?cl_short(X7), ?cl_short(X8), ?cl_short(X9), ?cl_short(X10), ?cl_short(X11), ?cl_short(X12), ?cl_short(X13), ?cl_short(X14), ?cl_short(X15), ?cl_short(X16)). -define(cl_ushort2(X1,X2), ?cl_ushort(X1), ?cl_ushort(X2)). -define(cl_ushort4(X1,X2,X3,X4), ?cl_ushort(X1), ?cl_ushort(X2), ?cl_ushort(X3), ?cl_ushort(X4)). -define(cl_ushort8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_ushort(X1), ?cl_ushort(X2), ?cl_ushort(X3), ?cl_ushort(X4), ?cl_ushort(X5), ?cl_ushort(X6), ?cl_ushort(X7), ?cl_ushort(X8)). -define(cl_ushort16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_ushort(X1), ?cl_ushort(X2), ?cl_ushort(X3), ?cl_ushort(X4), ?cl_ushort(X5), ?cl_ushort(X6), ?cl_ushort(X7), ?cl_ushort(X8), ?cl_ushort(X9), ?cl_ushort(X10), ?cl_ushort(X11), ?cl_ushort(X12), ?cl_ushort(X13), ?cl_ushort(X14), ?cl_ushort(X15), ?cl_ushort(X16)). -define(cl_int2(X1,X2), ?cl_int(X1), ?cl_int(X2)). -define(cl_int4(X1,X2,X3,X4), ?cl_int(X1), ?cl_int(X2), ?cl_int(X3), ?cl_int(X4)). -define(cl_int8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_int(X1), ?cl_int(X2), ?cl_int(X3), ?cl_int(X4), ?cl_int(X5), ?cl_int(X6), ?cl_int(X7), ?cl_int(X8)). -define(cl_int16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_int(X1), ?cl_int(X2), ?cl_int(X3), ?cl_int(X4), ?cl_int(X5), ?cl_int(X6), ?cl_int(X7), ?cl_int(X8), ?cl_int(X9), ?cl_int(X10), ?cl_int(X11), ?cl_int(X12), ?cl_int(X13), ?cl_int(X14), ?cl_int(X15), ?cl_int(X16)). -define(cl_uint2(X1,X2), ?cl_uint(X1), ?cl_uint(X2)). 
-define(cl_uint4(X1,X2,X3,X4), ?cl_uint(X1), ?cl_uint(X2), ?cl_uint(X3), ?cl_uint(X4)). -define(cl_uint8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_uint(X1), ?cl_uint(X2), ?cl_uint(X3), ?cl_uint(X4), ?cl_uint(X5), ?cl_uint(X6), ?cl_uint(X7), ?cl_uint(X8)). -define(cl_uint16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_uint(X1), ?cl_uint(X2), ?cl_uint(X3), ?cl_uint(X4), ?cl_uint(X5), ?cl_uint(X6), ?cl_uint(X7), ?cl_uint(X8), ?cl_uint(X9), ?cl_uint(X10), ?cl_uint(X11), ?cl_uint(X12), ?cl_uint(X13), ?cl_uint(X14), ?cl_uint(X15), ?cl_uint(X16)). -define(cl_long2(X1,X2), ?cl_long(X1), ?cl_long(X2)). -define(cl_long4(X1,X2,X3,X4), ?cl_long(X1), ?cl_long(X2), ?cl_long(X3), ?cl_long(X4)). -define(cl_long8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_long(X1), ?cl_long(X2), ?cl_long(X3), ?cl_long(X4), ?cl_long(X5), ?cl_long(X6), ?cl_long(X7), ?cl_long(X8)). -define(cl_long16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_long(X1), ?cl_long(X2), ?cl_long(X3), ?cl_long(X4), ?cl_long(X5), ?cl_long(X6), ?cl_long(X7), ?cl_long(X8), ?cl_long(X9), ?cl_long(X10), ?cl_long(X11), ?cl_long(X12), ?cl_long(X13), ?cl_long(X14), ?cl_long(X15), ?cl_long(X16)). -define(cl_ulong2(X1,X2), ?cl_ulong(X1), ?cl_ulong(X2)). -define(cl_ulong4(X1,X2,X3,X4), ?cl_ulong(X1), ?cl_ulong(X2), ?cl_ulong(X3), ?cl_ulong(X4)). -define(cl_ulong8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_ulong(X1), ?cl_ulong(X2), ?cl_ulong(X3), ?cl_ulong(X4), ?cl_ulong(X5), ?cl_ulong(X6), ?cl_ulong(X7), ?cl_ulong(X8)). -define(cl_ulong16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_ulong(X1), ?cl_ulong(X2), ?cl_ulong(X3), ?cl_ulong(X4), ?cl_ulong(X5), ?cl_ulong(X6), ?cl_ulong(X7), ?cl_ulong(X8), ?cl_ulong(X9), ?cl_ulong(X10), ?cl_ulong(X11), ?cl_ulong(X12), ?cl_ulong(X13), ?cl_ulong(X14), ?cl_ulong(X15), ?cl_ulong(X16)). -define(cl_float2(X1,X2), ?cl_float(X1), ?cl_float(X2)). -define(cl_float4(X1,X2,X3,X4), ?cl_float(X1), ?cl_float(X2), ?cl_float(X3), ?cl_float(X4)). 
-define(cl_float8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_float(X1), ?cl_float(X2), ?cl_float(X3), ?cl_float(X4), ?cl_float(X5), ?cl_float(X6), ?cl_float(X7), ?cl_float(X8)). -define(cl_float16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_float(X1), ?cl_float(X2), ?cl_float(X3), ?cl_float(X4), ?cl_float(X5), ?cl_float(X6), ?cl_float(X7), ?cl_float(X8), ?cl_float(X9), ?cl_float(X10), ?cl_float(X11), ?cl_float(X12), ?cl_float(X13), ?cl_float(X14), ?cl_float(X15), ?cl_float(X16)). -define(cl_double2(X1,X2), ?cl_double(X1), ?cl_double(X2)). -define(cl_double4(X1,X2,X3,X4), ?cl_double(X1), ?cl_double(X2), ?cl_double(X3), ?cl_double(X4)). -define(cl_double8(X1,X2,X3,X4,X5,X6,X7,X8), ?cl_double(X1), ?cl_double(X2), ?cl_double(X3), ?cl_double(X4), ?cl_double(X5), ?cl_double(X6), ?cl_double(X7), ?cl_double(X8)). -define(cl_double16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), ?cl_double(X1), ?cl_double(X2), ?cl_double(X3), ?cl_double(X4), ?cl_double(X5), ?cl_double(X6), ?cl_double(X7), ?cl_double(X8), ?cl_double(X9), ?cl_double(X10), ?cl_double(X11), ?cl_double(X12), ?cl_double(X13), ?cl_double(X14), ?cl_double(X15), ?cl_double(X16)). %% @type cl_platform_id() = { {'object', 1, non_neg_integer() } } %% @type cl_device_id() = { {'object', 2, non_neg_integer() } } %% @type cl_context() = { {'object', 3, non_neg_integer() } } %% @type cl_queue() = { {'object', 4, non_neg_integer() } } %% @type cl_mem() = { {'object', 5, non_neg_integer() } } %% @type cl_sampler() = { {'object', 6, non_neg_integer() } } %% @type cl_program() = { {'object', 7, non_neg_integer() } } %% @type cl_kernel() = { {'object', 8, non_neg_integer() } } %% @type cl_event() = { {'object', 9, non_neg_integer() } } -type cl_platform_id() :: {Type::atom(), 1, non_neg_integer() } . -type cl_device_id() :: {Type::atom(), 2, non_neg_integer() } . -type cl_context() :: {Type::atom(), 3, non_neg_integer() } . -type cl_queue() :: {Type::atom(), 4, non_neg_integer() } . 
-type cl_mem() :: {Type::atom(), 5, non_neg_integer() } . -type cl_sampler() :: {Type::atom(), 6, non_neg_integer() } . -type cl_program() :: {Type::atom(), 7, non_neg_integer() } . -type cl_kernel() :: {Type::atom(), 8, non_neg_integer() } . -type cl_event() :: {Type::atom(), 9, non_neg_integer() } . %% @type cl_error() = { %% 'device_not_found' | %% 'device_not_available' | %% 'compiler_not_available' | %% 'mem_object_allocation_failure' | %% 'out_of_resources' | %% 'out_of_host_memory' | %% 'profiling_info_not_available' | %% 'mem_copy_overlap' | %% 'image_format_mismatch' | %% 'image_format_not_supported' | %% 'build_program_failure' | %% 'map_failure' | %% 'invalid_value' | %% 'invalid_device type' | %% 'invalid_platform' | %% 'invalid_device' | %% 'invalid_context' | %% 'invalid_queue_properties' | %% 'invalid_command_queue' | %% 'invalid_host_ptr' | %% 'invalid_mem_object' | %% 'invalid_image_format_descriptor' | %% 'invalid_image_size' | %% 'invalid_sampler' | %% 'invalid_binary' | %% 'invalid_build_options' | %% 'invalid_program' | %% 'invalid_program_executable' | %% 'invalid_kernel_name' | %% 'invalid_kernel_definition' | %% 'invalid_kernel' | %% 'invalid_arg_index' | %% 'invalid_arg_value' | %% 'invalid_arg_size' | %% 'invalid_kernel_args' | %% 'invalid_work_dimension' | %% 'invalid_work_group_size' | %% 'invalid_work_item size' | %% 'invalid_global_offset' | %% 'invalid_event_wait_list' | %% 'invalid_event' | %% 'invalid_operation' | %% 'invalid_gl_object' | %% 'invalid_buffer_size' | %% 'invalid_mip_level' | %% 'unknown' }. 
-type cl_error() :: 'device_not_found' | 'device_not_available' | 'compiler_not_available' | 'mem_object_allocation_failure' | 'out_of_resources' | 'out_of_host_memory' | 'profiling_info_not_available' | 'mem_copy_overlap' | 'image_format_mismatch' | 'image_format_not_supported' | 'build_program_failure' | 'map_failure' | 'invalid_value' | 'invalid_device type' | 'invalid_platform' | 'invalid_device' | 'invalid_context' | 'invalid_queue_properties' | 'invalid_command_queue' | 'invalid_host_ptr' | 'invalid_mem_object' | 'invalid_image_format_descriptor' | 'invalid_image_size' | 'invalid_sampler' | 'invalid_binary' | 'invalid_build_options' | 'invalid_program' | 'invalid_program_executable' | 'invalid_kernel_name' | 'invalid_kernel_definition' | 'invalid_kernel' | 'invalid_arg_index' | 'invalid_arg_value' | 'invalid_arg_size' | 'invalid_kernel_args' | 'invalid_work_dimension' | 'invalid_work_group_size' | 'invalid_work_item size' | 'invalid_global_offset' | 'invalid_event_wait_list' | 'invalid_event' | 'invalid_operation' | 'invalid_gl_object' | 'invalid_buffer_size' | 'invalid_mip_level' | 'unknown' . -define(cl_platform_id(X), ?cl_pointer(X)). -define(cl_device_id(X), ?cl_pointer(X)). -define(cl_context(X), ?cl_pointer(X)). -define(cl_command_queue(X), ?cl_pointer(X)). -define(cl_mem(X), ?cl_pointer(X)). -define(cl_program(X), ?cl_pointer(X)). -define(cl_kernel(X), ?cl_pointer(X)). -define(cl_event(X), ?cl_pointer(X)). -define(cl_sampler(X), ?cl_pointer(X)). -define(cl_bool(X), ?cl_uint(X)). -define(cl_bitfield(X), ?cl_ulong(X)). -define(cl_device_type(X), ?cl_bitfield(X)). -define(cl_platform_info(X), ?cl_uint(X)). -define(cl_device_info(X), ?cl_uint(X)). -define(cl_device_address_info(X), ?cl_bitfield(X)). -define(cl_device_fp_config(X), ?cl_bitfield(X)). -define(cl_device_mem_cache_type(X), ?cl_uint(X)). -define(cl_device_local_mem_type(X), ?cl_uint(X)). -define(cl_device_exec_capabilities(X), ?cl_bitfield(X)). 
-define(cl_command_queue_properties(X), ?cl_bitfield(X)). %% -define(cl_context_properties(X), ?intptr_t(X)). -define(cl_context_info(X), ?cl_uint(X)). -define(cl_command_queue_info(X), ?cl_uint(X)). -define(cl_channel_order(X), ?cl_uint(X)). -define(cl_channel_type(X), ?cl_uint(X)). -define(cl_mem_flags(X), ?cl_bitfield(X)). -define(cl_mem_object_type(X), ?cl_uint(X)). -define(cl_mem_info(X), ?cl_uint(X)). -define(cl_image_info(X), ?cl_uint(X)). -define(cl_addressing_mode(X), ?cl_uint(X)). -define(cl_filter_mode(X), ?cl_uint(X)). -define(cl_sampler_info(X), ?cl_uint(X)). -define(cl_map_flags(X), ?cl_bitfield(X)). -define(cl_program_info(X), ?cl_uint(X)). -define(cl_program_build_info(X), ?cl_uint(X)). -define(cl_build_status(X), ?cl_int(X)). -define(cl_kernel_info(X), ?cl_uint(X)). -define(cl_kernel_work_group_info(X), ?cl_uint(X)). -define(cl_event_info(X), ?cl_uint(X)). -define(cl_command_type(X), ?cl_uint(X)). -define(cl_profiling_info(X), ?cl_uint(X)). -define(CL_CHAR_BIT, 8). -define(CL_SCHAR_MAX, 127). -define(CL_SCHAR_MIN, (-127-1)). -define(CL_CHAR_MAX, ?CL_SCHAR_MAX). -define(CL_CHAR_MIN, ?CL_SCHAR_MIN). -define(CL_UCHAR_MAX, 255). -define(CL_SHRT_MAX, 32767). -define(CL_SHRT_MIN, (-32767-1)). -define(CL_USHRT_MAX, 65535). -define(CL_INT_MAX, 2147483647). -define(CL_INT_MIN, (-2147483647-1)). -define(CL_UINT_MAX, 16#ffffffff). -define(CL_LONG_MAX, 16#7FFFFFFFFFFFFFFF). -define(CL_LONG_MIN, (-16#7FFFFFFFFFFFFFFF-1)). -define(CL_ULONG_MAX, 16#FFFFFFFFFFFFFFFF). -define(CL_FLT_DIG, 6). -define(CL_FLT_MANT_DIG, 24). -define(CL_FLT_MAX_10_EXP, 38). -define(CL_FLT_MAX_EXP, 128). -define(CL_FLT_MIN_10_EXP, -37). -define(CL_FLT_MIN_EXP, -125). -define(CL_FLT_RADIX, 2). -define(CL_FLT_MAX, 3.40282347e+38). -define(CL_FLT_MIN, 1.17549435e-38). -define(CL_FLT_EPSILON, 1.19209290e-07). -define(CL_DBL_DIG, 15). -define(CL_DBL_MANT_DIG, 53). -define(CL_DBL_MAX_10_EXP, 308). -define(CL_DBL_MAX_EXP, 1024). -define(CL_DBL_MIN_10_EXP, -307). -define(CL_DBL_MIN_EXP, -1021). 
-define(CL_DBL_RADIX, 2). -define(CL_DBL_MAX, 1.7976931348623157e+308). -define(CL_DBL_MIN, 2.2250738585072014e-308). -define(CL_DBL_EPSILON, 2.2204460492503131e-16). -type cl_channel_order() :: r | a | rg | ra | rgb | rgba | rgba | bgra | argb | intensity | luminance | rx | rgx | rgbx | %% 1.2 depth | depth_stencil. -type cl_channel_type() :: snorm_int8 | snorm_int16 | unorm_int8 | unorm_int16 | unorm_short_565 | unorm_short_555 | unorm_int_101010 | signed_int8 | signed_int16 | signed_int32 | unsigned_int8 | unsigned_int16 | unsigned_int32 | half_float | float | %% 1.2 unorm_int24. -type cl_mem_object_type() :: buffer | image2d | image3d | %% 1.2 image2d_array | image1d | image1d_array | image1d_buffer. -record(cl_image_format, { cl_channel_order :: cl_channel_order(), cl_channel_type :: cl_channel_type() }). %% 1.2 -record(cl_image_desc, { image_type :: cl_mem_object_type(), image_width :: non_neg_integer(), image_height :: non_neg_integer(), image_depth :: non_neg_integer(), image_array_size :: non_neg_integer(), image_row_pitch :: non_neg_integer(), image_slice_pitch = 1 :: non_neg_integer(), num_mip_levels = 0 :: non_neg_integer(), num_samples = 0 :: non_neg_integer(), buffer :: cl_mem() %% when CL_MEM_OBJECT_IMAGE1D_BUFFER }). %% cl platform & default contex -record(cl, { platform, %% one platform ! devices, %% devices selected context %% context for devices }). -endif. cl-cl-1.2.3/rebar.config000066400000000000000000000016541301041406700147730ustar00rootroot00000000000000%% -*- erlang -*- %% Config file for cl-application {deps, []}. {erl_opts, [debug_info, fail_on_warning]}. {provider_hooks, [{post, [{ct, edoc}, {ct, dialyzer}]}]}. {pre_hooks, [{"(linux|darwin|solaris|win32)", compile, "make -C c_src"}, {"(freebsd)", compile, "gmake -C c_src"} ]}. 
%% Assumes bash (even on windows) {post_hooks, [%% Temporary hack for equal usage on rebar and rebar3 {"(linux|darwin|solaris|freebsd)", compile, "cp _build/default/lib/cl/ebin/* ebin 2> /dev/null | :"}, {"win32", compile, "xcopy _build\\default\\lib\\cl\\ebin\\*.* ebin\\ /c /q /i /y | echo ok"}, %% {"", clean, "rm -f test/*.beam"}, {"", clean, "rm -rf logs"}, {"", clean, "rm -rf doc/*.html"}, {"", clean, "rm -rf ebin/*"}, {"(linux|darwin|solaris|win32)", clean, "make -C c_src clean"}, {"(freebsd)", clean, "gmake -C c_src clean"} ]}. %% Make old-rebar avoid building cl_drv.so {port_specs, [{"priv/cl_nif.so", []}]}. cl-cl-1.2.3/src/000077500000000000000000000000001301041406700132725ustar00rootroot00000000000000cl-cl-1.2.3/src/cl.app.src000066400000000000000000000007501301041406700151620ustar00rootroot00000000000000{application, cl, [{description, "OpenCL binding for Erlang"}, {vsn, "1.2.3"}, {modules, [cl,cl10,cl11,cl12,cl13,clu]}, {env, []}, {applications,[kernel,stdlib]}, {maintainers, ["Tony Rogvall", "Dan Gudmundsson"]}, {licenses, ["BSD"]}, {links, [{"Github", "https://github.com/tonyrog/cl"}]}, %% Arrg hex auto pick up derivates, specify files instead {files, ["rebar.config", "README", "COPYRIGHT", "src", "include", "c_src/*.[ch]", "c_src/Makefile" ]} ]}. cl-cl-1.2.3/src/cl.erl000066400000000000000000002551301301041406700144020ustar00rootroot00000000000000%%%---- BEGIN COPYRIGHT ------------------------------------------------------- %%% %%% Copyright (C) 2007 - 2012, Rogvall Invest AB, %%% %%% This software is licensed as described in the file COPYRIGHT, which %%% you should have received as part of this distribution. The terms %%% are also available at http://www.rogvall.se/docs/copyright.txt. %%% %%% You may opt to use, copy, modify, merge, publish, distribute and/or sell %%% copies of the Software, and permit persons to whom the Software is %%% furnished to do so, under the terms of the COPYRIGHT file. 
%%% %%% This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY %%% KIND, either express or implied. %%% %%%---- END COPYRIGHT --------------------------------------------------------- %%% File : cl.erl %%% Author : Tony Rogvall %%% Description : Erlang OpenCL interface %%% Created : 25 Oct 2009 by Tony Rogvall %% @doc The erlang api for OpenCL. %% %% OpenCL (Open Computing Language) is an open royalty-free standard %% for general purpose parallel programming across CPUs, GPUs and %% other processors, giving software developers portable and efficient %% access to the power of these heterogeneous processing platforms. %% %% OpenCL supports a wide range of applications, ranging from embedded %% and consumer software to HPC solutions, through a low-level, %% high-performance, portable abstraction. By creating an efficient, %% close-to-the-metal programming interface, OpenCL will form the %% foundation layer of a parallel computing ecosystem of %% platform-independent tools, middleware and applications. %% %% OpenCL consists of an API for coordinating parallel computation %% across heterogeneous processors; and a cross-platform programming %% language with a well-specified computation environment. The OpenCL %% standard: %% %%
  • Supports both data- and task-based parallel programming models
  • %%
  • Utilizes a subset of ISO C99 with extensions for parallelism
  • %%
  • Defines consistent numerical requirements based on IEEE 754
  • %%
  • Defines a configuration profile for handheld and embedded devices
  • %%
  • Efficiently interoperates with OpenGL, OpenGL ES, and other graphics APIs
  • %% %% The specification is divided into a core specification that any %% OpenCL compliant implementation must support; a handheld/embedded %% profile which relaxes the OpenCL compliance requirements for %% handheld and embedded devices; and a set of optional extensions %% that are likely to move into the core specification in later %% revisions of the OpenCL specification. %% %% The documentation is re-used with the following copyright: %% %% Copyright © 2007-2009 The Khronos Group Inc. Permission is hereby %% granted, free of charge, to any person obtaining a copy of this %% software and/or associated documentation files (the "Materials"), %% to deal in the Materials without restriction, including without %% limitation the rights to use, copy, modify, merge, publish, %% distribute, sublicense, and/or sell copies of the Materials, and to %% permit persons to whom the Materials are furnished to do so, %% subject to the condition that this copyright notice and permission %% notice shall be included in all copies or substantial portions of %% the Materials. %% %% @headerfile "../include/cl.hrl" %% -module(cl). -on_load(init/0). -export([start/0, start/1, stop/0]). -export([noop/0]). -export([versions/0]). %% Platform -export([get_platform_ids/0]). -export([platform_info/0]). -export([get_platform_info/1,get_platform_info/2]). %% Devices -export([get_device_ids/0, get_device_ids/2]). -export([create_sub_devices/2]). -export([release_device/1]). -export([retain_device/1]). -export([device_info/0]). -export([device_info_10/1]). -export([device_info_11/1]). -export([device_info_12/1]). -export([get_device_info/1,get_device_info/2]). %% Context -export([create_context/1]). -export([create_context_from_type/1]). -export([release_context/1]). -export([retain_context/1]). -export([context_info/0]). -export([get_context_info/1,get_context_info/2]). %% Command queue -export([create_queue/3]). -export([set_queue_property/3]). -export([release_queue/1]). 
-export([retain_queue/1]). -export([queue_info/0]). -export([get_queue_info/1,get_queue_info/2]). %% Memory object -export([create_buffer/3, create_buffer/4]). -export([create_sub_buffer/4]). -export([release_mem_object/1]). -export([retain_mem_object/1]). -export([mem_object_info/0]). -export([get_mem_object_info/1,get_mem_object_info/2]). -export([image_info/0]). -export([get_image_info/1,get_image_info/2]). -export([get_supported_image_formats/3]). -export([create_image/5]). -export([create_image2d/7]). -export([create_image3d/9]). %% Sampler -export([create_sampler/4]). -export([release_sampler/1]). -export([retain_sampler/1]). -export([sampler_info/0]). -export([get_sampler_info/1,get_sampler_info/2]). %% Program -export([create_program_with_source/2]). -export([create_program_with_binary/3]). -export([create_program_with_builtin_kernels/3]). -export([release_program/1]). -export([retain_program/1]). -export([build_program/3, async_build_program/3]). -export([unload_compiler/0]). -export([unload_platform_compiler/1]). -export([compile_program/5, async_compile_program/5]). -export([link_program/4, async_link_program/4]). -export([program_info/0]). -export([get_program_info/1,get_program_info/2]). -export([program_build_info/0]). -export([get_program_build_info/2,get_program_build_info/3]). %% Kernel -export([create_kernel/2]). -export([create_kernels_in_program/1]). -export([set_kernel_arg/3]). -export([set_kernel_arg_size/3]). -export([release_kernel/1]). -export([retain_kernel/1]). -export([kernel_info/0]). -export([get_kernel_info/1,get_kernel_info/2]). -export([kernel_workgroup_info/0]). -export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]). -export([kernel_arg_info/0]). -export([get_kernel_arg_info/1, get_kernel_arg_info/2,get_kernel_arg_info/3]). %% Events -export([enqueue_task/3, enqueue_task/4]). -export([nowait_enqueue_task/3]). -export([enqueue_nd_range_kernel/5]). -export([enqueue_nd_range_kernel/6]). 
-export([nowait_enqueue_nd_range_kernel/5]). -export([enqueue_marker/1]). -export([enqueue_barrier/1]). -export([enqueue_marker_with_wait_list/2]). -export([enqueue_barrier_with_wait_list/2]). -export([enqueue_wait_for_events/2]). -export([enqueue_read_buffer/5]). -export([enqueue_read_buffer_rect/10]). -export([enqueue_write_buffer/6]). -export([enqueue_write_buffer/7]). -export([nowait_enqueue_write_buffer/6]). -export([enqueue_fill_buffer/6]). -export([enqueue_write_buffer_rect/11]). -export([enqueue_read_image/7]). -export([enqueue_write_image/8]). -export([enqueue_write_image/9]). -export([nowait_enqueue_write_image/8]). -export([enqueue_copy_buffer/7]). -export([enqueue_copy_buffer_rect/11]). -export([enqueue_copy_image/6]). -export([enqueue_fill_image/6]). -export([enqueue_copy_image_to_buffer/7]). -export([enqueue_copy_buffer_to_image/7]). -export([enqueue_map_buffer/6]). -export([enqueue_map_image/6]). -export([enqueue_unmap_mem_object/3]). -export([enqueue_migrate_mem_objects/4]). -export([release_event/1]). -export([retain_event/1]). -export([event_info/0]). -export([get_event_info/1, get_event_info/2]). -export([wait/1, wait/2]). -export([wait_for_events/1]). -export([async_flush/1, flush/1]). -export([async_finish/1, finish/1]). -export([async_wait_for_event/1, wait_for_event/1]). -import(lists, [map/2, reverse/1]). -include("../include/cl.hrl"). -define(is_platform(X), element(1,X) =:= platform_t). -define(is_device(X), element(1,X) =:= device_t). -define(is_context(X), element(1,X) =:= context_t). -define(is_queue(X), element(1,X) =:= command_queue_t). -define(is_mem(X), element(1,X) =:= mem_t). -define(is_sampler(X), element(1,X) =:= sampler_t). -define(is_program(X), element(1,X) =:= program_t). -define(is_kernel(X), element(1,X) =:= kernel_t). -define(is_event(X), element(1,X) =:= event_t). -ifdef(debug). -define(DBG(F,A), io:format((F),(A))). -else. -define(DBG(F,A), ok). -endif. -define(nif_stub,nif_stub_error(?LINE)). 
%% Raise the conventional "NIF not loaded" error.  Every ?nif_stub body ends
%% up here if it is called while the native library has not replaced it.
nif_stub_error(Line) ->
    erlang:nif_error({nif_not_loaded,module,?MODULE,line,Line}).

%% on_load hook: load the native library "cl_nif" from the priv directory.
%% The result of erlang:load_nif/2 is returned as is, so a load failure makes
%% the on_load hook (and thereby the module load) fail.
init() ->
    Nif = filename:join([nif_priv_dir(), "cl_nif"]),
    ?DBG("Loading: ~s\n", [Nif]),
    erlang:load_nif(Nif, 0).

%% Locate the priv directory that holds the NIF library.
%% code:priv_dir/1 returns {error,bad_name} when the application directory
%% is not known to the code server (for instance when only the ebin directory
%% was added with -pa).  In that case fall back to "../priv" relative to the
%% directory of this module's beam file, and finally to a relative "priv".
nif_priv_dir() ->
    case code:priv_dir(cl) of
        {error, bad_name} ->
            case code:which(?MODULE) of
                Beam when is_list(Beam) ->
                    filename:join([filename:dirname(Beam), "..", "priv"]);
                _ ->
                    %% cover_compiled | preloaded | non_existing
                    "priv"
            end;
        Dir ->
            Dir
    end.

%%
%% @type start_arg() = {'debug',boolean()}
%%
-type start_arg() :: {'debug',boolean()} .

%%
%% @spec start([start_arg()]) -> 'ok' | {'error', term()}
%%
%% @doc Start the OpenCL application.
%%
%% The arguments are currently ignored and 'ok' is always returned; the
%% NIF library itself is loaded by the on_load hook of this module.
%%
-spec start(Args::[start_arg()]) -> 'ok' | {'error', term()}.
start(_Args) ->
    ok.

%%
%% @spec start() -> 'ok' | {'error', term()}
%%
%% @doc Start the OpenCL application
%%
%% @equiv start([])
%%
-spec start() -> 'ok' | {'error', term()}.
start() ->
    start([]).

%%
%% @spec stop() -> 'ok' | {'error', term()}
%%
%% @doc Stop the OpenCL application.
%%
%% Currently this does nothing and always returns 'ok'.
%%
-spec stop() -> 'ok' | {'error', term()}.
stop() ->
    ok.

%%
%% @spec noop() -> 'ok' | {'error', cl_error()}
%%
%% @doc Run a no operation towards the NIF object. This call can be used
%% to measure the call overhead to the NIF object.
%%
-spec noop() -> 'ok' | {'error', cl_error()}.
noop() ->
    ?nif_stub.

%%
%% @spec versions() -> [{Major::integer(),Minor::integer()}]
%%
%% @doc Return the OpenCL versions, as {Major,Minor} tuples, reported by
%% the NIF library. device_info/0 uses this list to decide which device
%% info keys to offer.
%%
-spec versions() -> [{Major::integer(),Minor::integer()}].
versions() ->
    ?nif_stub.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Platform
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% @type cl_platform_info_key() =
%%    'profile' | 'version' | 'name' | 'vendor' | 'extensions'.
%% 'version' is part of the key set: platform_info/0 returns it.
-type cl_platform_info_key() ::
        'profile' | 'version' | 'name' | 'vendor' | 'extensions'.
%%
%% @type cl_platform_info() =
%%    {'profile',string()} |
%%    {'version', string()} |
%%    {'name',string()} |
%%    {'vendor',string()} |
%%    {'extensions',string()}.
%% One {Key, Value} pair per platform info key; every value is a string.
-type cl_platform_info() ::
        {'profile',string()} |
        {'version',string()} |
        {'name',string()} |
        {'vendor',string()} |
        {'extensions',string()}.

%%
%% @spec get_platform_ids() ->
%%    {'ok',[cl_platform_id()]} | {'error', cl_error()}
%% @doc Obtain the list of platforms available.
%%
%% The Erlang body is only a placeholder that raises a nif_not_loaded
%% error; the real implementation is provided by the NIF library.
-spec get_platform_ids() ->
    {'ok',[cl_platform_id()]} | {'error', cl_error()}.
get_platform_ids() ->
    ?nif_stub.

%%
%% @spec platform_info() ->
%%    [cl_platform_info_key()]
%% @doc Returns a list of the possible platform info keys.
%%
%% get_platform_info/1 queries exactly these keys, in this order.
-spec platform_info() -> [cl_platform_info_key()].
platform_info() ->
    [profile, version, name, vendor, extensions].

%%
%% @spec get_platform_info(Platform :: cl_platform_id(),
%%                         Info :: cl_platform_info_key()) ->
%%     {'ok',term()} | {'error', cl_error()}
%% @doc Get specific information about the OpenCL platform.
%%
    %% %%
    name
    Platform name string.
    %% %%
    vendor
    Platform vendor string.
    %% %%
    profile
    %%
    OpenCL profile string. Returns the profile name %% supported by the implementation. The profile name returned %% can be one of the following strings: %% %% FULL_PROFILE - if the implementation supports the OpenCL %% specification (functionality defined as part of the core %% specification and does not require any extensions to be supported). %% %% EMBEDDED_PROFILE - if the implementation supports the OpenCL %% embedded profile. The embedded profile is defined to be a subset for %% each version of OpenCL.
    %% %%
    version
    %%
    OpenCL version string. Returns the OpenCL version supported by the implementation.
    %% %%
    extensions
    Returns a space-separated list of extension %% names (the extension names themselves do not contain any spaces) %% supported by the platform. Extensions defined here must be %% supported by all devices associated with this platform.
    %%
    -spec get_platform_info(Platform :: cl_platform_id(),
                        Info :: cl_platform_info_key()) ->
    {'ok',term()} | {'error', cl_error()}.
%% Placeholder body; raises nif_not_loaded unless the NIF library replaced it.
get_platform_info(_Platform, _Info) ->
    ?nif_stub.

%%
%% @spec get_platform_info(Platform::cl_platform_id()) ->
%%     {'ok', [cl_platform_info()]} | {'error', cl_error()}
%% @doc Get all information about the OpenCL platform.
%%
%% Every key listed by platform_info/0 is looked up through
%% get_platform_info/2. The argument must be tagged as a platform
%% (first tuple element 'platform_t'), otherwise no clause matches.
%% @see get_platform_info/2
-spec get_platform_info(Platform::cl_platform_id()) ->
    {'ok', [cl_platform_info()]} | {'error', cl_error()}.
get_platform_info(Platform) when ?is_platform(Platform) ->
    get_info_list(Platform, platform_info(), fun get_platform_info/2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Devices
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% @type cl_device_type() =
%%   'gpu' | 'cpu' | 'accelerator' | 'all' | 'default'
%%
-type cl_device_type() :: 'gpu' | 'cpu' | 'accelerator' | 'all' | 'default'.

%%
%%
%% @type cl_device_types() = cl_device_type() | [cl_device_type()]
%%
%% Either a single device type or a list of device types.
-type cl_device_types() :: cl_device_type() | [cl_device_type()].
%%
%%
%% @type cl_device_info_key() = 'type' | 'vendor_id' | 'max_compute_units' |
%%  'max_work_item_dimensions' | 'max_work_group_size' |
%%  'max_work_item_sizes' |
%%  'preferred_vector_width_char' | 'preferred_vector_width_short' |
%%  'preferred_vector_width_int' | 'preferred_vector_width_long' |
%%  'preferred_vector_width_float' | 'preferred_vector_width_double' |
%%  'max_clock_frequency' | 'address_bits' | 'max_read_image_args' |
%%  'max_write_image_args' | 'max_mem_alloc_size' |
%%  'image2d_max_width' | 'image2d_max_height' | 'image3d_max_width' |
%%  'image3d_max_height' | 'image3d_max_depth' |
%%  'image_support' |
%%  'max_parameter_size' | 'max_samplers' |
%%  'mem_base_addr_align' | 'min_data_type_align_size' |
%%  'single_fp_config' | 'global_mem_cache_type' |
%%  'global_mem_cacheline_size' | 'global_mem_cache_size' | 'global_mem_size' |
%%  'max_constant_buffer_size' | 'max_constant_args' |
%%  'local_mem_type' | 'local_mem_size' | 'error_correction_support' |
%%  'profiling_timer_resolution' | 'endian_little' | 'available' |
%%  'compiler_available' | 'execution_capabilities' | 'queue_properties' |
%%  'name' | 'vendor' | 'driver_version' | 'profile' | 'version' |
%%  'extensions' | 'platform' |
%%  'preferred_vector_width_half' | 'host_unified_memory' |
%%  'native_vector_width_char' | 'native_vector_width_short' |
%%  'native_vector_width_int' | 'native_vector_width_long' |
%%  'native_vector_width_float' | 'native_vector_width_double' |
%%  'native_vector_width_half' | 'opencl_c_version' |
%%  'double_fp_config' | 'linker_available' | 'built_in_kernels' |
%%  'image_max_buffer_size' | 'image_max_array_size' | 'parent_device' |
%%  'partition_max_sub_devices' | 'partition_properties' |
%%  'partition_affinity_domain' | 'partition_type' | 'reference_count' |
%%  'preferred_interop_user_sync' | 'printf_buffer_size'
%%
%% The type covers every key that device_info_10/1, device_info_11/1 and
%% device_info_12/1 can produce, since device_info/0 is specified to return
%% a list of cl_device_info_key().
-type cl_device_info_key() ::
        %% OpenCL 1.0 keys (see device_info_10/1)
        'type' | 'vendor_id' | 'max_compute_units' |
        'max_work_item_dimensions' | 'max_work_group_size' |
        'max_work_item_sizes' |
        'preferred_vector_width_char' | 'preferred_vector_width_short' |
        'preferred_vector_width_int' | 'preferred_vector_width_long' |
        'preferred_vector_width_float' | 'preferred_vector_width_double' |
        'max_clock_frequency' | 'address_bits' | 'max_read_image_args' |
        'max_write_image_args' | 'max_mem_alloc_size' |
        'image2d_max_width' | 'image2d_max_height' | 'image3d_max_width' |
        'image3d_max_height' | 'image3d_max_depth' |
        'image_support' |
        'max_parameter_size' | 'max_samplers' |
        'mem_base_addr_align' | 'min_data_type_align_size' |
        'single_fp_config' | 'global_mem_cache_type' |
        'global_mem_cacheline_size' | 'global_mem_cache_size' |
        'global_mem_size' |
        'max_constant_buffer_size' | 'max_constant_args' |
        'local_mem_type' | 'local_mem_size' | 'error_correction_support' |
        'profiling_timer_resolution' | 'endian_little' | 'available' |
        'compiler_available' | 'execution_capabilities' | 'queue_properties' |
        'name' | 'vendor' | 'driver_version' | 'profile' | 'version' |
        'extensions' | 'platform' |
        %% OpenCL 1.1 keys (see device_info_11/1)
        'preferred_vector_width_half' | 'host_unified_memory' |
        'native_vector_width_char' | 'native_vector_width_short' |
        'native_vector_width_int' | 'native_vector_width_long' |
        'native_vector_width_float' | 'native_vector_width_double' |
        'native_vector_width_half' | 'opencl_c_version' |
        %% OpenCL 1.2 keys (see device_info_12/1)
        'double_fp_config' | 'linker_available' | 'built_in_kernels' |
        'image_max_buffer_size' | 'image_max_array_size' | 'parent_device' |
        'partition_max_sub_devices' | 'partition_properties' |
        'partition_affinity_domain' | 'partition_type' | 'reference_count' |
        'preferred_interop_user_sync' | 'printf_buffer_size'.

%%
%% @type cl_device_info() = {cl_device_info_key(), term()}
%% @todo specify all info types
-type cl_device_info() :: {cl_device_info_key(), term()}.

%%
%% @spec get_device_ids() -> {'ok',[cl_device_id()]} | {'error',cl_error()}
%%
%% @equiv get_device_ids(undefined,all)
%%
-spec get_device_ids() -> {'ok',[cl_device_id()]} | {'error',cl_error()}.
get_device_ids() ->
    get_device_ids(undefined, all).

%%
%% @spec get_device_ids(Platform::cl_platform_id(),Type::cl_device_types()) ->
%%     {'ok',[cl_device_id()]} | {'error',cl_error()}
%% @doc Obtain the list of devices available on a platform.
%%
    Platform
    %% %% Refers to the platform ID returned by get_platform_ids or can be %% NULL. If platform is NULL, the behavior is implementation-defined.
    %% %%
    Type
    %% %% A list that identifies the type of OpenCL device. The %% device_type can be used to query specific OpenCL devices or all %% OpenCL devices available.
    %% %%
    %%
%% get_device_ids/2 may return all or a subset of the actual
%% physical devices present in the platform and that match
%% device_type.
%%
%% The application can query specific capabilities of the OpenCL
%% device(s) returned by get_device_ids/2. This can be used by the
%% application to determine which device(s) to use.
%%
-spec get_device_ids(undefined|cl_platform_id(),Type::cl_device_types()) ->
    {'ok',[cl_device_id()]} | {'error',cl_error()}.
get_device_ids(_Platform, _Type) ->
    ?nif_stub.

%% @doc Partition a device into sub devices.
%%
%% Property selects the partition scheme:
%% {equally, N} asks for sub devices of N compute units each,
%% {by_counts, [N1,N2,...]} gives the compute unit count of each sub device,
%% {by_affinity_domain, Domain} splits along the given cache/NUMA domain.
%%
%% The 'equally' scheme is a 2-tuple: it has to carry the compute unit
%% count, which a 1-tuple can not do.
-spec create_sub_devices(Device::cl_device_id(),
                         Property::
                           {equally, non_neg_integer()} |
                           {by_counts, [non_neg_integer()]} |
                           {by_affinity_domain,
                            numa|l4_cache|l3_cache|l2_cache|l1_cache|
                            next_partitionable}) ->
    {'ok',[cl_device_id()]} | {'error',cl_error()}.
create_sub_devices(_Device, _Properties) ->
    ?nif_stub.

%% @doc Release a device. In this module the call does nothing and
%% always returns 'ok'.
-spec release_device(Device::cl_device_id()) ->
    'ok' | {'error', cl_error()}.
release_device(_Device) ->
    ok.

%% @doc Retain a device. In this module the call does nothing and
%% always returns 'ok'.
-spec retain_device(Device::cl_device_id()) ->
    'ok' | {'error', cl_error()}.
retain_device(_Device) ->
    ok.

%%
%% @spec device_info() -> [cl_device_info_key()]
%% @doc Return a list of possible device info queries.
%%
%% The list is assembled from the versions reported by versions/0: for
%% each of {1,0}, {1,1} and {1,2} found there, the matching key set is
%% put in front of what has been collected so far. Any other version
%% tuple is skipped.
%% @see get_device_info/2
-spec device_info() -> [cl_device_info_key()].
device_info() ->
    lists:foldl(
      fun({1,2},Acc) -> device_info_12(Acc);
         ({1,1},Acc) -> device_info_11(Acc);
         ({1,0},Acc) -> device_info_10(Acc);
         (_, Acc) -> Acc
      end, [], versions()).
%% @doc Put the OpenCL 1.0 device info keys in front of Tail.
device_info_10(Tail) ->
    [type,
     vendor_id,
     max_compute_units,
     max_work_item_dimensions,
     max_work_group_size,
     max_work_item_sizes,
     preferred_vector_width_char,
     preferred_vector_width_short,
     preferred_vector_width_int,
     preferred_vector_width_long,
     preferred_vector_width_float,
     preferred_vector_width_double,
     max_clock_frequency,
     address_bits,
     max_read_image_args,
     max_write_image_args,
     max_mem_alloc_size,
     image2d_max_width,
     image2d_max_height,
     image3d_max_width,
     image3d_max_height,
     image3d_max_depth,
     image_support,
     max_parameter_size,
     max_samplers,
     mem_base_addr_align,
     min_data_type_align_size,
     single_fp_config,
     global_mem_cache_type,
     global_mem_cacheline_size,
     global_mem_cache_size,
     global_mem_size,
     max_constant_buffer_size,
     max_constant_args,
     local_mem_type,
     local_mem_size,
     error_correction_support,
     profiling_timer_resolution,
     endian_little,
     available,
     compiler_available,
     execution_capabilities,
     queue_properties,
     name,
     vendor,
     driver_version,
     profile,
     version,
     extensions,
     platform] ++ Tail.

%% @doc Put the OpenCL 1.1 device info keys in front of Tail.
device_info_11(Tail) ->
    [preferred_vector_width_half,
     host_unified_memory,
     native_vector_width_char,
     native_vector_width_short,
     native_vector_width_int,
     native_vector_width_long,
     native_vector_width_float,
     native_vector_width_double,
     native_vector_width_half,
     opencl_c_version] ++ Tail.

%% @doc Put the OpenCL 1.2 device info keys in front of Tail.
%% The keys image_pitch_alignment and image_base_address_alignment are
%% deliberately left out (they were commented out in the key list).
device_info_12(Tail) ->
    [double_fp_config,
     linker_available,
     built_in_kernels,
     image_max_buffer_size,
     image_max_array_size,
     parent_device,
     partition_max_sub_devices,
     partition_properties,
     partition_affinity_domain,
     partition_type,
     reference_count,
     preferred_interop_user_sync,
     printf_buffer_size] ++ Tail.

%%
%% @spec get_device_info(DevID::cl_device_id(), Info::cl_device_info_key()) ->
%%   {'ok', term()} | {'error', cl_error()}
%% @doc Get information about an OpenCL device.
%%
%%
    'type'

    The OpenCL device type. Currently %% supported values are one of or a combination of: CL_DEVICE_TYPE_CPU, %% CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_ACCELERATOR, or %% CL_DEVICE_TYPE_DEFAULT.

    %% %%
    'vendor_id'

    A unique device vendor identifier. An %% example of a unique device identifier could be the PCIe ID.

    %% %%
    'max_compute_units'

    The number of parallel compute %% cores on the OpenCL device. The minimum value is 1.

    %% %%
    'max_work_item_dimensions'

    Maximum dimensions that %% specify the global and local work-item IDs used by the data parallel %% execution model. (@see enqueue_nd_range_kernel/5). The %% minimum value is 3.

    %% %%
    'max_work_group_size'

    Maximum number of %% work-items in a work-group executing a kernel using the data parallel %% execution model. (@see enqueue_nd_range_kernel/5). The minimum value %% is 1.

    %% %%
    'max_work_item_sizes'

    Maximum number of work-items %% that can be specified in each dimension of the work-group to enqueue_nd_range_kernel/5.

    %%

    Returns n entries, where n is the value returned by the query for %% CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. The minimum value is (1, 1, %% 1).

    %% %%
    'preferred_vector_width_TYPE'

    Preferred native vector %% width size for built-in scalar types that can be put into vectors. The %% vector width is defined as the number of scalar elements that can be %% stored in the vector.

    If the cl_khr_fp64 extension is %% not supported, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE must return %% 0.

    %% %%
    'max_clock_frequency'

    Maximum configured clock %% frequency of the device in MHz.

    %% %%
    'address_bits'
    The default compute device address %% space size specified as an unsigned integer value in bits. Currently %% supported values are 32 or 64 bits.
    %% %%
    'max_read_image_args'

    Max number of simultaneous %% image objects that can be read by a kernel. The minimum value is 128 %% if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.

    %% %%
    'max_write_image_args'

    Max number of %% simultaneous image objects that can be written to by a kernel. The %% minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.

    %% %%
    'max_mem_alloc_size'

    Max size of memory object %% allocation in bytes. The minimum value is max (1/4th of %% CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024)

    %% %%
    'image2d_max_width'

    Max width of 2D image in %% pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is %% CL_TRUE.

    %% %%
    'image2d_max_height'

    Max height of 2D image in %% pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is %% CL_TRUE.

    %% %%
    'image3d_max_width'

    Max width of 3D image in %% pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is %% CL_TRUE.

    %% %%
    'image3d_max_height'

    Max height of 3D image in %% pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is %% CL_TRUE.

    %% %%
    'image3d_max_depth'

    Max depth of 3D image in %% pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is %% CL_TRUE.

    %% %%
    'image_support'

    Is CL_TRUE if images are supported by %% the OpenCL device and CL_FALSE otherwise.

    %% %%
    'max_parameter_size'

    Max size in bytes of the %% arguments that can be passed to a kernel. The minimum value is %% 256.

    %% %%
    'max_samplers'

    Maximum number of samplers that can be %% used in a kernel. The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT %% is CL_TRUE.

    %% %%
    'mem_base_addr_align'

    Describes the alignment in bits %% of the base address of any allocated memory object.

    %% %%
    'min_data_type_align_size'

    The smallest alignment in %% bytes which can be used for any data type.

    %%
    'single_fp_config'

    Describes single precision %% floating-point capability of the device. This is a bit-field that %% describes one or more of the following values:

    CL_FP_DENORM - %% denorms are supported

    CL_FP_INF_NAN - INF and quiet NaNs are %% supported

    CL_FP_ROUND_TO_NEAREST - round to nearest even %% rounding mode supported

    %%

    CL_FP_ROUND_TO_ZERO - round to zero rounding mode supported

    %%

    CL_FP_ROUND_TO_INF - round to +ve and -ve infinity rounding modes supported

    %%

    CL_FP_FMA - IEEE754-2008 fused multiply-add is supported

    %%

    The mandated minimum floating-point capability is CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN.

    %%
    %% %%
    'global_mem_cache_type'

    Return type: %% cl_device_mem_cache_type

    Type of global memory cache %% supported. Valid values are: CL_NONE, CL_READ_ONLY_CACHE, and %% CL_READ_WRITE_CACHE.

    %% %%
    'global_mem_cacheline_size'
    %%

    Size of global memory cache line in bytes.

    %%
    %% %%
    'global_mem_cache_size'
    %%

    Size of global memory cache in bytes.

    %%
    %% %%
    'global_mem_size'
    %%

    Size of global device memory in bytes.

    %%
    %% %%
    'max_constant_buffer_size'
    %%

    Max size in bytes of a constant buffer allocation. The minimum value is 64 KB.

    %% %%
    'max_constant_args'

    Max number of arguments %% declared with the __constant qualifier in a kernel. The minimum %% value is 8.

    %% %%
    'local_mem_type'

    Type of local memory %% supported. This can be set to CL_LOCAL implying dedicated local memory %% storage such as SRAM, or CL_GLOBAL.

    %% %%
    'local_mem_size'

    Size of local memory arena in %% bytes. The minimum value is 16 KB.

    %% %%
    'error_correction_support'
    Is CL_TRUE if the device %% implements error correction for the memories, caches, registers %% etc. in the device. Is CL_FALSE if the device does not implement error %% correction. This can be a requirement for certain clients of %% OpenCL.
    %% %%
    'profiling_timer_resolution'

    Describes the resolution %% of device timer. This is measured in nanoseconds.

    %% %%
    'endian_little'
    Is CL_TRUE if the OpenCL device is a %% little endian device and CL_FALSE otherwise.
    %% %%
    'available'
    Is CL_TRUE if the device is available and %% CL_FALSE if the device is not available.
    %% %%
    'compiler_available'
    Is CL_FALSE if the implementation %% does not have a compiler available to compile the program source. Is %% CL_TRUE if the compiler is available. This can be CL_FALSE for the %% embedded platform profile only.
    %% %%
    'execution_capabilities'

    Return type: %% cl_device_exec_capabilities

    Describes the execution %% capabilities of the device. This is a bit-field that describes one or %% more of the following values:

    CL_EXEC_KERNEL - The OpenCL %% device can execute OpenCL kernels.

    CL_EXEC_NATIVE_KERNEL - The %% OpenCL device can execute native kernels.

    The mandated minimum %% capability is CL_EXEC_KERNEL.

    %% %%
    'queue_properties'

    Describes the command-queue %% properties supported by the device. This is a bit-field that %% describes one or more of the following values:

    %%

    'out_of_order_exec_mode_enable'

    %%

    'profiling_enable'

    These properties are described in %% the table for create_queue/3 . The mandated minimum capability is %% 'profiling_enable'.

    %% %%
    'name'

    Device name string.

    %% %%
    'vendor'

    Vendor name string.

    %% %%
    'driver_version'

    OpenCL software driver version string

    %% %%
    'profile'

    OpenCL profile string. Returns the profile %% name supported by the device (see note). The profile name returned can %% be one of the following strings:

    %%

    FULL_PROFILE - if the device supports the OpenCL specification %% (functionality defined as part of the core %% specification and does not require any extensions %% to be supported).

    EMBEDDED_PROFILE - if %% the device supports the OpenCL embedded %% profile.

    %% %%
    'version'

    OpenCL version string.

    %% %%
    'extensions'

    Returns a space separated list of extension names (the extension %% names themselves do not contain any spaces).

    %% %%
    'platform'

    The platform associated with this device.

    %% %%
    %%
%% NOTE: CL_DEVICE_PROFILE: The platform profile returns the profile that is
%% implemented by the OpenCL framework. If the platform profile
%% returned is FULL_PROFILE, the OpenCL framework will support devices
%% that are FULL_PROFILE and may also support devices that are
%% EMBEDDED_PROFILE. The compiler must be available for all devices
%% i.e. CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE. If the platform
%% profile returned is EMBEDDED_PROFILE, then devices that are only
%% EMBEDDED_PROFILE are supported.
-spec get_device_info(Device::cl_device_id(), Info::cl_device_info_key()) ->
    {'ok', term()} | {'error', cl_error()}.
get_device_info(_Device, _Info) ->
    ?nif_stub.

%%
%% @spec get_device_info(Device) ->
%%     {'ok', [cl_device_info()]} | {'error', cl_error()}
%% @doc Get all device info.
%%
%% Looks up every key listed by device_info/0 through get_device_info/2.
%% @see get_device_info/2
-spec get_device_info(Device::cl_device_id()) ->
    {'ok', [cl_device_info()]} | {'error', cl_error()}.
get_device_info(Device) ->
    InfoKeys = device_info(),
    get_info_list(Device, InfoKeys, fun get_device_info/2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Context
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% @type cl_context_info_key() = 'reference_count' | 'devices' | 'properties'
-type cl_context_info_key() :: 'reference_count' | 'devices' | 'properties'.

%% @type cl_context_info() =
%%   {'reference_count', cl_uint()} |
%%   {'devices', [cl_device_id()]} |
%%   {'properties', [cl_int()]}
-type cl_context_info() ::
        {'reference_count', cl_uint()} |
        {'devices', [cl_device_id()]} |
        {'properties', [cl_int()]}.

%%
%% @spec create_context(DeviceList::[cl_device_id()]) ->
%%    {'ok', cl_context()} | {'error', cl_error()}
%% @doc Creates an OpenCL context.
%%
%% An OpenCL context is created with one or more devices. Contexts are
%% used by the OpenCL runtime for managing objects such as
%% command-queues, memory, program and kernel objects and for
%% executing kernels on one or more devices specified in the context.
%%
%% NOTE: create_context/1 and create_context_from_type/1 perform an
%% implicit retain. This is very helpful for 3rd party libraries,
%% which typically get a context passed to them by the
%% application. However, it is possible that the application may
%% delete the context without informing the library. Allowing
%% functions to attach to (i.e. retain) and release a context solves
%% the problem of a context being used by a library no longer being
%% valid.
-spec create_context(DeviceList::[cl_device_id()]) ->
    {'ok', cl_context()} | {'error', cl_error()}.
create_context(_DeviceList) ->
    ?nif_stub.

%%
%% @spec create_context_from_type(Type::cl_device_types())->
%%    {'ok', cl_context()} | {'error', cl_error()}
%% @doc Create an OpenCL context from a device type that identifies the specific device(s) to use.
%%
%% The devices are looked up with get_device_ids(undefined, Type) and the
%% resulting list is handed to create_context/1. Any other result of the
%% lookup is returned unchanged.
%%
%% NOTE:
%% create_context_from_type/1 may return all or a subset of the
%% actual physical devices present in the platform and that match
%% device_type.
%%
%% create_context/1 and create_context_from_type/1 perform an
%% implicit retain. This is very helpful for 3rd party libraries,
%% which typically get a context passed to them by the
%% application. However, it is possible that the application may
%% delete the context without informing the library. Allowing
%% functions to attach to (i.e. retain) and release a context solves
%% the problem of a context being used by a library no longer being
%% valid.
-spec create_context_from_type(Type::cl_device_types())->
    {'ok', cl_context()} | {'error', cl_error()}.
create_context_from_type(Type) ->
    case get_device_ids(undefined, Type) of
        {ok, Devices} ->
            create_context(Devices);
        Failure ->
            Failure
    end.
%%
%% @spec release_context(Context::cl_context()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Decrement the context reference count.
%%
%% After the context reference count becomes zero and all the objects
%% attached to context (such as memory objects, command-queues) are
%% released, the context is deleted.
-spec release_context(Context::cl_context()) ->
    'ok' | {'error', cl_error()}.

release_context(Ctx) when ?is_context(Ctx) ->
    %% This clause only checks the handle with ?is_context and returns ok.
    ok.

%%
%% @spec retain_context(Context::cl_context()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Increment the context reference count.
%% @see create_context
-spec retain_context(Context::cl_context()) ->
    'ok' | {'error', cl_error()}.

retain_context(Ctx) when ?is_context(Ctx) ->
    %% This clause only checks the handle with ?is_context and returns ok.
    ok.

%%
%% @spec context_info() -> [cl_context_info_key()]
%% @doc List context info queries.
-spec context_info() -> [cl_context_info_key()].

context_info() ->
    [reference_count,
     devices,
     properties].

%%
%% @spec get_context_info(Context::cl_context(),Info::cl_context_info_key()) ->
%%   {'ok', term()} | {'error', cl_error()}
%% @doc Query information about a context.
%%
%%
    reference_count
    Return the context reference %% count. The reference count returned should be considered %% immediately stale. It is unsuitable for general use in %% applications. This feature is provided for identifying memory %% leaks.
    %% %%
    devices
    Return the list of devices in context.
    %% %%
    properties
    Return the context properties.
    %%
    %%
-spec get_context_info(Context::cl_context(), Info::cl_context_info_key()) ->
    {'ok', term()} | {'error', cl_error()}.

get_context_info(_Ctx, _InfoKey) ->
    ?nif_stub.

%% @spec get_context_info(Context::cl_context()) ->
%%    {'ok', [cl_context_info()]} | {'error', cl_error()}
%% @doc Get all context info.
%%
%% Queries every key listed by context_info/0 through get_context_info/2.
%% @see get_context_info/2
-spec get_context_info(Context::cl_context()) ->
    {'ok', [cl_context_info()]} | {'error', cl_error()}.

get_context_info(Context) when ?is_context(Context) ->
    InfoKeys = context_info(),
    get_info_list(Context, InfoKeys, fun get_context_info/2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Command Queue (Queue)
%% @type cl_queue_property() = 'out_of_order_exec_mode_enable' |
%%                             'profiling_enabled'
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-type cl_queue_property() ::
        'out_of_order_exec_mode_enable'
      | 'profiling_enabled'.

%%
%% @spec create_queue(Context::cl_context(),Device::cl_device_id(),
%%                    Properties::[cl_queue_property()]) ->
%%    {'ok', cl_queue()} | {'error', cl_error()}
%% @doc Create a command-queue on a specific device.
%%
%%
    %%
    'out_of_order_exec_mode_enable'
    Determines %% whether the commands queued in the command-queue are executed %% in-order or out-of-order. If set, the commands in the command-queue %% are executed out-of-order. Otherwise, commands are executed %% in-order.
    %% %%
    'profiling_enabled'
    Enable or disable profiling of %% commands in the command-queue. If set, the profiling of commands is %% enabled. Otherwise profiling of commands is disabled. See %% clGetEventProfilingInfo for more information. %%
    %%
    %% %% The OpenCL functions that are submitted to a command-queue are %% enqueued in the order the calls are made but can be configured to %% execute in-order or out-of-order. The properties argument in %% clCreateCommandQueue can be used to specify the execution order. %% %% If the 'out_of_order_exec_mode_enable' property of a %% command-queue is not set, the commands enqueued to a command-queue %% execute in order. For example, if an application calls %% clEnqueueNDRangeKernel to execute kernel A followed by a %% clEnqueueNDRangeKernel to execute kernel B, the application can %% assume that kernel A finishes first and then kernel B is %% executed. If the memory objects output by kernel A are inputs to %% kernel B then kernel B will see the correct data in memory objects %% produced by execution of kernel A. If the %% 'out_of_order_exec_mode_enable' property of a commandqueue %% is set, then there is no guarantee that kernel A will finish before %% kernel B starts execution. %% %% Applications can configure the commands enqueued to a command-queue %% to execute out-of-order by setting the %% 'out_of_order_exec_mode_enable' property of the %% command-queue. This can be specified when the command-queue is %% created or can be changed dynamically using %% clCreateCommandQueue. In out-of-order execution mode there is no %% guarantee that the enqueued commands will finish execution in the %% order they were queued. As there is no guarantee that kernels will %% be executed in order, i.e. based on when the clEnqueueNDRangeKernel %% calls are made within a command-queue, it is therefore possible %% that an earlier clEnqueueNDRangeKernel call to execute kernel A %% identified by event A may execute and/or finish later than a %% clEnqueueNDRangeKernel call to execute kernel B which was called by %% the application at a later point in time. To guarantee a specific %% order of execution of kernels, a wait on a particular event (in %% this case event A) can be used. 
%% The wait for event A can be
%% specified in the event_wait_list argument to clEnqueueNDRangeKernel
%% for kernel B.
%%
%% In addition, a wait for events or a barrier command can be enqueued
%% to the command-queue. The wait for events command ensures that
%% previously enqueued commands identified by the list of events to
%% wait for have finished before the next batch of commands is
%% executed. The barrier command ensures that all previously enqueued
%% commands in a command-queue have finished execution before the next
%% batch of commands is executed.
%%
%% Similarly, commands to read, write, copy or map memory objects that
%% are enqueued after clEnqueueNDRangeKernel, clEnqueueTask or
%% clEnqueueNativeKernel commands are not guaranteed to wait for
%% kernels scheduled for execution to have completed (if the
%% 'out_of_order_exec_mode_enable' property is set). To ensure
%% correct ordering of commands, the event object returned by
%% clEnqueueNDRangeKernel, clEnqueueTask or clEnqueueNativeKernel can
%% be used to enqueue a wait for event or a barrier command can be
%% enqueued that must complete before reads or writes to the memory
%% object(s) occur.
-spec create_queue(Context::cl_context(), Device::cl_device_id(),
                   Properties::[cl_queue_property()]) ->
    {'ok', cl_queue()} | {'error', cl_error()}.

create_queue(_Ctx, _DeviceId, _Props) ->
    ?nif_stub.

%%
%% @spec set_queue_property(Queue::cl_queue(),
%%                          Properties::[cl_queue_property()],
%%                          Enable::bool()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Function is deprecated and has been removed.
%%
%% Every call raises the error 'deprecated'.
-spec set_queue_property(_, _, _) -> no_return().

set_queue_property(_Q, _Props, _Flag) ->
    erlang:error(deprecated).

%%
%% @spec release_queue(Queue::cl_queue()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Decrements the command_queue reference count.
%%
%% After the command_queue reference count becomes zero and all
%% commands queued to command_queue have finished (e.g., kernel
%% executions, memory object updates, etc.), the command-queue is
%% deleted.
-spec release_queue(Queue::cl_queue()) ->
    'ok' | {'error', cl_error()}.

release_queue(Q) when ?is_queue(Q) ->
    %% This clause only checks the handle with ?is_queue and returns ok.
    ok.

%%
%% @spec retain_queue(Queue::cl_queue()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Increments the command_queue reference count.
%%
%% create_queue/3 performs an implicit retain. This is very
%% helpful for 3rd party libraries, which typically get a
%% command-queue passed to them by the application. However, it is
%% possible that the application may delete the command-queue without
%% informing the library. Allowing functions to attach to
%% (i.e. retain) and release a command-queue solves the problem of a
%% command-queue being used by a library no longer being valid.
-spec retain_queue(Queue::cl_queue()) ->
    'ok' | {'error', cl_error()}.

retain_queue(Q) when ?is_queue(Q) ->
    %% This clause only checks the handle with ?is_queue and returns ok.
    ok.

%% @spec queue_info() -> [queue_info_keys()]
%% @doc Returns the list of possible queue info items.
queue_info() ->
    [context,
     device,
     reference_count,
     properties].

%% @spec get_queue_info(Queue, Info) -> {ok, term()}
%% @doc Return the specified queue info
get_queue_info(_Q, _InfoKey) ->
    ?nif_stub.

%% @spec get_queue_info(Queue) -> [queue_info_keys()]
%% @doc Returns all queue info.
%%
%% Queries every key listed by queue_info/0 through get_queue_info/2.
get_queue_info(Queue) when ?is_queue(Queue) ->
    InfoKeys = queue_info(),
    get_info_list(Queue, InfoKeys, fun get_queue_info/2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Mem
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% @type cl_mem_flag() = 'read_write' | 'write_only' | 'read_only' |
%%                       'use_host_ptr' | 'alloc_host_ptr' |
%%                       'copy_host_ptr'
%%
-type cl_mem_flag() ::
        'read_write'
      | 'write_only'
      | 'read_only'
      | 'use_host_ptr'
      | 'alloc_host_ptr'
      | 'copy_host_ptr'.
%%
%% @spec create_buffer(Context::cl_context(),Flags::[cl_mem_flag()],
%%                     Size::non_neg_integer()) ->
%%    {'ok', cl_mem()} | {'error', cl_error()}
%%
%% @equiv create_buffer(Context,Flags,Size,[])
%%
-spec create_buffer(Context::cl_context(), Flags::[cl_mem_flag()],
                    Size::non_neg_integer()) ->
    {'ok', cl_mem()} | {'error', cl_error()}.

create_buffer(Context, Flags, Size) ->
    NoData = [],
    create_buffer(Context, Flags, Size, NoData).

%%
%% @spec create_buffer(Context::cl_context(),Flags::[cl_mem_flag()],
%%                     Size::non_neg_integer(), Data::iodata()) ->
%%    {'ok', cl_mem()} | {'error', cl_error()}
%% @doc Creates a buffer object.
%%
-spec create_buffer(Context::cl_context(), Flags::[cl_mem_flag()],
                    Size::non_neg_integer(), Data::iodata()) ->
    {'ok', cl_mem()} | {'error', cl_error()}.

create_buffer(_Ctx, _MemFlags, _ByteSize, _InitData) ->
    ?nif_stub.

%%
%% @doc Creates a sub-buffer object from an existing buffer.
%%
-spec create_sub_buffer(Buffer::cl_mem(), Flags::[cl_mem_flag()],
                        Type::'region', Info::[non_neg_integer()]) ->
    {'ok', cl_mem()} | {'error', cl_error()}.
%%
create_sub_buffer(_Parent, _MemFlags, _CreateType, _CreateInfo) ->
    ?nif_stub.

%%
%% @spec release_mem_object(Mem::cl_mem()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Decrements the memory object reference count.
%%
%% After the memobj reference count becomes zero and commands queued
%% for execution on a command-queue(s) that use memobj have finished,
%% the memory object is deleted.
-spec release_mem_object(Mem::cl_mem()) ->
    'ok' | {'error', cl_error()}.

release_mem_object(MemObj) when ?is_mem(MemObj) ->
    %% This clause only checks the handle with ?is_mem and returns ok.
    ok.

%%
%% @spec retain_mem_object(Mem::cl_mem()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Increments the memory object reference count.
-spec retain_mem_object(Mem::cl_mem()) ->
    'ok' | {'error', cl_error()}.

retain_mem_object(MemObj) when ?is_mem(MemObj) ->
    %% This clause only checks the handle with ?is_mem and returns ok.
    ok.

-type cl_mem_info_key() ::
        'object_type'
      | 'flags'
      | 'size'
      | 'host_ptr'
      | 'map_count'
      | 'reference_count'
      | 'context'.

%%
%% @spec mem_object_info() ->
%%    [cl_mem_info_key()]
%% @doc Returns a list of the possible mem info keys.
-spec mem_object_info() -> [cl_mem_info_key()].

mem_object_info() ->
    [object_type,
     flags,
     size,
     host_ptr,
     map_count,
     reference_count,
     context].

%%
%% @spec get_mem_object_info(Mem::cl_mem(), InfoType::cl_mem_info_key()) ->
%%    {'ok', term()} | {'error', cl_error()}
%%
%% @doc Used to get InfoType information that is common to all memory objects
%% (buffer and image objects).
-spec get_mem_object_info(Mem::cl_mem(), Info::cl_mem_info_key()) ->
    {'ok', term()} | {'error', cl_error()}.

get_mem_object_info(_MemObj, _InfoKey) ->
    ?nif_stub.

%%
%% @spec get_mem_object_info(Mem::cl_mem()) ->
%%    {'ok', term()} | {'error', cl_error()}
%%
%% @doc Used to get all information that is common to all memory objects
%% (buffer and image objects).
get_mem_object_info(Mem) when ?is_mem(Mem) ->
    InfoKeys = mem_object_info(),
    get_info_list(Mem, InfoKeys, fun get_mem_object_info/2).

%% @doc Returns the list of image info keys.
image_info() ->
    [format,
     element_size,
     row_pitch,
     slice_pitch,
     width,
     height,
     depth].

%% @doc Returns the image information selected by one of the image_info/0 keys.
get_image_info(_MemObj, _InfoKey) ->
    ?nif_stub.

%% @doc Queries every key listed by image_info/0 through get_image_info/2.
get_image_info(Mem) when ?is_mem(Mem) ->
    InfoKeys = image_info(),
    get_info_list(Mem, InfoKeys, fun get_image_info/2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Sample
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% @type cl_addressing_mode() = 'none'|'clamp_to_edge'|'clamp'|'repeat'
%%
-type cl_addressing_mode() ::
        'none'
      | 'clamp_to_edge'
      | 'clamp'
      | 'repeat'.

%% @type cl_filter_mode() = 'nearest' | 'linear'
-type cl_filter_mode() ::
        'nearest'
      | 'linear'.

%%
%% @spec create_sampler(Context::cl_context(),Normalized::boolean(),
%%                      AddressingMode::cl_addressing_mode(),
%%                      FilterMode::cl_filter_mode()) ->
%%    {'ok', cl_sampler()} | {'error', cl_error()}
%% @doc Creates a sampler object.
%%
%%  A sampler object describes how to sample an image when the image
%%  is read in the kernel. The built-in functions to read from an
%%  image in a kernel take a sampler as an argument.
%%  The sampler
%%  arguments to the image read function can be sampler objects
%%  created using OpenCL functions and passed as argument values to
%%  the kernel or can be samplers declared inside a kernel. In this
%%  section we discuss how sampler objects are created using OpenCL
%%  functions.
-spec create_sampler(Context::cl_context(), Normalized::boolean(),
                     AddressingMode::cl_addressing_mode(),
                     FilterMode::cl_filter_mode()) ->
    {'ok', cl_sampler()} | {'error', cl_error()}.

create_sampler(_Ctx, _NormalizedCoords, _AddrMode, _Filter) ->
    ?nif_stub.

%%
%% @spec release_sampler(Sampler::cl_sampler()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Decrements the sampler reference count.
%%
%%   The sampler object is deleted after the reference count becomes
%%   zero and commands queued for execution on a command-queue(s) that
%%   use sampler have finished.
-spec release_sampler(Sampler::cl_sampler()) ->
    'ok' | {'error', cl_error()}.

release_sampler(S) when ?is_sampler(S) ->
    %% This clause only checks the handle with ?is_sampler and returns ok.
    ok.

%%
%% @spec retain_sampler(Sampler::cl_sampler()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Increments the sampler reference count.
-spec retain_sampler(Sampler::cl_sampler()) ->
    'ok' | {'error', cl_error()}.

retain_sampler(S) when ?is_sampler(S) ->
    %% This clause only checks the handle with ?is_sampler and returns ok.
    ok.

%% @doc Returns the list of sampler info keys.
sampler_info() ->
    [reference_count,
     context,
     normalized_coords,
     addressing_mode,
     filter_mode].

%% @spec get_sampler_info(Sampler::cl_sampler(), InfoType::cl_sampler_info_type()) ->
%%    {'ok', term()} | {'error', cl_error()}
%% @doc Returns InfoType information about the sampler object.
get_sampler_info(_S, _InfoKey) ->
    ?nif_stub.

%% @spec get_sampler_info(Sampler::cl_sampler()) -> {'ok', term()} | {'error', cl_error()}
%% @doc Returns all information about the sampler object.
%%
%% Queries every key listed by sampler_info/0 through get_sampler_info/2.
%% @see get_sampler_info/2
get_sampler_info(Sampler) ->
    InfoKeys = sampler_info(),
    get_info_list(Sampler, InfoKeys, fun get_sampler_info/2).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Program
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% @spec create_program_with_source(Context::cl_context(),
%%                                  Source::iodata()) ->
%%    {'ok', cl_program()} | {'error', cl_error()}
%%
%% @doc Creates a program object for a context,
%% and loads the source code specified by the text strings in the
%% strings array into the program object.
%%
%% The devices associated with the program object are the devices
%% associated with context.
%%
%% OpenCL allows applications to create a program object using the
%% program source or binary and build appropriate program
%% executables. This allows applications to determine whether they
%% want to use the pre-built offline binary or load and compile the
%% program source and use the executable compiled/linked online as the
%% program executable. This can be very useful as it allows
%% applications to load and build program executables online on its
%% first instance for appropriate OpenCL devices in the system. These
%% executables can now be queried and cached by the
%% application. Future instances of the application launching will no
%% longer need to compile and build the program executables. The
%% cached executables can be read and loaded by the application, which
%% can help significantly reduce the application initialization time.
%%
%% An OpenCL program consists of a set of kernels that are identified
%% as functions declared with the __kernel qualifier in the program
%% source. OpenCL programs may also contain auxiliary functions and
%% constant data that can be used by __kernel functions. The program
%% executable can be generated online or offline by the OpenCL
%% compiler for the appropriate target device(s).
%%
%% @todo allow iodata and handle multiple binaries in the driver
%%
-spec create_program_with_source(Context::cl_context(),
                                 Source::iodata()) ->
    {'ok', cl_program()} | {'error', cl_error()}.

create_program_with_source(_Ctx, _SourceText) ->
    ?nif_stub.

%%
%% @spec create_program_with_binary(Context::cl_context(),
%%                                  DeviceList::[cl_device_id()],
%%                                  BinaryList::[binary()]) ->
%%    {'ok', cl_program()} | {'error', cl_error()}
%%
%% @doc Creates a program object for a context, and loads specified
%% binary data into the program object.
%%
%% OpenCL allows applications to create a program object using the
%% program source or binary and build appropriate program
%% executables. This allows applications to determine whether they
%% want to use the pre-built offline binary or load and compile the
%% program source and use the executable compiled/linked online as the
%% program executable. This can be very useful as it allows
%% applications to load and build program executables online on its
%% first instance for appropriate OpenCL devices in the system. These
%% executables can now be queried and cached by the
%% application. Future instances of the application launching will no
%% longer need to compile and build the program executables. The
%% cached executables can be read and loaded by the application, which
%% can help significantly reduce the application initialization time.
%%
%% The binaries and device can be generated by calling:
%%
%%   {ok,P} = cl:create_program_with_source(Context,Source),
%%   ok = cl:build_program(P, DeviceList, Options),
%%   {ok,DeviceList} = cl:get_program_info(P, devices),
%%   {ok,BinaryList} = cl:get_program_info(P, binaries).
%%
%%
-spec create_program_with_binary(Context::cl_context(),
                                 DeviceList::[cl_device_id()],
                                 BinaryList::[binary()]) ->
    {'ok', cl_program()} | {'error', cl_error()}.

create_program_with_binary(_Ctx, _Devices, _Binaries) ->
    ?nif_stub.

-spec create_program_with_builtin_kernels(Context::cl_context(),
                                          DeviceList::[cl_device_id()],
                                          KernelNames::string()) ->
    {'ok', cl_program()} | {'error', cl_error()}.

create_program_with_builtin_kernels(_Ctx, _Devices, _Names) ->
    ?nif_stub.
%%
%% @spec retain_program(Program::cl_program()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Increments the program reference count.
-spec retain_program(Program::cl_program()) ->
    'ok' | {'error', cl_error()}.

retain_program(Program) when ?is_program(Program) ->
    %% This clause only checks the handle with ?is_program and returns ok.
    ok.

%%
%% @spec release_program(Program::cl_program()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Decrements the program reference count.
%%
%% The program object is deleted after all kernel objects associated
%% with program have been deleted and the program reference count
%% becomes zero.
-spec release_program(Program::cl_program()) ->
    'ok' | {'error', cl_error()}.

release_program(Program) when ?is_program(Program) ->
    %% This clause only checks the handle with ?is_program and returns ok.
    ok.

%%
%% @spec build_program(Program::cl_program(),
%%                     DeviceList::[cl_device_id()],
%%                     Options::string()) ->
%%  'ok' | {'error', cl_error()}
%%
%% @doc Builds (compiles and links) a program executable from the
%% program source or binary.
%%
%% OpenCL allows program executables to be built using the source or the binary.
%%
%% The build options are categorized as pre-processor options, options
%% for math intrinsics, options that control optimization and
%% miscellaneous options. This specification defines a standard set of
%% options that must be supported by an OpenCL compiler when building
%% program executables online or offline. These may be extended by a
%% set of vendor- or platform-specific options.
%%
%%

    Preprocessor Options

    These options %% control the OpenCL preprocessor which is run on each program source %% before actual compilation. -D options are processed in the order %% they are given in the options argument to %% build_program/3. %% %%
    %%
    -D name
    %%

    Predefine name as a macro, with definition 1.

    %%
    -D name=definition

    The contents of definition %% are tokenized and processed as if they appeared during translation phase three in a `#define' %% directive. In particular, the definition will be truncated by %% embedded newline characters.

    %%
    -I dir

    Add the directory dir to the list of directories to be %% searched for header files.

    %%
    %% %%

    Math Intrinsics Options

    These options control compiler %% behavior regarding floating-point arithmetic. These options trade %% off between speed and correctness. %%
    -cl-single-precision-constant

    Treat double %% precision floating-point constant as single precision constant. %%

    -cl-denorms-are-zero

    This option controls %% how single precision and double precision denormalized numbers are %% handled. If specified as a build option, the single precision %% denormalized numbers may be flushed to zero and if the optional %% extension for double precision is supported, double precision %% denormalized numbers may also be flushed to zero. This is intended %% to be a performance hint and the OpenCL compiler can choose not to %% flush denorms to zero if the device supports single precision (or %% double precision) denormalized numbers.

    This option is %% ignored for single precision numbers if the device does not support %% single precision denormalized numbers i.e. CL_FP_DENORM bit is not %% set in CL_DEVICE_SINGLE_FP_CONFIG.

    This option is %% ignored for double precision numbers if the device does not support %% double precision or if it does support double precision but %% CL_FP_DENORM bit is not set in CL_DEVICE_DOUBLE_FP_CONFIG.

    %% %% This flag only applies for scalar and vector single precision %% floating-point variables and computations on these floating-point %% variables inside a program. It does not apply to reading from or %% writing to image objects.


    %%

    %% %%

    Optimization Options

    These options control various %% sorts of optimizations. Turning on optimization flags makes the %% compiler attempt to improve the performance and/or code size at the %% expense of compilation time and possibly the ability to debug the %% program.
    -cl-opt-disable

    This option %% disables all optimizations. The default is optimizations are %% enabled.

    -cl-strict-aliasing

    This option %% allows the compiler to assume the strictest aliasing rules. %%

    %%

    The following options control compiler %% behavior regarding floating-point arithmetic. These options trade %% off between performance and correctness and must be specifically %% enabled. These options are not turned on by default since it can %% result in incorrect output for programs which depend on an exact %% implementation of IEEE 754 rules/specifications for math functions. %%

    -cl-mad-enable

    Allow a * b + c %% to be replaced by a mad. The mad computes %% a * b + c with reduced accuracy. For example, some %% OpenCL devices implement mad as truncate %% the result of a * b before adding it to %% c.

    %%
    -cl-no-signed-zeros
    %%

    Allow optimizations for floating-point arithmetic that ignore %% the signedness of zero. IEEE 754 arithmetic specifies the behavior %% of distinct +0.0 and -0.0 values, which %% then prohibits simplification of expressions such as %% x+0.0 or 0.0*x (even with %% -cl-finite-math-only). This option implies that the sign of a zero result isn't %% significant.

    %%
    -cl-unsafe-math-optimizations

    Allow optimizations %% for floating-point arithmetic that (a) assume that arguments and %% results are valid, (b) may violate IEEE 754 standard and (c) may %% violate the OpenCL numerical compliance requirements as defined in %% section 7.4 for single-precision floating-point, section 9.3.9 for %% double-precision floating-point, and edge case behavior in section %% 7.5. This option includes the -cl-no-signed-zeros and %% -cl-mad-enable options.

    %%
    -cl-finite-math-only

    %% Allow optimizations for floating-point arithmetic that assume that arguments and results %% are not NaNs or ±infinity. This option may violate the OpenCL numerical compliance %% requirements defined in section 7.4 for single-precision floating-point, %% section 9.3.9 for double-precision floating-point, and edge case behavior in section 7.5. %%

    %%
    -cl-fast-relaxed-math

    %% Sets the optimization options -cl-finite-math-only and -cl-unsafe-math-optimizations. %% This allows optimizations for floating-point arithmetic that may violate the IEEE 754 %% standard and the OpenCL numerical compliance requirements defined in the specification in section 7.4 for single-precision floating-point, section 9.3.9 for double-precision floating-point, %% and edge case behavior in section 7.5. This option causes the preprocessor macro %% %% __FAST_RELAXED_MATH__ to be defined in the OpenCL program. %%


    %%

    Options to Request or Suppress Warnings

    %% Warnings are diagnostic messages that report constructions which are not inherently erroneous %% but which are risky or suggest there may have been an error. The following language-independent %% options do not enable specific warnings but control the kinds of diagnostics %% produced by the OpenCL compiler. %%
    -w

    %% Inhibit all warning messages. %%

    -Werror

    %% Make all warnings into errors. %%

    %%
    %% The call blocks until the asynchronous build started by
%% async_build_program/3 delivers its reply message.
build_program(Program, DeviceList, Options) ->
    Started = async_build_program(Program, DeviceList, Options),
    case Started of
        {ok, Ref} ->
            receive
                {cl_async, Ref, Reply} -> Reply
            end;
        _ ->
            %% Anything other than {ok,Ref} is returned to the caller as is.
            Started
    end.

%% NIF entry point; on success build_program/3 expects {ok,Ref} followed by
%% a {cl_async,Ref,Reply} message.
async_build_program(_Prog, _Devices, _Opts) ->
    ?nif_stub.

%%
%% @spec unload_compiler() -> 'ok' | {'error', cl_error()}
%% @doc Allows the implementation to release the resources allocated by
%% the OpenCL compiler.
%%
%% This is a hint from the application and does not guarantee that the
%% compiler will not be used in the future or that the compiler will
%% actually be unloaded by the implementation. Calls to build_program/3
%% after unload_compiler/0 will reload the compiler, if necessary, to
%% build the appropriate program executable.
unload_compiler() ->
    ?nif_stub.

%% @spec unload_platform_compiler(Platform :: cl_platform_id()) ->
%%    'ok' | {'error', cl_error()}
-spec unload_platform_compiler(Platform::cl_platform_id()) ->
    'ok' | {'error', cl_error()}.

unload_platform_compiler(_PlatformId) ->
    ?nif_stub.

%% Runs async_compile_program/5 and blocks until its reply message arrives.
-spec compile_program(Program::cl_program(),
                      DeviceList::[cl_device_id()],
                      Options::string(),
                      Headers::[cl_program()],
                      Names::[string()]) ->
    'ok' | {'error', cl_error()}.

compile_program(Program, Devices, Options, Headers, Names) ->
    Started = async_compile_program(Program, Devices, Options, Headers, Names),
    case Started of
        {ok, Ref} ->
            receive
                {cl_async, Ref, Reply} -> Reply
            end;
        _ ->
            Started
    end.

%% NIF entry point used by compile_program/5.
async_compile_program(_Prog, _DeviceList, _Opts, _HeaderProgs, _HeaderNames) ->
    ?nif_stub.

%% Runs async_link_program/4 and blocks until its reply message arrives.
%% A reply of 'ok' yields {ok,Program}; any other reply is returned as is.
-spec link_program(Context::cl_context(),
                   DeviceList::[cl_device_id()],
                   Options::string(),
                   Programs::[cl_program()]) ->
    {'ok', cl_program()} | {'error', cl_error()}.

link_program(Context, DeviceList, Options, Programs) ->
    Started = async_link_program(Context, DeviceList, Options, Programs),
    case Started of
        {ok, {Ref, Linked}} ->
            receive
                {cl_async, Ref, ok}      -> {ok, Linked};
                {cl_async, Ref, Failure} -> Failure
            end;
        _ ->
            Started
    end.

%% NIF entry point used by link_program/4.
async_link_program(_Ctx, _Devices, _Opts, _Progs) ->
    ?nif_stub.
program_info() ->
    [reference_count,
     context,
     num_devices,
     devices,
     source,
     binary_sizes,
     binaries].

%% @doc Returns specific information about the program object.
get_program_info(_Prog, _InfoKey) ->
    ?nif_stub.

%% @doc Returns all information about the program object.
get_program_info(Program) when ?is_program(Program) ->
    InfoKeys = program_info(),
    get_info_list(Program, InfoKeys, fun get_program_info/2).

program_build_info() ->
    [status,
     options,
     log].

%% @doc Returns specific build information for each device in the program object.
get_program_build_info(_Prog, _DeviceId, _InfoKey) ->
    ?nif_stub.

%% @doc Returns all build information for each device in the program object.
get_program_build_info(Program, Device) ->
    %% Close over Device so the query fits the two-argument callback shape.
    Query = fun(Prog, Key) -> get_program_build_info(Prog, Device, Key) end,
    get_info_list(Program, program_build_info(), Query).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Kernel
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% @spec create_kernel(Program::cl_program(),Name::string()) ->
%%    {'ok', cl_kernel()} | {'error', cl_error()}
%%
%% @doc Creates a kernel object.
%%
%%  A kernel is a function declared in a program. A kernel is
%%  identified by the __kernel qualifier applied to any function in a
%%  program. A kernel object encapsulates the specific __kernel
%%  function declared in a program and the argument values to be used
%%  when executing this __kernel function.
create_kernel(_Prog, _KernelName) ->
    ?nif_stub.

%%
%% @spec create_kernels_in_program(Program::cl_program()) ->
%%    {'ok', [cl_kernel()]} | {'error', cl_error()}
%%
%% @doc Creates kernel objects for all kernel functions in a program object.
%%
%%  Creates kernel objects for all kernel functions in program. Kernel
%%  objects are not created for any __kernel functions in program that
%%  do not have the same function definition across all devices for
%%  which a program executable has been successfully built.
%% Kernel objects can only be created once you have a program object
%% with a valid program source or binary loaded into the program
%% object and the program executable has been successfully built for
%% one or more devices associated with program. No changes to the
%% program executable are allowed while there are kernel objects
%% associated with a program object. This means that calls to
%% clBuildProgram return CL_INVALID_OPERATION if there are kernel
%% objects attached to a program object. The OpenCL context associated
%% with program will be the context associated with kernel. The list
%% of devices associated with program are the devices associated with
%% kernel. Devices associated with a program object for which a valid
%% program executable has been built can be used to execute kernels
%% declared in the program object.
create_kernels_in_program(_Prog) ->
    ?nif_stub.

%%
%% @type cl_kernel_arg() = integer() | float() | binary()
%%
%% @spec set_kernel_arg(Kernel::cl_kernel(), Index::non_neg_integer(),
%%                      Argument::cl_kernel_arg()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Used to set the argument value for a specific argument of a kernel.
%%
%% For now set_kernel_arg handles integer and floats;
%% to set any other type pass a binary and
%% use the macros defined in cl.hrl to get it right (except for padding)
%%
%% A kernel object does not update the reference count for objects
%% such as memory, sampler objects specified as argument values by
%% set_kernel_arg/3, Users may not rely on a kernel object to retain
%% objects specified as argument values to the kernel.
%%
%% Implementations shall not allow cl_kernel objects to hold reference
%% counts to cl_kernel arguments, because no mechanism is provided for
%% the user to tell the kernel to release that ownership right.
%% If the
%% kernel holds ownership rights on kernel args, that would make it
%% impossible for the user to tell with certainty when he may safely
%% release user allocated resources associated with OpenCL objects
%% such as the cl_mem backing store used with CL_MEM_USE_HOST_PTR.
set_kernel_arg(_Kernel,_Index,_Argument) ->
    ?nif_stub.

%%
%% @spec set_kernel_arg_size(Kernel::cl_kernel(), Index::non_neg_integer(),
%%                           Size::non_neg_integer()) ->
%%    'ok' | {'error', cl_error()}
%%
%% @doc clErlang special to set kernel arg with size only (local mem etc)
%%
set_kernel_arg_size(_Kernel,_Index,_Size) ->
    ?nif_stub.

%%
%% @spec retain_kernel(Kernel::cl_kernel()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Increments the kernel reference count.
-spec retain_kernel(Kernel::cl_kernel()) ->
    'ok' | {'error', cl_error()}.

retain_kernel(Kernel) when ?is_kernel(Kernel) ->
    %% This clause only checks the handle with ?is_kernel and returns ok.
    ok.

%%
%% @spec release_kernel(Kernel::cl_kernel()) ->
%%    'ok' | {'error', cl_error()}
%% @doc Decrements the kernel reference count.
-spec release_kernel(Kernel::cl_kernel()) ->
    'ok' | {'error', cl_error()}.

release_kernel(Kernel) when ?is_kernel(Kernel) ->
    %% This clause only checks the handle with ?is_kernel and returns ok.
    ok.

%% @doc Returns the list of kernel info keys.
kernel_info() ->
    [function_name,
     num_args,
     reference_count,
     context,
     program].

%% @doc Returns specific information about the kernel object.
get_kernel_info(_Kernel, _Info) ->
    ?nif_stub.

%% @doc Returns all information about the kernel object.
get_kernel_info(Kernel) when ?is_kernel(Kernel) ->
    get_info_list(Kernel, kernel_info(), fun get_kernel_info/2).

%% @doc Returns the list of per-device kernel work-group info keys.
kernel_workgroup_info() ->
    [work_group_size,
     compile_work_group_size,
     local_mem_size].

%% @doc Returns specific information about the kernel object that may
%% be specific to a device.
get_kernel_workgroup_info(_Kernel, _Device, _Info) ->
    ?nif_stub.

%% @doc Returns all information about the kernel object that may be
%% specific to a device.
get_kernel_workgroup_info(Kernel, Device) ->
    get_info_list(Kernel, kernel_workgroup_info(),
                  fun(K,I) -> get_kernel_workgroup_info(K,Device,I) end).

%% @doc Returns specific information about the kernel argument
get_kernel_arg_info(_Kernel, _ArgIndex, _Info) ->
    ?nif_stub.
%% @doc Returns all information about one kernel argument.
%%
%% Queries every item listed by kernel_arg_info/0 for the argument at
%% ArgIndex and collects the results with get_info_list/3.
get_kernel_arg_info(Kernel, ArgIndex) ->
    get_info_list(Kernel, kernel_arg_info(),
                  fun(K,I) ->
                          get_kernel_arg_info(K,ArgIndex,I)
                  end).

%% @doc Returns the argument information for every argument of the kernel.
%%
%% On success the result is {ok, [{ArgIndex, Info}]} with the indices
%% 0..num_args-1 in ascending order. If num_args cannot be read, or the
%% information for any single argument cannot be read, that error result
%% is returned unchanged (instead of failing with a badmatch).
get_kernel_arg_info(Kernel) ->
    case get_kernel_info(Kernel, num_args) of
        {ok, N} ->
            collect_kernel_arg_info(Kernel, 0, N, []);
        Error ->
            Error
    end.

%% Walks argument indices I..N-1, accumulating {I,Info} pairs in reverse
%% order and stopping at the first result that is not {ok, Info}.
collect_kernel_arg_info(_Kernel, I, N, Acc) when I >= N ->
    {ok, lists:reverse(Acc)};
collect_kernel_arg_info(Kernel, I, N, Acc) ->
    case get_kernel_arg_info(Kernel, I) of
        {ok, Info} ->
            collect_kernel_arg_info(Kernel, I+1, N, [{I,Info}|Acc]);
        Error ->
            Error
    end.

%% @doc Returns all possible kernel_arg_info items.
kernel_arg_info() ->
    [address_qualifier, access_qualifier, type_name, type_qualifier, name].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Events
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% @spec enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),
%%                    WaitList::[cl_event()]) ->
%%    {'ok', cl_event()} | {'error', cl_error()}
%%
%% @doc Enqueues a command to execute a kernel on a device.
%%
%% The kernel is executed using a single work-item.
%% @see enqueue_nd_range_kernel/5
-spec enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),
                   WaitList::[cl_event()]) ->
    {'ok', cl_event()} | {'error', cl_error()}.

enqueue_task(Queue, Kernel, WaitList) ->
    enqueue_task(Queue, Kernel, WaitList, true).

%% Same as enqueue_task/3 but calls enqueue_task/4 with WantEvent = false;
%% per the spec below no event is returned on success.
-spec nowait_enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),
                          WaitList::[cl_event()]) ->
    'ok' | {'error', cl_error()}.

nowait_enqueue_task(Queue, Kernel, WaitList) ->
    enqueue_task(Queue, Kernel, WaitList, false).

%% NIF entry point shared by enqueue_task/3 and nowait_enqueue_task/3.
enqueue_task(_Queue, _Kernel, _WaitList, _WantEvent) ->
    ?nif_stub.

%%
%% @spec enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(),
%%                               Global::[non_neg_integer()],
%%                               Local::[non_neg_integer()],
%%                               WaitList::[cl_event()]) ->
%%    {'ok', cl_event()} | {'error', cl_error()}
%%
%% @doc Enqueues a command to execute a kernel on a device.
%%
%% Work-group instances are executed in parallel across multiple
%% compute units or concurrently on the same compute unit.
%%
%% Each work-item is uniquely identified by a global identifier.
The %% global ID, which can be read inside the kernel, is computed using %% the value given by global_work_size and global_work_offset. In %% OpenCL 1.0, the starting global ID is always (0, 0, ... 0). In %% addition, a work-item is also identified within a work-group by a %% unique local ID. The local ID, which can also be read by the %% kernel, is computed using the value given by local_work_size. The %% starting local ID is always (0, 0, ... 0). -spec enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(), Global::[non_neg_integer()], Local::[non_neg_integer()], WaitList::[cl_event()]) -> {'ok', cl_event()} | {'error', cl_error()}. enqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList) -> enqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList, true). -spec nowait_enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(), Global::[non_neg_integer()], Local::[non_neg_integer()], WaitList::[cl_event()]) -> 'ok' | {'error', cl_error()}. nowait_enqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList) -> enqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList, false). enqueue_nd_range_kernel(_Queue, _Kernel, _Global, _Local, _WaitList, _WantEvent) -> ?nif_stub. %% @spec enqueue_marker(Queue::cl_queue()) -> %% {'ok', cl_event()} | {'error', cl_error()} %% %% @doc Enqueues a marker command. %% %% Enqueues a marker command to command_queue. The marker command %% returns an event which can be used to queue a wait on this marker %% event i.e. wait for all commands queued before the marker command %% to complete. -spec enqueue_marker(Queue::cl_queue()) -> {'ok', cl_event()} | {'error', cl_error()}. enqueue_marker(_Queue) -> ?nif_stub. %% %% @spec enqueue_wait_for_events(Queue::cl_queue(), WaitList::[cl_event()]) -> %% 'ok' | {'error', cl_error()} %% %% @doc Enqueues a wait for a specific event or a list of events %% to complete before any future commands queued in the command-queue are %% executed. 
%% %% The context associated with events in WaitList and Queue must be the same. -spec enqueue_wait_for_events(Queue::cl_queue(), WaitList::[cl_event()]) -> 'ok' | {'error', cl_error()}. enqueue_wait_for_events(_Queue, _WaitList) -> ?nif_stub. %% %% @doc Enqueue commands to read from a buffer object to host memory. %% %% Calling enqueue_read_buffer to read a region of the %% buffer object with the Buffer argument value set to %% host_ptr + offset, where %% host_ptr is a pointer to the memory region specified %% when the buffer object being read is created with %% CL_MEM_USE_HOST_PTR, must meet the following %% requirements in order to avoid undefined behavior: %% %%
    • All commands that use this buffer object have finished %% execution before the read command begins execution
    • %%
    • The buffer object is not mapped
    • %%
    • The buffer object is not used by any command-queue until the %% read command has finished execution
    • %%
    -spec enqueue_read_buffer(Queue::cl_queue(), Buffer::cl_mem(),
                          Offset::non_neg_integer(),
                          Size::non_neg_integer(),
                          WaitList::[cl_event()]) ->
    {'ok', cl_event()} | {'error', cl_error()}.

enqueue_read_buffer(_Queue, _Buffer, _Offset, _Size, _WaitList) ->
    ?nif_stub.

%%
%% Read rectangular section from buffer memory into host memory
%%
-spec enqueue_read_buffer_rect(Queue::cl_queue(), Buffer::cl_mem(),
                               BufferOrigin::[non_neg_integer()],
                               HostOrigin::[non_neg_integer()],
                               Region::[non_neg_integer()],
                               BufferRowPitch::non_neg_integer(),
                               BufferSlicePitch::non_neg_integer(),
                               HostRowPitch::non_neg_integer(),
                               HostSlicePitch::non_neg_integer(),
                               WaitList::[cl_event()]) ->
    {'ok', cl_event()} | {'error', cl_error()}.

enqueue_read_buffer_rect(_Queue, _Buffer, _BufferOrigin, _HostOrigin,
                         _Region, _BufferRowPitch, _BufferSlicePitch,
                         _HostRowPitch, _HostSlicePitch,
                         _WaitList) ->
    ?nif_stub.

%%
%% @spec enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),
%%                            Offset::non_neg_integer(),
%%                            Size::non_neg_integer(),
%%                            Data::binary(),
%%                            WaitList::[cl_event()]) ->
%%    {'ok', cl_event()} | {'error', cl_error()}
%%
%% @doc Enqueue commands to write to a buffer object from host memory.
%%
%% Calling enqueue_write_buffer to update the latest bits
%% in a region of the buffer object with the Buffer
%% argument value set to host_ptr + offset,
%% where host_ptr is a pointer to the memory region
%% specified when the buffer object being written is created with
%% CL_MEM_USE_HOST_PTR, must meet the following
%% requirements in order to avoid undefined behavior:
%%
%%
    • The host memory region given by (host_ptr + offset, cb) %% contains the latest bits when the enqueued write command begins %% execution.
    • %%
    • The buffer object is not mapped
    • %%
    • The buffer object is not used by any command-queue until the write %% command has finished execution
    -spec enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),
                           Offset::non_neg_integer(),
                           Size::non_neg_integer(),
                           Data::binary(),
                           WaitList::[cl_event()]) ->
    {'ok', cl_event()} | {'error', cl_error()}.

enqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList) ->
    enqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList, true).

%% Same as enqueue_write_buffer/6 but calls enqueue_write_buffer/7 with
%% WantEvent = false; per the spec below no event is returned on success.
-spec nowait_enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),
                                  Offset::non_neg_integer(),
                                  Size::non_neg_integer(),
                                  Data::binary(),
                                  WaitList::[cl_event()]) ->
    'ok' | {'error', cl_error()}.

nowait_enqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList) ->
    enqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList, false).

%% NIF entry point shared by enqueue_write_buffer/6 and
%% nowait_enqueue_write_buffer/6.
enqueue_write_buffer(_Queue, _Buffer, _Offset, _Size, _Data, _WaitList,
                     _WantEvent) ->
    ?nif_stub.

%%
%% Write rectangular section from host memory into buffer memory
%%
-spec enqueue_write_buffer_rect(Queue::cl_queue(), Buffer::cl_mem(),
                                BufferOrigin::[non_neg_integer()],
                                HostOrigin::[non_neg_integer()],
                                Region::[non_neg_integer()],
                                BufferRowPitch::non_neg_integer(),
                                BufferSlicePitch::non_neg_integer(),
                                HostRowPitch::non_neg_integer(),
                                HostSlicePitch::non_neg_integer(),
                                Data::binary(),
                                WaitList::[cl_event()]) ->
    {'ok', cl_event()} | {'error', cl_error()}.

enqueue_write_buffer_rect(_Queue, _Buffer, _BufferOrigin, _HostOrigin,
                          _Region, _BufferRowPitch, _BufferSlicePitch,
                          _HostRowPitch, _HostSlicePitch,
                          _Data, _WaitList) ->
    ?nif_stub.

%%
%% Fill buffer memory from pattern,
%% Size and Offset must be multiples of the Pattern size
%% Pattern size must be one of 1,2,4,8,16,32,64 or 128
%%
-spec enqueue_fill_buffer(Queue::cl_queue(), Buffer::cl_mem(),
                          Pattern::binary(),
                          Offset::non_neg_integer(),
                          Size::non_neg_integer(),
                          WaitList::[cl_event()]) ->
    {'ok', cl_event()} | {'error', cl_error()}.

enqueue_fill_buffer(_Queue, _Buffer, _Pattern, _Offset, _Size, _WaitList) ->
    ?nif_stub.
%% %% @spec enqueue_barrier(Queue::cl_queue()) -> %% 'ok' | {'error', cl_error()} %% %% @doc A synchronization point that enqueues a barrier operation. %% %% enqueue_barrier/1 is a synchronization point that ensures that all %% queued commands in command_queue have finished execution before %% the next batch of commands can begin execution. -spec enqueue_barrier(Queue::cl_queue()) -> 'ok' | {'error', cl_error()}. enqueue_barrier(_Queue) -> ?nif_stub. %% @spec enqueue_marker_with_wait_list(Queue::cl_queue(), %% WaitList::[cl_event()]) -> %% {'ok', cl_event()} | {'error', cl_error()} -spec enqueue_marker_with_wait_list(Queue::cl_queue(), WaitList::[cl_event()]) -> {'ok', cl_event()} | {'error', cl_error()}. enqueue_marker_with_wait_list(_Queue, _WaitList) -> ?nif_stub. %% @spec enqueue_barrier_with_wait_list(Queue::cl_queue(), %% WaitList::[cl_event()]) -> %% {'ok', cl_event()} | {'error', cl_error()} -spec enqueue_barrier_with_wait_list(Queue::cl_queue(), WaitList::[cl_event()]) -> {'ok', cl_event()} | {'error', cl_error()}. enqueue_barrier_with_wait_list(_Queue, _WaitList) -> ?nif_stub. enqueue_read_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch, _WaitList) -> ?nif_stub. enqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch, Data, WaitList) -> enqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch, Data, WaitList, true). nowait_enqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch, Data, WaitList) -> enqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch, Data, WaitList, false). enqueue_write_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch, _Data, _WaitList, _WantEvent) -> ?nif_stub. enqueue_copy_buffer(_Queue, _SrcBuffer, _DstBuffer, _SrcOffset, _DstOffset, _Cb, _WaitList) -> ?nif_stub. enqueue_copy_buffer_rect(_Queue, _SrcBuffer, _DstBuffer, _SrcOrigin, _DstOrigin, _Region, _SrcRowPitch, _SrcSlicePitch, _DstRowPitch, _DstSlicePitch, _WaitList) -> ?nif_stub. 
enqueue_copy_image(_QUeue, _SrcImage, _DstImage, _Origin, _Region, _WaitList) -> ?nif_stub. %% FillColor = <> %% | <> %% | <> %% Use device endian! check device_info(D, endian_little) -spec enqueue_fill_image(Queue::cl_queue(), Image::cl_mem(), FillColor::binary(), Origin::[non_neg_integer()], Region::[non_neg_integer()], WaitList::[cl_event()]) -> {'ok', cl_event()} | {'error', cl_error()}. enqueue_fill_image(_Queue, _Image, _FillColor, _Origin, _Region, _WaitList) -> ?nif_stub. enqueue_copy_image_to_buffer(_Queue, _SrcImage, _DstBuffer, _Origin, _Region, _DstOffset, _WaitList) -> ?nif_stub. enqueue_copy_buffer_to_image(_Queue, _SrcBuffer, _DstImage, _SrcOffset, _DstOrigin, _Region, _WaitList) -> ?nif_stub. enqueue_map_buffer(_Queue, _Buffer, _MapFlags, _Offset, _Size, _WaitList) -> ?nif_stub. enqueue_map_image(_Queue, _Image, _MapFlags, _Origin, _Region, _WaitList) -> ?nif_stub. enqueue_unmap_mem_object(_Queue, _Mem, _WaitList) -> ?nif_stub. -spec enqueue_migrate_mem_objects(Queue::cl_queue(), MemObjects::[cl_mem()], Flags::[host|content_undefined], WaitList::[cl_event()]) -> {'ok', cl_event()} | {'error', cl_error()}. enqueue_migrate_mem_objects(_Queue, _MemObjects, _Flags, _WaitList) -> ?nif_stub. %% %% @spec flush(Queue::cl_queue()) -> %% 'ok' | {'error', cl_error()} %% %% @doc Issues all previously queued OpenCL commands %% in a command-queue to the device associated with the command-queue. %% %% flush only guarantees that all queued commands to command_queue get %% issued to the appropriate device. There is no guarantee that they %% will be complete after clFlush returns. -spec flush(Queue::cl_queue()) -> 'ok' | {'error', cl_error()}. flush(Queue) -> case async_flush(Queue) of {ok,Ref} -> receive {cl_async,Ref,Reply} -> Reply end; Error -> Error end. async_flush(_Queue) -> ?nif_stub. 
%% %% @spec finish(Queue::cl_queue()) -> %% 'ok' | {'error', cl_error()} %% %% @doc Blocks until all previously queued OpenCL commands %% in a command-queue are issued to the associated device and have %% completed. %% %% finish does not return until all queued commands in command_queue %% have been processed and completed. clFinish is also a %% synchronization point. -spec finish(Queue::cl_queue()) -> 'ok' | {'error', cl_error()}. finish(Queue) -> case async_finish(Queue) of {ok,Ref} -> receive {cl_async,Ref,Reply} -> Reply end; Error -> Error end. async_finish(_Queue) -> ?nif_stub. %% %% @spec retain_event(Event::cl_event()) -> %% 'ok' | {'error', cl_error()} %% @doc Increments the event reference count. %% NOTE: The OpenCL commands that return an event perform an implicit retain. retain_event(Event) when ?is_event(Event) -> ok. %% %% @spec release_event(Event::cl_event()) -> %% 'ok' | {'error', cl_error()} %% @doc Decrements the event reference count. %% %% Decrements the event reference count. The event object is deleted %% once the reference count becomes zero, the specific command %% identified by this event has completed (or terminated) and there %% are no commands in the command-queues of a context that require a %% wait for this event to complete. release_event(Event) when ?is_event(Event) -> ok. %% @doc Returns all possible event_info items. event_info() -> [ command_queue, command_type, reference_count, execution_status ]. %% @doc Returns specific information about the event object. get_event_info(_Event, _Info) -> ?nif_stub. %% @doc Returns all specific information about the event object. get_event_info(Event) when ?is_event(Event) -> get_info_list(Event, event_info(), fun get_event_info/2). %% IMAGES %% @doc return a list of image formats [{Order,Type}] get_supported_image_formats(_Context, _Flags, _ImageType) -> ?nif_stub. 
-spec create_image2d(Context::cl_context(), Flags::[cl_mem_flag()],
                     ImageFormat::#cl_image_format{},
                     Width::non_neg_integer(),
                     Height::non_neg_integer(),
                     Pitch::non_neg_integer(),
                     Data::binary()) ->
    {'ok', cl_mem()} | {'error', cl_error()}.

create_image2d(_Context, _MemFlags, _ImageFormat, _Width, _Height, _Pitch,
               _Data) ->
    ?nif_stub.

-spec create_image3d(Context::cl_context(), Flags::[cl_mem_flag()],
                     ImageFormat::#cl_image_format{},
                     Width::non_neg_integer(),
                     Height::non_neg_integer(),
                     Depth::non_neg_integer(),
                     RowPitch::non_neg_integer(),
                     SlicePitch::non_neg_integer(),
                     Data::binary()) ->
    {'ok', cl_mem()} | {'error', cl_error()}.

create_image3d(_Context, _MemFlags, _ImageFormat, _Width, _Height, _Depth,
               _RowPitch, _SlicePitch, _Data) ->
    ?nif_stub.

-spec create_image(Context::cl_context(), Flags::[cl_mem_flag()],
                   ImageFormat::#cl_image_format{},
                   ImageDesc::#cl_image_desc{},
                   Data::binary()) ->
    {'ok', cl_mem()} | {'error', cl_error()}.

create_image(_Context, _MemFlags, _ImageFormat, _ImageDesc, _Data) ->
    ?nif_stub.

%% Wait for all events in EventList to complete.
%% Calls wait/1 on each event in list order and returns the results in
%% that same order.
-spec wait_for_events(EventList::[cl_event()]) ->
    [{'ok','completed'} | {'ok',binary()} | {'error',cl_error()}].

wait_for_events([Event|Es]) ->
    [wait(Event) | wait_for_events(Es)];
wait_for_events([]) ->
    [].

%%
%% @spec wait(Event::cl_event()) ->
%%    {'ok','completed'} | {'ok',binary()} | {'error',cl_error()}
%% @equiv wait(Event, infinity)
%%
wait(Event) ->
    wait(Event, infinity).

%% @spec wait_for_event(Event::cl_event()) ->
%%    {'ok','completed'} | {'ok',binary()} | {'error',cl_error()}
%% @equiv wait(Event, infinity)
%%
wait_for_event(Event) ->
    wait(Event, infinity).

%%
%% @spec wait(Event::cl_event(), Timeout::timeout()) ->
%%    {'ok','completed'} | {'ok',binary()} |
%%    {'error',cl_error()} | {'error',timeout}
%%
%%
%% @doc  Waits for commands identified by event objects to complete.
%%
%%  Waits for commands identified by event objects
%%  in event_list to complete. A command is considered complete if its
%%  execution status is CL_COMPLETE or a negative value.
%%
%%  Starts an asynchronous wait with async_wait_for_event/1 and then
%%  blocks until the matching reply arrives or Timeout expires. An error
%%  from async_wait_for_event/1 is returned unchanged.
wait(Event, Timeout) when ?is_event(Event) ->
    case async_wait_for_event(Event) of
        {ok,Ref} ->
            wait1(Ref,Event,Timeout);
        Error ->
            Error
    end.

%% Receives the {cl_event, Ref, Result} reply for Ref.
%% Clause order matters: a binary result and the atom 'complete' are
%% matched first; the last clause returns any other result as-is.
%% Returns {error, timeout} if nothing matching arrives within Timeout.
%% NOTE(review): when the 'after' branch fires, the pending reply is not
%% consumed here, so it presumably stays in the caller's mailbox if it
%% arrives later - confirm.
wait1(Ref, Event, Timeout) when ?is_event(Event) ->
    receive
        {cl_event, Ref, Binary} when is_binary(Binary) ->
            release_event(Event),
            {ok,Binary};
        {cl_event, Ref, complete} ->
            release_event(Event),
            {ok,completed};
        {cl_event, Ref, Err} ->
            release_event(Event),
            Err
    after Timeout ->
            {error, timeout}
    end.

%%
%% @spec async_wait_for_event(Event::cl_event()) ->
%%    {'ok',reference()} | {'error',cl_error()}
%%
%% @doc Initiate an asynchronous wait operation.
%%
%%  Generate a wait operation that will run non blocking.
%%  A reference is returned that can be used to match the event
%%  that is sent when the event has completed or resulted in an error.
%%  The event returned has the form {cl_event, Ref, Result}
%%  where Ref is the reference that was returned from the call and
%%  Result may be one of binary() | 'complete' or {error,cl_error()}.
%%
-spec async_wait_for_event(Event::cl_event()) ->
    {'ok',reference()} | {'error',cl_error()}.

async_wait_for_event(_Event) ->
    ?nif_stub.

%% @hidden
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Utilities
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Calls Fun(Object, Item) for every Item in InfoList and collects the
%% successful results as {Item, Value} pairs.
get_info_list(Object, InfoList, Fun) ->
    get_info_list(Object, InfoList, Fun, undefined, []).

%% Err holds the reason of the most recent failed item (initially
%% 'undefined'); Acc holds the collected pairs in reverse order.
%% A failed item is printed with io:format and skipped.
%% Final result, in clause order:
%%   {ok, []}       - nothing collected and no item failed
%%   {error, Err}   - nothing collected and at least one item failed
%%   {ok, Pairs}    - at least one item collected (earlier failures are
%%                    dropped); Pairs is reverse(Acc)
%% NOTE(review): reverse/1 is presumably imported from lists - confirm.
get_info_list(Object, [I|Is], Fun, Err, Acc) ->
    case Fun(Object, I) of
        {error,Reason} ->
            io:format("InfoError: ~s [~p]\n", [I,Reason]),
            get_info_list(Object, Is, Fun, Reason, Acc);
        {ok,Value} ->
            get_info_list(Object, Is, Fun, Err, [{I,Value}|Acc])
    end;
get_info_list(_Object,[], _Fun, undefined, []) ->
    {ok, []};
get_info_list(_Object,[], _Fun, Err, []) ->
    {error, Err};
get_info_list(_Object,[], _Fun, _Err, Acc) ->
    {ok, reverse(Acc)}.
cl-cl-1.2.3/src/cl10.erl000066400000000000000000000244111301041406700145370ustar00rootroot00000000000000%%%---- BEGIN COPYRIGHT ------------------------------------------------------- %%% %%% Copyright (C) 2007 - 2012, Rogvall Invest AB, %%% %%% This software is licensed as described in the file COPYRIGHT, which %%% you should have received as part of this distribution. The terms %%% are also available at http://www.rogvall.se/docs/copyright.txt. %%% %%% You may opt to use, copy, modify, merge, publish, distribute and/or sell %%% copies of the Software, and permit persons to whom the Software is %%% furnished to do so, under the terms of the COPYRIGHT file. %%% %%% This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY %%% KIND, either express or implied. %%% %%%---- END COPYRIGHT --------------------------------------------------------- %%% @author Tony Rogvall %%% @copyright (C) 2013, Tony Rogvall %%% @doc %%% OpenCL 1.0 API %%% @end %%% Created : 13 Jan 2013 by Tony Rogvall -module(cl10). -on_load(init/0). -export([start/0, start/1, stop/0]). %% Platform -export([get_platform_ids/0]). -export([platform_info/0]). -export([get_platform_info/1,get_platform_info/2]). %% Devices -export([get_device_ids/0, get_device_ids/2]). -export([device_info/0]). -export([get_device_info/1,get_device_info/2]). %% Context -export([create_context/1]). -export([create_context_from_type/1]). -export([release_context/1]). -export([retain_context/1]). -export([context_info/0]). -export([get_context_info/1,get_context_info/2]). %% Command queue -export([create_queue/3]). -export([set_queue_property/3]). -export([release_queue/1]). -export([retain_queue/1]). -export([queue_info/0]). -export([get_queue_info/1,get_queue_info/2]). %% Memory object -export([create_buffer/3, create_buffer/4]). -export([release_mem_object/1]). -export([retain_mem_object/1]). -export([mem_object_info/0]). -export([get_mem_object_info/1,get_mem_object_info/2]). -export([image_info/0]). 
-export([get_image_info/1,get_image_info/2]). -export([get_supported_image_formats/3]). -export([create_image2d/7]). -export([create_image3d/9]). %% Sampler -export([create_sampler/4]). -export([release_sampler/1]). -export([retain_sampler/1]). -export([sampler_info/0]). -export([get_sampler_info/1,get_sampler_info/2]). %% Program -export([create_program_with_source/2]). -export([create_program_with_binary/3]). -export([release_program/1]). -export([retain_program/1]). -export([build_program/3, async_build_program/3]). -export([unload_compiler/0]). -export([program_info/0]). -export([get_program_info/1,get_program_info/2]). -export([program_build_info/0]). -export([get_program_build_info/2,get_program_build_info/3]). %% Kernel -export([create_kernel/2]). -export([create_kernels_in_program/1]). -export([set_kernel_arg/3]). -export([set_kernel_arg_size/3]). -export([release_kernel/1]). -export([retain_kernel/1]). -export([kernel_info/0]). -export([get_kernel_info/1,get_kernel_info/2]). -export([kernel_workgroup_info/0]). -export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]). %% Events -export([enqueue_task/3, enqueue_task/4]). -export([nowait_enqueue_task/3]). -export([enqueue_nd_range_kernel/5]). -export([enqueue_nd_range_kernel/6]). -export([nowait_enqueue_nd_range_kernel/5]). -export([enqueue_marker/1]). -export([enqueue_barrier/1]). -export([enqueue_wait_for_events/2]). -export([enqueue_read_buffer/5]). -export([enqueue_write_buffer/6]). -export([enqueue_write_buffer/7]). -export([nowait_enqueue_write_buffer/6]). -export([enqueue_read_image/7]). -export([enqueue_write_image/8]). -export([enqueue_write_image/9]). -export([nowait_enqueue_write_image/8]). -export([enqueue_copy_image/6]). -export([enqueue_copy_image_to_buffer/7]). -export([enqueue_copy_buffer_to_image/7]). -export([enqueue_map_buffer/6]). -export([enqueue_map_image/6]). -export([enqueue_unmap_mem_object/3]). -export([release_event/1]). -export([retain_event/1]). -export([event_info/0]). 
-export([get_event_info/1, get_event_info/2]). -export([wait/1, wait/2]). -export([async_flush/1, flush/1]). -export([async_finish/1, finish/1]). -export([async_wait_for_event/1, wait_for_event/1]). init() -> case lists:member({1,0}, cl:versions()) of false -> erlang:error(cl_1_0_not_supported); true -> ok end. start(Args) -> cl:start(Args). start() -> cl:start(). stop() -> cl:stop(). get_platform_ids() -> cl:get_platform_ids(). platform_info() -> cl:platform_info(). get_platform_info(A1) -> cl:get_platform_info(A1). get_platform_info(A1,A2) -> cl:get_platform_info(A1,A2). get_device_ids() -> cl:get_device_ids(). get_device_ids(A1,A2) -> cl:get_device_ids(A1,A2). device_info() -> cl:device_info_10([]). get_device_info(A1) -> cl:get_device_info(A1). get_device_info(A1,A2) -> cl:get_device_info(A1,A2). create_context(A1) -> cl:create_context(A1). create_context_from_type(A1) -> cl:create_context_from_type(A1). release_context(A1) -> cl:release_context(A1). retain_context(A1) -> cl:retain_context(A1). context_info() -> cl:context_info(). get_context_info(A1) -> cl:get_context_info(A1). get_context_info(A1,A2) -> cl:get_context_info(A1,A2). create_queue(A1,A2,A3) -> cl:create_queue(A1,A2,A3). -spec set_queue_property(_, _, _) -> no_return(). set_queue_property(A1,A2,A3) -> cl:set_queue_property(A1,A2,A3). release_queue(A1) -> cl:release_queue(A1). retain_queue(A1) -> cl:retain_queue(A1). queue_info() -> cl:queue_info(). get_queue_info(A1) -> cl:get_queue_info(A1). get_queue_info(A1,A2) -> cl:get_queue_info(A1,A2). create_buffer(A1,A2,A3) -> cl:create_buffer(A1,A2,A3). create_buffer(A1,A2,A3,A4) -> cl:create_buffer(A1,A2,A3,A4). release_mem_object(A1) -> cl:release_mem_object(A1). retain_mem_object(A1) -> cl:retain_mem_object(A1). mem_object_info() -> cl:mem_object_info(). get_mem_object_info(A1) -> cl:get_mem_object_info(A1). get_mem_object_info(A1,A2) -> cl:get_mem_object_info(A1,A2). image_info() -> cl:image_info(). get_image_info(A1) -> cl:get_image_info(A1). 
get_image_info(A1,A2) -> cl:get_image_info(A1,A2). get_supported_image_formats(A1,A2,A3) -> cl:get_supported_image_formats(A1,A2,A3). create_image2d(A1,A2,A3,A4,A5,A6,A7) -> cl:create_image2d(A1,A2,A3,A4,A5,A6,A7). create_image3d(A1,A2,A3,A4,A5,A6,A7,A8,A9) -> cl:create_image3d(A1,A2,A3,A4,A5,A6,A7,A8,A9). create_sampler(A1,A2,A3,A4) -> cl:create_sampler(A1,A2,A3,A4). release_sampler(A1) -> cl:release_sampler(A1). retain_sampler(A1) -> cl:retain_sampler(A1). sampler_info() -> cl:sampler_info(). get_sampler_info(A1) -> cl:get_sampler_info(A1). get_sampler_info(A1,A2) -> cl:get_sampler_info(A1,A2). create_program_with_source(A1,A2) -> cl:create_program_with_source(A1,A2). create_program_with_binary(A1,A2,A3) -> cl:create_program_with_binary(A1,A2,A3). release_program(A1) -> cl:release_program(A1). retain_program(A1) -> cl:retain_program(A1). build_program(A1,A2,A3) -> cl:build_program(A1,A2,A3). async_build_program(A1,A2,A3) -> cl:async_build_program(A1,A2,A3). unload_compiler() -> cl:unload_compiler(). program_info() -> cl:program_info(). get_program_info(A1) -> cl:get_program_info(A1). get_program_info(A1,A2) -> cl:get_program_info(A1,A2). program_build_info() -> cl:program_build_info(). get_program_build_info(A1,A2) -> cl:get_program_build_info(A1,A2). get_program_build_info(A1,A2,A3) -> cl:get_program_build_info(A1,A2,A3). create_kernel(A1,A2) -> cl:create_kernel(A1,A2). create_kernels_in_program(A1) -> cl:create_kernels_in_program(A1). set_kernel_arg(A1,A2,A3) -> cl:set_kernel_arg(A1,A2,A3). set_kernel_arg_size(A1,A2,A3) -> cl:set_kernel_arg_size(A1,A2,A3). release_kernel(A1) -> cl:release_kernel(A1). retain_kernel(A1) -> cl:retain_kernel(A1). kernel_info() -> cl:kernel_info(). get_kernel_info(A1) -> cl:get_kernel_info(A1). get_kernel_info(A1,A2) -> cl:get_kernel_info(A1,A2). kernel_workgroup_info() -> cl:kernel_workgroup_info(). get_kernel_workgroup_info(A1,A2) -> cl:get_kernel_workgroup_info(A1,A2). 
get_kernel_workgroup_info(A1,A2,A3) -> cl:get_kernel_workgroup_info(A1,A2,A3). enqueue_task(A1,A2,A3) -> cl:enqueue_task(A1,A2,A3). enqueue_task(A1,A2,A3,A4) -> cl:enqueue_task(A1,A2,A3,A4). nowait_enqueue_task(A1,A2,A3) -> cl:nowait_enqueue_task(A1,A2,A3). enqueue_nd_range_kernel(A1,A2,A3,A4,A5) -> cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5). enqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6) -> cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6). nowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5) -> cl:nowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5). enqueue_marker(A1) -> cl:enqueue_marker(A1). enqueue_barrier(A1) -> cl:enqueue_barrier(A1). enqueue_wait_for_events(A1,A2) -> cl:enqueue_wait_for_events(A1,A2). enqueue_read_buffer(A1,A2,A3,A4,A5) -> cl:enqueue_read_buffer(A1,A2,A3,A4,A5). enqueue_write_buffer(A1,A2,A3,A4,A5,A6) -> cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6). enqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7) -> cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7). nowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6) -> cl:nowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6). enqueue_read_image(A1,A2,A3,A4,A5,A6,A7) -> cl:enqueue_read_image(A1,A2,A3,A4,A5,A6,A7). enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8). enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9) -> cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9). nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8). enqueue_copy_image(A1,A2,A3,A4,A5,A6) -> cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6). enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) -> cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7). enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) -> cl:enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7). enqueue_map_buffer(A1,A2,A3,A4,A5,A6) -> cl:enqueue_map_buffer(A1,A2,A3,A4,A5,A6). enqueue_map_image(A1,A2,A3,A4,A5,A6) -> cl:enqueue_map_image(A1,A2,A3,A4,A5,A6). 
enqueue_unmap_mem_object(A1,A2,A3) -> cl:enqueue_unmap_mem_object(A1,A2,A3). release_event(A1) -> cl:release_event(A1). retain_event(A1) -> cl:retain_event(A1). event_info() -> cl:event_info(). get_event_info(A1) -> cl:get_event_info(A1). get_event_info(A1,A2) -> cl:get_event_info(A1,A2). wait(A1) -> cl:wait(A1). wait(A1,A2) -> cl:wait(A1,A2). async_flush(A1) -> cl:async_flush(A1). flush(A1) -> cl:flush(A1). async_finish(A1) -> cl:async_finish(A1). finish(A1) -> cl:finish(A1). async_wait_for_event(A1) -> cl:async_wait_for_event(A1). wait_for_event(A1) -> cl:wait_for_event(A1). cl-cl-1.2.3/src/cl11.erl000066400000000000000000000244341301041406700145450ustar00rootroot00000000000000%%%---- BEGIN COPYRIGHT ------------------------------------------------------- %%% %%% Copyright (C) 2007 - 2012, Rogvall Invest AB, %%% %%% This software is licensed as described in the file COPYRIGHT, which %%% you should have received as part of this distribution. The terms %%% are also available at http://www.rogvall.se/docs/copyright.txt. %%% %%% You may opt to use, copy, modify, merge, publish, distribute and/or sell %%% copies of the Software, and permit persons to whom the Software is %%% furnished to do so, under the terms of the COPYRIGHT file. %%% %%% This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY %%% KIND, either express or implied. %%% %%%---- END COPYRIGHT --------------------------------------------------------- %%% @author Tony Rogvall %%% @copyright (C) 2013, Tony Rogvall %%% @doc %%% OpenCL 1.1 API %%% @end %%% Created : 13 Jan 2013 by Tony Rogvall -module(cl11). -on_load(init/0). -export([start/0, start/1, stop/0]). %% Platform -export([get_platform_ids/0]). -export([platform_info/0]). -export([get_platform_info/1,get_platform_info/2]). %% Devices -export([get_device_ids/0, get_device_ids/2]). -export([device_info/0]). -export([get_device_info/1,get_device_info/2]). %% Context -export([create_context/1]). -export([create_context_from_type/1]). 
-export([release_context/1]). -export([retain_context/1]). -export([context_info/0]). -export([get_context_info/1,get_context_info/2]). %% Command queue -export([create_queue/3]). -export([set_queue_property/3]). -export([release_queue/1]). -export([retain_queue/1]). -export([queue_info/0]). -export([get_queue_info/1,get_queue_info/2]). %% Memory object -export([create_buffer/3, create_buffer/4]). -export([release_mem_object/1]). -export([retain_mem_object/1]). -export([mem_object_info/0]). -export([get_mem_object_info/1,get_mem_object_info/2]). -export([image_info/0]). -export([get_image_info/1,get_image_info/2]). -export([get_supported_image_formats/3]). -export([create_image2d/7]). -export([create_image3d/9]). %% Sampler -export([create_sampler/4]). -export([release_sampler/1]). -export([retain_sampler/1]). -export([sampler_info/0]). -export([get_sampler_info/1,get_sampler_info/2]). %% Program -export([create_program_with_source/2]). -export([create_program_with_binary/3]). -export([release_program/1]). -export([retain_program/1]). -export([build_program/3, async_build_program/3]). -export([unload_compiler/0]). -export([program_info/0]). -export([get_program_info/1,get_program_info/2]). -export([program_build_info/0]). -export([get_program_build_info/2,get_program_build_info/3]). %% Kernel -export([create_kernel/2]). -export([create_kernels_in_program/1]). -export([set_kernel_arg/3]). -export([set_kernel_arg_size/3]). -export([release_kernel/1]). -export([retain_kernel/1]). -export([kernel_info/0]). -export([get_kernel_info/1,get_kernel_info/2]). -export([kernel_workgroup_info/0]). -export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]). %% Events -export([enqueue_task/3, enqueue_task/4]). -export([nowait_enqueue_task/3]). -export([enqueue_nd_range_kernel/5]). -export([enqueue_nd_range_kernel/6]). -export([nowait_enqueue_nd_range_kernel/5]). -export([enqueue_marker/1]). -export([enqueue_barrier/1]). -export([enqueue_wait_for_events/2]). 
-export([enqueue_read_buffer/5]). -export([enqueue_write_buffer/6]). -export([enqueue_write_buffer/7]). -export([nowait_enqueue_write_buffer/6]). -export([enqueue_read_image/7]). -export([enqueue_write_image/8]). -export([enqueue_write_image/9]). -export([nowait_enqueue_write_image/8]). -export([enqueue_copy_image/6]). -export([enqueue_copy_image_to_buffer/7]). -export([enqueue_copy_buffer_to_image/7]). -export([enqueue_map_buffer/6]). -export([enqueue_map_image/6]). -export([enqueue_unmap_mem_object/3]). -export([release_event/1]). -export([retain_event/1]). -export([event_info/0]). -export([get_event_info/1, get_event_info/2]). -export([wait/1, wait/2]). -export([async_flush/1, flush/1]). -export([async_finish/1, finish/1]). -export([async_wait_for_event/1, wait_for_event/1]). init() -> case lists:member({1,1}, cl:versions()) of false -> erlang:error(cl_1_1_not_supported); true -> ok end. start(Args) -> cl:start(Args). start() -> cl:start(). stop() -> cl:stop(). get_platform_ids() -> cl:get_platform_ids(). platform_info() -> cl:platform_info(). get_platform_info(A1) -> cl:get_platform_info(A1). get_platform_info(A1,A2) -> cl:get_platform_info(A1,A2). get_device_ids() -> cl:get_device_ids(). get_device_ids(A1,A2) -> cl:get_device_ids(A1,A2). device_info() -> cl:device_info_10(cl:device_info_11([])). get_device_info(A1) -> cl:get_device_info(A1). get_device_info(A1,A2) -> cl:get_device_info(A1,A2). create_context(A1) -> cl:create_context(A1). create_context_from_type(A1) -> cl:create_context_from_type(A1). release_context(A1) -> cl:release_context(A1). retain_context(A1) -> cl:retain_context(A1). context_info() -> cl:context_info(). get_context_info(A1) -> cl:get_context_info(A1). get_context_info(A1,A2) -> cl:get_context_info(A1,A2). create_queue(A1,A2,A3) -> cl:create_queue(A1,A2,A3). -spec set_queue_property(_, _, _) -> no_return(). set_queue_property(A1,A2,A3) -> cl:set_queue_property(A1,A2,A3). release_queue(A1) -> cl:release_queue(A1). 
%% Pass-through wrappers: each forwards its arguments unchanged to the
%% function of the same name and arity in the cl module.

%% --- command queue ---
retain_queue(Queue) ->
    cl:retain_queue(Queue).

queue_info() ->
    cl:queue_info().

get_queue_info(Queue) ->
    cl:get_queue_info(Queue).

get_queue_info(Queue, Item) ->
    cl:get_queue_info(Queue, Item).

%% --- memory objects and images ---
create_buffer(Context, Flags, Size) ->
    cl:create_buffer(Context, Flags, Size).

create_buffer(Context, Flags, Size, Data) ->
    cl:create_buffer(Context, Flags, Size, Data).

release_mem_object(Mem) ->
    cl:release_mem_object(Mem).

retain_mem_object(Mem) ->
    cl:retain_mem_object(Mem).

mem_object_info() ->
    cl:mem_object_info().

get_mem_object_info(Mem) ->
    cl:get_mem_object_info(Mem).

get_mem_object_info(Mem, Item) ->
    cl:get_mem_object_info(Mem, Item).

image_info() ->
    cl:image_info().

get_image_info(Image) ->
    cl:get_image_info(Image).

get_image_info(Image, Item) ->
    cl:get_image_info(Image, Item).

get_supported_image_formats(Context, Flags, ImageType) ->
    cl:get_supported_image_formats(Context, Flags, ImageType).

create_image2d(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:create_image2d(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

create_image3d(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) ->
    cl:create_image3d(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9).

%% --- sampler ---
create_sampler(Arg1, Arg2, Arg3, Arg4) ->
    cl:create_sampler(Arg1, Arg2, Arg3, Arg4).

release_sampler(Sampler) ->
    cl:release_sampler(Sampler).

retain_sampler(Sampler) ->
    cl:retain_sampler(Sampler).

sampler_info() ->
    cl:sampler_info().

get_sampler_info(Sampler) ->
    cl:get_sampler_info(Sampler).

get_sampler_info(Sampler, Item) ->
    cl:get_sampler_info(Sampler, Item).

%% --- program ---
create_program_with_source(Context, Source) ->
    cl:create_program_with_source(Context, Source).

create_program_with_binary(Context, DeviceList, BinaryList) ->
    cl:create_program_with_binary(Context, DeviceList, BinaryList).

release_program(Program) ->
    cl:release_program(Program).

retain_program(Program) ->
    cl:retain_program(Program).

build_program(Program, DeviceList, Options) ->
    cl:build_program(Program, DeviceList, Options).

async_build_program(Program, DeviceList, Options) ->
    cl:async_build_program(Program, DeviceList, Options).

unload_compiler() ->
    cl:unload_compiler().

program_info() ->
    cl:program_info().

get_program_info(Program) ->
    cl:get_program_info(Program).

get_program_info(Program, Item) ->
    cl:get_program_info(Program, Item).

program_build_info() ->
    cl:program_build_info().

get_program_build_info(Program, Device) ->
    cl:get_program_build_info(Program, Device).

get_program_build_info(Program, Device, Item) ->
    cl:get_program_build_info(Program, Device, Item).
%% Pass-through wrappers: each forwards its arguments unchanged to the
%% function of the same name and arity in the cl module.

%% --- kernel ---
create_kernel(Program, Name) ->
    cl:create_kernel(Program, Name).

create_kernels_in_program(Program) ->
    cl:create_kernels_in_program(Program).

set_kernel_arg(Kernel, Index, Value) ->
    cl:set_kernel_arg(Kernel, Index, Value).

set_kernel_arg_size(Kernel, Index, Size) ->
    cl:set_kernel_arg_size(Kernel, Index, Size).

release_kernel(Kernel) ->
    cl:release_kernel(Kernel).

retain_kernel(Kernel) ->
    cl:retain_kernel(Kernel).

kernel_info() ->
    cl:kernel_info().

get_kernel_info(Kernel) ->
    cl:get_kernel_info(Kernel).

get_kernel_info(Kernel, Item) ->
    cl:get_kernel_info(Kernel, Item).

kernel_workgroup_info() ->
    cl:kernel_workgroup_info().

get_kernel_workgroup_info(Kernel, Device) ->
    cl:get_kernel_workgroup_info(Kernel, Device).

get_kernel_workgroup_info(Kernel, Device, Item) ->
    cl:get_kernel_workgroup_info(Kernel, Device, Item).

%% --- enqueue operations ---
enqueue_task(Arg1, Arg2, Arg3) ->
    cl:enqueue_task(Arg1, Arg2, Arg3).

enqueue_task(Arg1, Arg2, Arg3, Arg4) ->
    cl:enqueue_task(Arg1, Arg2, Arg3, Arg4).

nowait_enqueue_task(Arg1, Arg2, Arg3) ->
    cl:nowait_enqueue_task(Arg1, Arg2, Arg3).

enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

nowait_enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:nowait_enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_marker(Queue) ->
    cl:enqueue_marker(Queue).

enqueue_barrier(Queue) ->
    cl:enqueue_barrier(Queue).

enqueue_wait_for_events(Arg1, Arg2) ->
    cl:enqueue_wait_for_events(Arg1, Arg2).

enqueue_read_buffer(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:enqueue_read_buffer(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

nowait_enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:nowait_enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_read_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_read_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) ->
    cl:enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8).

enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) ->
    cl:enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9).
nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8). enqueue_copy_image(A1,A2,A3,A4,A5,A6) -> cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6). enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) -> cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7). enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) -> cl:enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7). enqueue_map_buffer(A1,A2,A3,A4,A5,A6) -> cl:enqueue_map_buffer(A1,A2,A3,A4,A5,A6). enqueue_map_image(A1,A2,A3,A4,A5,A6) -> cl:enqueue_map_image(A1,A2,A3,A4,A5,A6). enqueue_unmap_mem_object(A1,A2,A3) -> cl:enqueue_unmap_mem_object(A1,A2,A3). release_event(A1) -> cl:release_event(A1). retain_event(A1) -> cl:retain_event(A1). event_info() -> cl:event_info(). get_event_info(A1) -> cl:get_event_info(A1). get_event_info(A1,A2) -> cl:get_event_info(A1,A2). wait(A1) -> cl:wait(A1). wait(A1,A2) -> cl:wait(A1,A2). async_flush(A1) -> cl:async_flush(A1). flush(A1) -> cl:flush(A1). async_finish(A1) -> cl:async_finish(A1). finish(A1) -> cl:finish(A1). async_wait_for_event(A1) -> cl:async_wait_for_event(A1). wait_for_event(A1) -> cl:wait_for_event(A1). cl-cl-1.2.3/src/cl12.erl000066400000000000000000000242531301041406700145450ustar00rootroot00000000000000%%%---- BEGIN COPYRIGHT ------------------------------------------------------- %%% %%% Copyright (C) 2007 - 2012, Rogvall Invest AB, %%% %%% This software is licensed as described in the file COPYRIGHT, which %%% you should have received as part of this distribution. The terms %%% are also available at http://www.rogvall.se/docs/copyright.txt. %%% %%% You may opt to use, copy, modify, merge, publish, distribute and/or sell %%% copies of the Software, and permit persons to whom the Software is %%% furnished to do so, under the terms of the COPYRIGHT file. %%% %%% This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY %%% KIND, either express or implied. 
%%% %%%---- END COPYRIGHT --------------------------------------------------------- %%% @author Tony Rogvall %%% @copyright (C) 2013, Tony Rogvall %%% @doc %%% OpenCL 1.2 API %%% @end %%% Created : 13 Jan 2013 by Tony Rogvall -module(cl12). -on_load(init/0). -export([start/0, start/1, stop/0]). -export([get_platform_ids/0]). -export([platform_info/0]). -export([get_platform_info/1,get_platform_info/2]). -export([get_device_ids/0, get_device_ids/2]). -export([device_info/0]). -export([get_device_info/1,get_device_info/2]). -export([create_context/1]). -export([create_context_from_type/1]). -export([release_context/1]). -export([retain_context/1]). -export([context_info/0]). -export([get_context_info/1,get_context_info/2]). -export([create_queue/3]). -export([set_queue_property/3]). -export([release_queue/1]). -export([retain_queue/1]). -export([queue_info/0]). -export([get_queue_info/1,get_queue_info/2]). -export([create_buffer/3, create_buffer/4]). -export([release_mem_object/1]). -export([retain_mem_object/1]). -export([mem_object_info/0]). -export([get_mem_object_info/1,get_mem_object_info/2]). -export([image_info/0]). -export([get_image_info/1,get_image_info/2]). -export([get_supported_image_formats/3]). -export([create_image/5]). -export([create_sampler/4]). -export([release_sampler/1]). -export([retain_sampler/1]). -export([sampler_info/0]). -export([get_sampler_info/1,get_sampler_info/2]). -export([create_program_with_source/2]). -export([create_program_with_binary/3]). -export([release_program/1]). -export([retain_program/1]). -export([build_program/3, async_build_program/3]). -export([unload_platform_compiler/1]). -export([program_info/0]). -export([get_program_info/1,get_program_info/2]). -export([program_build_info/0]). -export([get_program_build_info/2,get_program_build_info/3]). -export([create_kernel/2]). -export([create_kernels_in_program/1]). -export([set_kernel_arg/3]). -export([set_kernel_arg_size/3]). -export([release_kernel/1]). 
-export([retain_kernel/1]).
-export([kernel_info/0]).
-export([get_kernel_info/1, get_kernel_info/2]).
-export([kernel_workgroup_info/0]).
-export([get_kernel_workgroup_info/2, get_kernel_workgroup_info/3]).
-export([enqueue_task/3, enqueue_task/4]).
-export([nowait_enqueue_task/3]).
-export([enqueue_nd_range_kernel/5, enqueue_nd_range_kernel/6]).
-export([nowait_enqueue_nd_range_kernel/5]).
-export([enqueue_marker_with_wait_list/2]).
-export([enqueue_barrier_with_wait_list/2]).
-export([enqueue_wait_for_events/2]).
-export([enqueue_read_buffer/5]).
-export([enqueue_write_buffer/6, enqueue_write_buffer/7]).
-export([nowait_enqueue_write_buffer/6]).
-export([enqueue_read_image/7]).
-export([enqueue_write_image/8, enqueue_write_image/9]).
-export([nowait_enqueue_write_image/8]).
-export([enqueue_copy_image/6]).
-export([enqueue_copy_image_to_buffer/7]).
-export([enqueue_copy_buffer_to_image/7]).
-export([enqueue_map_buffer/6]).
-export([enqueue_map_image/6]).
-export([enqueue_unmap_mem_object/3]).
-export([release_event/1, retain_event/1]).
-export([event_info/0]).
-export([get_event_info/1, get_event_info/2]).
-export([wait/1, wait/2]).
-export([async_flush/1, flush/1]).
-export([async_finish/1, finish/1]).
-export([async_wait_for_event/1, wait_for_event/1]).

%% on_load hook of cl12: returns 'ok' only when {1,2} is listed by
%% cl:versions(); otherwise raises the error cl_1_2_not_supported.
init() ->
    Supported = lists:member({1,2}, cl:versions()),
    if
        Supported -> ok;
        true -> erlang:error(cl_1_2_not_supported)
    end.

%% Everything below forwards its arguments unchanged to the function of
%% the same name in the cl module.

start(Options) ->
    cl:start(Options).

start() ->
    cl:start().

stop() ->
    cl:stop().

get_platform_ids() ->
    cl:get_platform_ids().

platform_info() ->
    cl:platform_info().

get_platform_info(Platform) ->
    cl:get_platform_info(Platform).

get_platform_info(Platform, Item) ->
    cl:get_platform_info(Platform, Item).

get_device_ids() ->
    cl:get_device_ids().

get_device_ids(Platform, Type) ->
    cl:get_device_ids(Platform, Type).

%% Composed from the 1.2, 1.1 and 1.0 info builders, starting from [].
device_info() ->
    Info12 = cl:device_info_12([]),
    Info11 = cl:device_info_11(Info12),
    cl:device_info_10(Info11).

get_device_info(Device) ->
    cl:get_device_info(Device).

get_device_info(Device, Item) ->
    cl:get_device_info(Device, Item).
%% Pass-through wrappers: each forwards its arguments unchanged to the
%% function of the same name and arity in the cl module.

%% --- context ---
create_context(DeviceList) ->
    cl:create_context(DeviceList).

create_context_from_type(Type) ->
    cl:create_context_from_type(Type).

release_context(Context) ->
    cl:release_context(Context).

retain_context(Context) ->
    cl:retain_context(Context).

context_info() ->
    cl:context_info().

get_context_info(Context) ->
    cl:get_context_info(Context).

get_context_info(Context, Item) ->
    cl:get_context_info(Context, Item).

%% --- command queue ---
create_queue(Context, Device, Props) ->
    cl:create_queue(Context, Device, Props).

-spec set_queue_property(_, _, _) -> no_return().
set_queue_property(Queue, Props, Enable) ->
    cl:set_queue_property(Queue, Props, Enable).

release_queue(Queue) ->
    cl:release_queue(Queue).

retain_queue(Queue) ->
    cl:retain_queue(Queue).

queue_info() ->
    cl:queue_info().

get_queue_info(Queue) ->
    cl:get_queue_info(Queue).

get_queue_info(Queue, Item) ->
    cl:get_queue_info(Queue, Item).

%% --- memory objects and images ---
create_buffer(Context, Flags, Size) ->
    cl:create_buffer(Context, Flags, Size).

create_buffer(Context, Flags, Size, Data) ->
    cl:create_buffer(Context, Flags, Size, Data).

release_mem_object(Mem) ->
    cl:release_mem_object(Mem).

retain_mem_object(Mem) ->
    cl:retain_mem_object(Mem).

mem_object_info() ->
    cl:mem_object_info().

get_mem_object_info(Mem) ->
    cl:get_mem_object_info(Mem).

get_mem_object_info(Mem, Item) ->
    cl:get_mem_object_info(Mem, Item).

image_info() ->
    cl:image_info().

get_image_info(Image) ->
    cl:get_image_info(Image).

get_image_info(Image, Item) ->
    cl:get_image_info(Image, Item).

get_supported_image_formats(Context, Flags, ImageType) ->
    cl:get_supported_image_formats(Context, Flags, ImageType).

create_image(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:create_image(Arg1, Arg2, Arg3, Arg4, Arg5).

%% --- sampler ---
create_sampler(Arg1, Arg2, Arg3, Arg4) ->
    cl:create_sampler(Arg1, Arg2, Arg3, Arg4).

release_sampler(Sampler) ->
    cl:release_sampler(Sampler).

retain_sampler(Sampler) ->
    cl:retain_sampler(Sampler).

sampler_info() ->
    cl:sampler_info().

get_sampler_info(Sampler) ->
    cl:get_sampler_info(Sampler).

get_sampler_info(Sampler, Item) ->
    cl:get_sampler_info(Sampler, Item).

%% --- program ---
create_program_with_source(Context, Source) ->
    cl:create_program_with_source(Context, Source).

create_program_with_binary(Context, DeviceList, BinaryList) ->
    cl:create_program_with_binary(Context, DeviceList, BinaryList).

release_program(Program) ->
    cl:release_program(Program).

retain_program(Program) ->
    cl:retain_program(Program).

build_program(Program, DeviceList, Options) ->
    cl:build_program(Program, DeviceList, Options).
%% Pass-through wrappers: each forwards its arguments unchanged to the
%% function of the same name and arity in the cl module.

%% --- program ---
async_build_program(Program, DeviceList, Options) ->
    cl:async_build_program(Program, DeviceList, Options).

unload_platform_compiler(Platform) ->
    cl:unload_platform_compiler(Platform).

program_info() ->
    cl:program_info().

get_program_info(Program) ->
    cl:get_program_info(Program).

get_program_info(Program, Item) ->
    cl:get_program_info(Program, Item).

program_build_info() ->
    cl:program_build_info().

get_program_build_info(Program, Device) ->
    cl:get_program_build_info(Program, Device).

get_program_build_info(Program, Device, Item) ->
    cl:get_program_build_info(Program, Device, Item).

%% --- kernel ---
create_kernel(Program, Name) ->
    cl:create_kernel(Program, Name).

create_kernels_in_program(Program) ->
    cl:create_kernels_in_program(Program).

set_kernel_arg(Kernel, Index, Value) ->
    cl:set_kernel_arg(Kernel, Index, Value).

set_kernel_arg_size(Kernel, Index, Size) ->
    cl:set_kernel_arg_size(Kernel, Index, Size).

release_kernel(Kernel) ->
    cl:release_kernel(Kernel).

retain_kernel(Kernel) ->
    cl:retain_kernel(Kernel).

kernel_info() ->
    cl:kernel_info().

get_kernel_info(Kernel) ->
    cl:get_kernel_info(Kernel).

get_kernel_info(Kernel, Item) ->
    cl:get_kernel_info(Kernel, Item).

kernel_workgroup_info() ->
    cl:kernel_workgroup_info().

get_kernel_workgroup_info(Kernel, Device) ->
    cl:get_kernel_workgroup_info(Kernel, Device).

get_kernel_workgroup_info(Kernel, Device, Item) ->
    cl:get_kernel_workgroup_info(Kernel, Device, Item).

%% --- enqueue operations ---
enqueue_task(Arg1, Arg2, Arg3) ->
    cl:enqueue_task(Arg1, Arg2, Arg3).

enqueue_task(Arg1, Arg2, Arg3, Arg4) ->
    cl:enqueue_task(Arg1, Arg2, Arg3, Arg4).

nowait_enqueue_task(Arg1, Arg2, Arg3) ->
    cl:nowait_enqueue_task(Arg1, Arg2, Arg3).

enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

nowait_enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:nowait_enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_marker_with_wait_list(Arg1, Arg2) ->
    cl:enqueue_marker_with_wait_list(Arg1, Arg2).

enqueue_barrier_with_wait_list(Arg1, Arg2) ->
    cl:enqueue_barrier_with_wait_list(Arg1, Arg2).

enqueue_wait_for_events(Arg1, Arg2) ->
    cl:enqueue_wait_for_events(Arg1, Arg2).

enqueue_read_buffer(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:enqueue_read_buffer(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).
%% Pass-through wrappers: each forwards its arguments unchanged to the
%% function of the same name and arity in the cl module.

%% --- buffer and image transfers ---
enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

nowait_enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:nowait_enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_read_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_read_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) ->
    cl:enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8).

enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) ->
    cl:enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9).

nowait_enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) ->
    cl:nowait_enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8).

enqueue_copy_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_copy_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_copy_image_to_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_copy_image_to_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

enqueue_copy_buffer_to_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_copy_buffer_to_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

enqueue_map_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_map_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_map_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_map_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_unmap_mem_object(Arg1, Arg2, Arg3) ->
    cl:enqueue_unmap_mem_object(Arg1, Arg2, Arg3).

%% --- events ---
release_event(Event) ->
    cl:release_event(Event).

retain_event(Event) ->
    cl:retain_event(Event).

event_info() ->
    cl:event_info().

get_event_info(Event) ->
    cl:get_event_info(Event).

get_event_info(Event, Item) ->
    cl:get_event_info(Event, Item).

wait(Arg1) ->
    cl:wait(Arg1).

wait(Arg1, Arg2) ->
    cl:wait(Arg1, Arg2).

%% --- queue flushing and completion ---
async_flush(Queue) ->
    cl:async_flush(Queue).

flush(Queue) ->
    cl:flush(Queue).

async_finish(Queue) ->
    cl:async_finish(Queue).

finish(Queue) ->
    cl:finish(Queue).

async_wait_for_event(Event) ->
    cl:async_wait_for_event(Event).

wait_for_event(Event) ->
    cl:wait_for_event(Event).
cl-cl-1.2.3/src/cl13.erl000066400000000000000000000243001301041406700145370ustar00rootroot00000000000000%%%---- BEGIN COPYRIGHT ------------------------------------------------------- %%% %%% Copyright (C) 2007 - 2012, Rogvall Invest AB, %%% %%% This software is licensed as described in the file COPYRIGHT, which %%% you should have received as part of this distribution. The terms %%% are also available at http://www.rogvall.se/docs/copyright.txt. %%% %%% You may opt to use, copy, modify, merge, publish, distribute and/or sell %%% copies of the Software, and permit persons to whom the Software is %%% furnished to do so, under the terms of the COPYRIGHT file. %%% %%% This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY %%% KIND, either express or implied. %%% %%%---- END COPYRIGHT --------------------------------------------------------- %%% @author Tony Rogvall %%% @copyright (C) 2013, Tony Rogvall %%% @doc %%% DUMMY OpenCL 1.3 API - place holder %%% @end %%% Created : 13 Jan 2013 by Tony Rogvall -module(cl13). -on_load(init/0). -export([start/0, start/1, stop/0]). -export([get_platform_ids/0]). -export([platform_info/0]). -export([get_platform_info/1,get_platform_info/2]). -export([get_device_ids/0, get_device_ids/2]). -export([device_info/0]). -export([get_device_info/1,get_device_info/2]). -export([create_context/1]). -export([create_context_from_type/1]). -export([release_context/1]). -export([retain_context/1]). -export([context_info/0]). -export([get_context_info/1,get_context_info/2]). -export([create_queue/3]). -export([set_queue_property/3]). -export([release_queue/1]). -export([retain_queue/1]). -export([queue_info/0]). -export([get_queue_info/1,get_queue_info/2]). -export([create_buffer/3, create_buffer/4]). -export([release_mem_object/1]). -export([retain_mem_object/1]). -export([mem_object_info/0]). -export([get_mem_object_info/1,get_mem_object_info/2]). -export([image_info/0]). -export([get_image_info/1,get_image_info/2]). 
-export([get_supported_image_formats/3]). -export([create_image/5]). -export([create_sampler/4]). -export([release_sampler/1]). -export([retain_sampler/1]). -export([sampler_info/0]). -export([get_sampler_info/1,get_sampler_info/2]). -export([create_program_with_source/2]). -export([create_program_with_binary/3]). -export([release_program/1]). -export([retain_program/1]). -export([build_program/3, async_build_program/3]). -export([unload_platform_compiler/1]). -export([program_info/0]). -export([get_program_info/1,get_program_info/2]). -export([program_build_info/0]). -export([get_program_build_info/2,get_program_build_info/3]). -export([create_kernel/2]). -export([create_kernels_in_program/1]). -export([set_kernel_arg/3]). -export([set_kernel_arg_size/3]). -export([release_kernel/1]). -export([retain_kernel/1]). -export([kernel_info/0]). -export([get_kernel_info/1,get_kernel_info/2]). -export([kernel_workgroup_info/0]). -export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]). -export([enqueue_task/3, enqueue_task/4]). -export([nowait_enqueue_task/3]). -export([enqueue_nd_range_kernel/5]). -export([enqueue_nd_range_kernel/6]). -export([nowait_enqueue_nd_range_kernel/5]). -export([enqueue_marker_with_wait_list/2]). -export([enqueue_barrier_with_wait_list/2]). -export([enqueue_wait_for_events/2]). -export([enqueue_read_buffer/5]). -export([enqueue_write_buffer/6]). -export([enqueue_write_buffer/7]). -export([nowait_enqueue_write_buffer/6]). -export([enqueue_read_image/7]). -export([enqueue_write_image/8]). -export([enqueue_write_image/9]). -export([nowait_enqueue_write_image/8]). -export([enqueue_copy_image/6]). -export([enqueue_copy_image_to_buffer/7]). -export([enqueue_copy_buffer_to_image/7]). -export([enqueue_map_buffer/6]). -export([enqueue_map_image/6]). -export([enqueue_unmap_mem_object/3]). -export([release_event/1]). -export([retain_event/1]). -export([event_info/0]). -export([get_event_info/1, get_event_info/2]). -export([wait/1, wait/2]). 
-export([async_flush/1, flush/1]).
-export([async_finish/1, finish/1]).
-export([async_wait_for_event/1, wait_for_event/1]).

%% on_load hook of cl13: returns 'ok' only when {1,3} is listed by
%% cl:versions(); otherwise raises the error cl_1_3_not_supported.
init() ->
    Supported = lists:member({1,3}, cl:versions()),
    if
        Supported -> ok;
        true -> erlang:error(cl_1_3_not_supported)
    end.

%% Everything below forwards its arguments unchanged to the function of
%% the same name in the cl module.

start(Options) ->
    cl:start(Options).

start() ->
    cl:start().

stop() ->
    cl:stop().

%% --- platform and device ---
get_platform_ids() ->
    cl:get_platform_ids().

platform_info() ->
    cl:platform_info().

get_platform_info(Platform) ->
    cl:get_platform_info(Platform).

get_platform_info(Platform, Item) ->
    cl:get_platform_info(Platform, Item).

get_device_ids() ->
    cl:get_device_ids().

get_device_ids(Platform, Type) ->
    cl:get_device_ids(Platform, Type).

%% Composed from the 1.2, 1.1 and 1.0 info builders, starting from [].
device_info() ->
    Info12 = cl:device_info_12([]),
    Info11 = cl:device_info_11(Info12),
    cl:device_info_10(Info11).

get_device_info(Device) ->
    cl:get_device_info(Device).

get_device_info(Device, Item) ->
    cl:get_device_info(Device, Item).

%% --- context ---
create_context(DeviceList) ->
    cl:create_context(DeviceList).

create_context_from_type(Type) ->
    cl:create_context_from_type(Type).

release_context(Context) ->
    cl:release_context(Context).

retain_context(Context) ->
    cl:retain_context(Context).

context_info() ->
    cl:context_info().

get_context_info(Context) ->
    cl:get_context_info(Context).

get_context_info(Context, Item) ->
    cl:get_context_info(Context, Item).

%% --- command queue ---
create_queue(Context, Device, Props) ->
    cl:create_queue(Context, Device, Props).

-spec set_queue_property(_, _, _) -> no_return().
set_queue_property(Queue, Props, Enable) ->
    cl:set_queue_property(Queue, Props, Enable).

release_queue(Queue) ->
    cl:release_queue(Queue).

retain_queue(Queue) ->
    cl:retain_queue(Queue).

queue_info() ->
    cl:queue_info().

get_queue_info(Queue) ->
    cl:get_queue_info(Queue).

get_queue_info(Queue, Item) ->
    cl:get_queue_info(Queue, Item).

%% --- memory objects and images ---
create_buffer(Context, Flags, Size) ->
    cl:create_buffer(Context, Flags, Size).

create_buffer(Context, Flags, Size, Data) ->
    cl:create_buffer(Context, Flags, Size, Data).

release_mem_object(Mem) ->
    cl:release_mem_object(Mem).

retain_mem_object(Mem) ->
    cl:retain_mem_object(Mem).

mem_object_info() ->
    cl:mem_object_info().

get_mem_object_info(Mem) ->
    cl:get_mem_object_info(Mem).

get_mem_object_info(Mem, Item) ->
    cl:get_mem_object_info(Mem, Item).

image_info() ->
    cl:image_info().

get_image_info(Image) ->
    cl:get_image_info(Image).
%% Pass-through wrappers: each forwards its arguments unchanged to the
%% function of the same name and arity in the cl module.

%% --- images ---
get_image_info(Image, Item) ->
    cl:get_image_info(Image, Item).

get_supported_image_formats(Context, Flags, ImageType) ->
    cl:get_supported_image_formats(Context, Flags, ImageType).

create_image(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:create_image(Arg1, Arg2, Arg3, Arg4, Arg5).

%% --- sampler ---
create_sampler(Arg1, Arg2, Arg3, Arg4) ->
    cl:create_sampler(Arg1, Arg2, Arg3, Arg4).

release_sampler(Sampler) ->
    cl:release_sampler(Sampler).

retain_sampler(Sampler) ->
    cl:retain_sampler(Sampler).

sampler_info() ->
    cl:sampler_info().

get_sampler_info(Sampler) ->
    cl:get_sampler_info(Sampler).

get_sampler_info(Sampler, Item) ->
    cl:get_sampler_info(Sampler, Item).

%% --- program ---
create_program_with_source(Context, Source) ->
    cl:create_program_with_source(Context, Source).

create_program_with_binary(Context, DeviceList, BinaryList) ->
    cl:create_program_with_binary(Context, DeviceList, BinaryList).

release_program(Program) ->
    cl:release_program(Program).

retain_program(Program) ->
    cl:retain_program(Program).

build_program(Program, DeviceList, Options) ->
    cl:build_program(Program, DeviceList, Options).

async_build_program(Program, DeviceList, Options) ->
    cl:async_build_program(Program, DeviceList, Options).

unload_platform_compiler(Platform) ->
    cl:unload_platform_compiler(Platform).

program_info() ->
    cl:program_info().

get_program_info(Program) ->
    cl:get_program_info(Program).

get_program_info(Program, Item) ->
    cl:get_program_info(Program, Item).

program_build_info() ->
    cl:program_build_info().

get_program_build_info(Program, Device) ->
    cl:get_program_build_info(Program, Device).

get_program_build_info(Program, Device, Item) ->
    cl:get_program_build_info(Program, Device, Item).

%% --- kernel ---
create_kernel(Program, Name) ->
    cl:create_kernel(Program, Name).

create_kernels_in_program(Program) ->
    cl:create_kernels_in_program(Program).

set_kernel_arg(Kernel, Index, Value) ->
    cl:set_kernel_arg(Kernel, Index, Value).

set_kernel_arg_size(Kernel, Index, Size) ->
    cl:set_kernel_arg_size(Kernel, Index, Size).

release_kernel(Kernel) ->
    cl:release_kernel(Kernel).

retain_kernel(Kernel) ->
    cl:retain_kernel(Kernel).

kernel_info() ->
    cl:kernel_info().

get_kernel_info(Kernel) ->
    cl:get_kernel_info(Kernel).

get_kernel_info(Kernel, Item) ->
    cl:get_kernel_info(Kernel, Item).

kernel_workgroup_info() ->
    cl:kernel_workgroup_info().

get_kernel_workgroup_info(Kernel, Device) ->
    cl:get_kernel_workgroup_info(Kernel, Device).

get_kernel_workgroup_info(Kernel, Device, Item) ->
    cl:get_kernel_workgroup_info(Kernel, Device, Item).

%% --- enqueue operations ---
enqueue_task(Arg1, Arg2, Arg3) ->
    cl:enqueue_task(Arg1, Arg2, Arg3).
%% Pass-through wrappers: each forwards its arguments unchanged to the
%% function of the same name and arity in the cl module.

%% --- kernel execution ---
enqueue_task(Arg1, Arg2, Arg3, Arg4) ->
    cl:enqueue_task(Arg1, Arg2, Arg3, Arg4).

nowait_enqueue_task(Arg1, Arg2, Arg3) ->
    cl:nowait_enqueue_task(Arg1, Arg2, Arg3).

enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

nowait_enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:nowait_enqueue_nd_range_kernel(Arg1, Arg2, Arg3, Arg4, Arg5).

%% --- synchronisation points ---
enqueue_marker_with_wait_list(Arg1, Arg2) ->
    cl:enqueue_marker_with_wait_list(Arg1, Arg2).

enqueue_barrier_with_wait_list(Arg1, Arg2) ->
    cl:enqueue_barrier_with_wait_list(Arg1, Arg2).

enqueue_wait_for_events(Arg1, Arg2) ->
    cl:enqueue_wait_for_events(Arg1, Arg2).

%% --- buffer and image transfers ---
enqueue_read_buffer(Arg1, Arg2, Arg3, Arg4, Arg5) ->
    cl:enqueue_read_buffer(Arg1, Arg2, Arg3, Arg4, Arg5).

enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

nowait_enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:nowait_enqueue_write_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_read_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_read_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) ->
    cl:enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8).

enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) ->
    cl:enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9).

nowait_enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) ->
    cl:nowait_enqueue_write_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8).

enqueue_copy_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_copy_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_copy_image_to_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_copy_image_to_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

enqueue_copy_buffer_to_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) ->
    cl:enqueue_copy_buffer_to_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7).

enqueue_map_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_map_buffer(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_map_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) ->
    cl:enqueue_map_image(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6).

enqueue_unmap_mem_object(Arg1, Arg2, Arg3) ->
    cl:enqueue_unmap_mem_object(Arg1, Arg2, Arg3).

%% --- events ---
release_event(Event) ->
    cl:release_event(Event).
retain_event(A1) -> cl:retain_event(A1). event_info() -> cl:event_info(). get_event_info(A1) -> cl:get_event_info(A1). get_event_info(A1,A2) -> cl:get_event_info(A1,A2). wait(A1) -> cl:wait(A1). wait(A1,A2) -> cl:wait(A1,A2). async_flush(A1) -> cl:async_flush(A1). flush(A1) -> cl:flush(A1). async_finish(A1) -> cl:async_finish(A1). finish(A1) -> cl:finish(A1). async_wait_for_event(A1) -> cl:async_wait_for_event(A1). wait_for_event(A1) -> cl:wait_for_event(A1). cl-cl-1.2.3/src/clu.erl000066400000000000000000000177241301041406700145740ustar00rootroot00000000000000%%%---- BEGIN COPYRIGHT ------------------------------------------------------- %%% %%% Copyright (C) 2007 - 2012, Rogvall Invest AB, %%% %%% This software is licensed as described in the file COPYRIGHT, which %%% you should have received as part of this distribution. The terms %%% are also available at http://www.rogvall.se/docs/copyright.txt. %%% %%% You may opt to use, copy, modify, merge, publish, distribute and/or sell %%% copies of the Software, and permit persons to whom the Software is %%% furnished to do so, under the terms of the COPYRIGHT file. %%% %%% This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY %%% KIND, either express or implied. %%% %%%---- END COPYRIGHT --------------------------------------------------------- %%% File : clu.erl %%% Author : Tony Rogvall %%% Description : Utilities %%% Created : 30 Oct 2009 by Tony Rogvall -module(clu). -export([setup/0, setup/1, teardown/1]). -export([context/1, device_list/1, device/1]). -export([build_source/2,build_source/3]). -export([build_binary/2,build_binary/3]). -export([build_source_file/2, build_source_file/3]). -export([compile_file/1,compile_file/2]). -export([get_program_binaries/1]). -export([apply_kernel_args/2]). -export([wait_complete/1]). -export([device_has_extension/2]). -export([devices_has_extension/2]). -include("../include/cl.hrl"). -import(lists, [map/2]). -type clu_state() :: #cl{} | undefined. 
%%
%% @type clu_state() = any()
%%

%%
%% @doc Start cl and build an initial context from devices of type
%%   DevType. Platforms are tried in the order reported by
%%   cl:get_platform_ids(); the first platform that yields a non-empty
%%   device list and a context is used. If the platform list cannot be
%%   fetched, the result of cl:get_platform_ids() is returned as is.
%% @spec setup(DevType::cl_device_type()) -> clu_state() | {error, term()}
%%
setup(DevType) ->
    cl:start(),
    %% Report a failed platform query the same way setup/2 reports its
    %% failures, instead of crashing with badmatch.
    case cl:get_platform_ids() of
        {ok,Ps} -> setup(DevType, Ps);
        Error   -> Error
    end.

%% Walk the platform list. A platform is skipped when it has no devices of
%% the requested type, and also on any other error as long as more
%% platforms remain; the error from the last platform is returned to the
%% caller unchanged.
setup(DevType, [Platform|Ps]) ->
    case cl:get_device_ids(Platform,DevType) of
        {ok, []} ->
            setup(DevType, Ps);
        {ok,DeviceList} ->
            case cl:create_context(DeviceList) of
                {ok,Context} ->
                    #cl { platform = Platform,
                          devices  = DeviceList,
                          context  = Context };
                {error, _} when Ps /= [] ->
                    setup(DevType, Ps);
                Other ->
                    Other
            end;
        {error, device_not_found} ->
            setup(DevType, Ps);
        {error, _} when Ps /= [] ->
            setup(DevType, Ps);
        Other ->
            Other
    end;
setup(DevType, []) ->
    {error, {device_not_found, DevType}}.

%%
%% @doc setup a clu context with all devices.
%%
%% @spec setup() -> clu_state() | {error, term()}
%%
setup() ->
    setup(all).

%%
%% @doc Release the context setup by clu:setup().
%%
%% @spec teardown(E::clu_state()) -> 'ok' | {'error',cl_error()}
%%
teardown(E) ->
    cl:release_context(E#cl.context).

%%
%% Fetch context
%%
context(E) ->
    E#cl.context.

%%
%% Fetch device list
%%
device_list(E) ->
    E#cl.devices.

%%
%% Fetch first device (fails with badarg if the device list is empty)
%%
device(E) ->
    hd(E#cl.devices).

%%
%% @doc Create and build a OpenCL program from a string, using empty
%%   build options.
%%
%% @spec build_source(E::clu_state(), Source::iodata()) ->
%%    {'ok',cl_program()} | {'error',{cl_error(), Logs}}
%%
-spec build_source(E::clu_state(), Source::iodata()) ->
    {'ok',cl_program()} | {'error',{cl_error(),Logs::term()}}.

build_source(E, Source) ->
    build_source(E, Source, "").

-spec build_source(E::clu_state(), Source::iodata(), Options::string()) ->
    {'ok',cl_program()} | {'error',{cl_error(),Logs::term()}}.
%% Create a program from Source in the context of E and build it for all
%% devices of E with the given Options. The build counts as successful
%% when cl:build_program/3 returns ok and at least one device reports
%% {ok, success} as its build status. On failure the per-device build
%% logs are printed, the program is released and
%% {error, {Reason, Logs}} is returned, where Reason is either the
%% cl:build_program/3 result or the list of per-device statuses.
build_source(E, Source, Options) ->
    {ok,Program} = cl:create_program_with_source(E#cl.context,Source),
    case cl:build_program(Program, E#cl.devices, Options) of
        ok ->
            Status = [cl:get_program_build_info(Program, Dev, status)
                      || Dev <- E#cl.devices],
            case lists:any(fun({ok, success}) -> true;
                              (_) -> false
                           end, Status) of
                true ->
                    {ok,Program};
                false ->
                    build_failed(Program, Status)
            end;
        Error ->
            build_failed(Program, Error)
    end.

%% Shared failure path of build_source/3: print the logs and release the
%% program, which is never handed to the caller, so nobody else can
%% release it.
build_failed(Program, Reason) ->
    Logs = get_program_logs(Program),
    io:format("Logs: ~s\n", [Logs]),
    cl:release_program(Program),
    {error,{Reason,Logs}}.

-spec build_source_file(E::clu_state(), File::string()) ->
    {'ok',cl_program()} | {'error',{cl_error(),Logs::term()}}
        | {'error', term()}.

build_source_file(E,File) ->
    build_source_file(E, File,"").

%% Read File and build its contents with build_source/3. A failure of
%% file:read_file/1 is returned to the caller unchanged.
-spec build_source_file(E::clu_state(), File::string(), Options::string()) ->
    {'ok',cl_program()} | {'error',{cl_error(),Logs::term()}}
        | {'error', term()}.

build_source_file(E, File,Options) ->
    case file:read_file(File) of
        {ok,Binary} ->
            build_source(E,Binary,Options);
        Error ->
            Error
    end.

-spec compile_file(File::string()) ->
    {'ok',{[cl_device_id()],[binary()]}} |
    {'error',{cl_error(),Logs::term()}} | {'error', term()}.

compile_file(File) ->
    compile_file(File,"").

%% Build File in a temporary context over all devices and return the
%% resulting program binaries. The program and the context are released
%% before returning, also when fetching the binaries raises.
%% If setup(all) does not deliver a #cl{} record, its result is returned
%% as is instead of being used as a record.
-spec compile_file(File::string(), Options::string()) ->
    {'ok',{[cl_device_id()],[binary()]}} |
    {'error',{cl_error(),Logs::term()}} | {'error', term()}.

compile_file(File,Options) ->
    case setup(all) of
        E = #cl{} ->
            try build_source_file(E,File,Options) of
                {ok,Program} ->
                    try
                        get_program_binaries(Program)
                    after
                        cl:release_program(Program)
                    end;
                BuildError ->
                    %% Logs were already printed by build_source/3.
                    BuildError
            after
                teardown(E)
            end;
        SetupError ->
            SetupError
    end.

%% @doc Retrieve the binaries associated with a program build.
%% the binaries may be cached for later use with build_binary/2.
%%
%% @spec get_program_binaries(Program::cl_program()) ->
%%   {ok,{[cl_device_id()],[binary()]}}
%%
get_program_binaries(Program) ->
    {ok,DeviceList} = cl:get_program_info(Program, devices),
    {ok,BinaryList} = cl:get_program_info(Program, binaries),
    {ok,{DeviceList, BinaryList}}.

%% Collect the build log of Program for every device listed in the
%% program's own 'devices' info. Crashes with badmatch if any query fails.
get_program_logs(Program) ->
    {ok,DeviceList} = cl:get_program_info(Program, devices),
    map(fun(Device) ->
                {ok,Log} = cl:get_program_build_info(Program,Device,log),
                Log
        end, DeviceList).

%% Build a program from a {DeviceList,BinaryList} pair with empty options.
build_binary(E, {DeviceList,BinaryList}) ->
    build_binary(E, {DeviceList,BinaryList},"").

%% Create a program from binaries in the context of E and build it for
%% DeviceList. Returns {ok,Program}, or {error,{Error,Logs}} where Error is
%% the cl:build_program/3 result and Logs holds one build log per device in
%% DeviceList.
build_binary(E, {DeviceList,BinaryList},Options) ->
    {ok,Program} = cl:create_program_with_binary(E#cl.context,
                                                 DeviceList, BinaryList),
    case cl:build_program(Program, DeviceList, Options) of
        ok ->
            {ok,Program};
        Error ->
            %% Query the devices the program was created for, not every
            %% device of the context: asking for a device outside
            %% DeviceList would fail the match below and hide Error.
            Logs = map(fun(Device) ->
                               {ok,Log} = cl:get_program_build_info(Program,
                                                                    Device,log),
                               Log
                       end, DeviceList),
            %% The failed program is not returned, so release it here
            %% (same as the failure path of build_source/3).
            cl:release_program(Program),
            {error,{Error,Logs}}
    end.

%%
%% Utility to set all kernel arguments (and do arity check)
%%
%% Raises {bad_arity,Name,N} when length(Args) differs from the kernel's
%% num_args N. A failed argument assignment is re-raised as an error with
%% the value that set_kernel_arg/set_kernel_arg_size returned.
apply_kernel_args(Kernel, Args) ->
    {ok,N} = cl:get_kernel_info(Kernel, num_args),
    Arity = length(Args),
    if N /= Arity ->
            {ok,Name} = cl:get_kernel_info(Kernel, function_name),
            erlang:error({bad_arity,Name,N});
       true ->
            try apply_args(Kernel, 0, Args)
            catch
                error:{badmatch,Error} ->
                    erlang:error(Error)
            end
    end.

%% Assign arguments starting at index I. A {local,Size} element sets only
%% the argument size; any other element is passed as the argument value.
apply_args(Kernel, I, [{local,Size}|As]) ->
    %%io:format("kernel set arg ~w size to ~p\n", [I,Size]),
    ok = cl:set_kernel_arg_size(Kernel,I,Size),
    apply_args(Kernel,I+1,As);
apply_args(Kernel,I,[A|As]) ->
    %%io:format("kernel set arg ~w to ~p\n", [I,A]),
    ok = cl:set_kernel_arg(Kernel,I,A),
    apply_args(Kernel,I+1,As);
apply_args(_Kernel, _I, []) ->
    ok.

%% manual wait for event to complete (crash on failure)
%% should test for error status
%% Polls the event's execution_status every 100 ms until it is 'complete',
%% printing each other status. NOTE(review): any {ok,Status} other than
%% complete keeps the loop going, so a terminal error status would poll
%% forever - confirm how cl reports failed events.
wait_complete(Event) ->
    case cl:get_event_info(Event, execution_status) of
        {ok,complete} ->
            ok;
        {ok,Other} ->
            io:format("Status: ~p\n", [Other]),
            timer:sleep(100),
            wait_complete(Event)
    end.
%% %% utility function to test if an extension is present in a device %% device_has_extension(Device, Extension) when is_atom(Extension) -> device_has_extension(Device, atom_to_list(Extension)); device_has_extension(Device, Extension) when is_list(Extension) -> {ok,Extensions} = cl:get_device_info(Device,extensions), lists:member(Extension, string:tokens(Extensions, " ")). devices_has_extension(Clu, Extension) -> lists:all( fun(D) -> device_has_extension(D, Extension) end, device_list(Clu)). cl-cl-1.2.3/test/000077500000000000000000000000001301041406700134625ustar00rootroot00000000000000cl-cl-1.2.3/test/cl_SUITE.erl000066400000000000000000000013651301041406700155420ustar00rootroot00000000000000%%% File : cl_SUITE %%% Author : Dan Gudmundsson %%% Description : test cl -module(cl_SUITE). -export([all/0, init_per_suite/1, end_per_suite/1]). -include("cl.hrl"). all() -> [{cl_test, all}, {cl_basic, ct_test}, {cl_binary_test, ct_test}, {cl_buffer, all}, {cl_image, all} ]. init_per_suite(Config) -> try io:format("Running init per SUITE: ~p~n", [Config]), CLU = clu:setup(), {ok, [Type|_]} = cl:get_device_info(clu:device(CLU), type), clu:teardown(CLU), [{type, Type}|Config] catch _:Reason -> io:format("Skipping test case failed to figure out cl device~n"), io:format("~p: ~p~n",[Reason, erlang:get_stacktrace()]), {skip, "Can not find cl type"} end. end_per_suite(_) -> ok. cl-cl-1.2.3/test/cl_basic.erl000066400000000000000000000167331301041406700157370ustar00rootroot00000000000000%% Basic tests -module(cl_basic). -compile(export_all). -import(lists, [foreach/2]). -include("../include/cl.hrl"). init_per_suite(Config) -> cl_SUITE:init_per_suite(Config). test() -> test(all). ct_test(Config) when is_list(Config) -> test(all). 
%% Run the basic test for the devices of type DevType.
%% Prints platform, device and context information, exercises queue and
%% sampler creation for every device, then builds and runs a small test
%% program. Crashes with badmatch if a required cl call fails.
test(DevType) ->
    E = clu:setup(DevType),
    {ok,PlatformInfo} = cl:get_platform_info(E#cl.platform),
    io:format("PlatformInfo: ~p\n", [PlatformInfo]),
    foreach(
      fun(Device) ->
              io:format("Device: ~p\n", [Device]),
              io:format("DeviceInfo:\n", []),
              {ok,DeviceInfo} = cl:get_device_info(Device),
              lists:foreach(
                fun({Attr,Value}) ->
                        io:format(" ~s: ~p\n", [Attr,Value]),
                        %% Only the extensions attribute is tokenized;
                        %% andalso keeps string:tokens/2 away from the
                        %% values of all other attributes.
                        case (Attr =:= extensions) andalso
                            lists:member("cl_nv_device_attribute_query",
                                         string:tokens(Value," ")) of
                            true ->
                                nv_device_info(Device);
                            false ->
                                ok
                        end
                end, DeviceInfo)
      end, E#cl.devices),
    {ok,ContextInfo} = cl:get_context_info(E#cl.context),
    io:format("ContextInfo: ~p\n", [ContextInfo]),
    cl:retain_context(E#cl.context),
    {ok,ContextInfo2} = cl:get_context_info(E#cl.context),
    io:format("Context2: ~p\n", [ContextInfo2]),
    foreach(fun(Device) -> test_queue(E, Device) end, E#cl.devices),
    foreach(fun(Device) -> test_sampler(E, Device) end, E#cl.devices),
    test_program(E#cl.context, E#cl.devices),
    clu:teardown(E).

%% Print the device attributes queried for the
%% cl_nv_device_attribute_query extension. When the compute capability can
%% be read, also print an estimated core count taken from a fixed table.
nv_device_info(Device) ->
    io:format(" cl_nv_device_attribute_query:\n", []),
    lists:foreach(
      fun(NvAttr) ->
              case cl:get_device_info(Device, NvAttr) of
                  {ok,NvValue} ->
                      io:format(" ~s: ~p\n", [NvAttr,NvValue]);
                  {error,Reason} ->
                      io:format("InfoError: ~s [~p]\n", [NvAttr,Reason])
              end
      end, [ compute_capability_major_nv,
             compute_capability_minor_nv,
             registers_per_block_nv,
             warp_size_nv,
             gpu_overlap_nv,
             kernel_exec_timeout_nv,
             device_integrated_memory_nv]),
    case {cl:get_device_info(Device, compute_capability_major_nv),
          cl:get_device_info(Device, compute_capability_minor_nv) } of
        {{ok,Major},{ok,Minor}} ->
            io:format(" ~s: ~p\n", [compute_capability_major_nv,Major]),
            %% The printed label is the real attribute name (it used to be
            %% misspelled "mainor").
            io:format(" ~s: ~p\n", [compute_capability_minor_nv,Minor]),
            %% Cores per compute unit, looked up by compute capability.
            Cores = case {Major,Minor} of
                        {1,1} -> 8;
                        {1,2} -> 8;
                        {1,3} -> 8;
                        {2,0} -> 32;
                        {2,1} -> 48;
                        {3,0} -> 192;
                        {3,5} -> 192;
                        {5,0} -> 128;
                        _ -> 0 %% unknown (to me)
                    end,
            ComputeUnits =
                case cl:get_device_info(Device, max_compute_units) of
                    {ok,U} -> U;
                    {error,_} -> 0
                end,
            io:format(" number_of_cores: ~w\n", [Cores]),
            io:format(" total_number_of_cores: ~w\n", [ComputeUnits*Cores]);
        _ ->
            ok
    end.

%% Create a small program from source in Context, print its program and
%% build information before and after building it for DeviceList, then
%% print information about every kernel and run each kernel as a task on
%% every device. On a build error the per-device build information is
%% printed instead. The program is released in both cases; returns ok.
test_program(Context, DeviceList) ->
    %% Program1
    Source1 = " __kernel void program1(int n, int m) { int result = n + m; } ",
    {ok,Program} = cl:create_program_with_source(Context,Source1),
    foreach(
      fun(Device) ->
              {ok,Status} = cl:get_program_build_info(Program,Device,status),
              io:format("Status @ ~w: ~p\n", [Device,Status])
      end, DeviceList),
    io:format("Program: ~p\n", [Program]),
    program_info(Program),
    foreach(
      fun(Device) -> build_info(Program, Device) end,
      DeviceList),
    case cl:build_program(Program, DeviceList, "-Dhello=1 -Dtest") of
        ok ->
            foreach(
              fun(Device) -> build_info(Program, Device) end,
              DeviceList),
            program_info(Program),
            {ok,Kernels} = cl:create_kernels_in_program(Program),
            foreach(
              fun(Kernel) ->
                      {ok,KernelInfo} = cl:get_kernel_info(Kernel),
                      io:format("KernelInfo: ~p\n", [KernelInfo]),
                      foreach(
                        fun(Device) ->
                                {ok,I}=cl:get_kernel_workgroup_info(Kernel,Device),
                                io:format("KernelWorkGroupInfo: ~p\n", [I])
                        end, DeviceList),
                      %% Argument info is only queried when {1,2} is among
                      %% cl:versions(); a crash in the query is caught and
                      %% printed instead of failing the test.
                      case lists:member({1,2}, cl:versions()) of
                          true ->
                              ArgInfo = (catch cl:get_kernel_arg_info(Kernel)),
                              io:format("arg_info: ~p\n", [ArgInfo]);
                          false ->
                              ok
                      end
              end, Kernels),
            foreach(
              fun(Device) ->
                      {ok,Queue} = cl:create_queue(Context,Device,[]),
                      foreach(
                        fun(Kernel) ->
                                cl:set_kernel_arg(Kernel, 0, 12),
                                cl:set_kernel_arg(Kernel, 1, 13),
                                {ok,Event} = cl:enqueue_task(Queue, Kernel, []),
                                {ok,EventInfo} = cl:get_event_info(Event),
                                io:format("EventInfo: ~p\n", [EventInfo]),
                                cl:flush(Queue),
                                io:format("Event Status:=~p\n",
                                          [cl:wait(Event,1000)])
                        end, Kernels)
              end, DeviceList),
            ok;
        Error ->
            io:format("\n\nBuild Error: ~p\n\n", [Error]),
            foreach(
              fun(Device) ->
                      {ok,BuildInfo} = cl:get_program_build_info(Program,Device),
                      io:format("BuildInfo @ ~w: ~p\n", [Device,BuildInfo])
              end, DeviceList)
    end,
    cl:release_program(Program),
    ok.
%% Print every attribute listed by cl:program_info/0 for Program.
%% An attribute that cannot be read is reported as an InfoError line.
program_info(Program) ->
    io:format("ProgramInfo:\n", []),
    ShowAttr =
        fun(Attr) ->
                case cl:get_program_info(Program,Attr) of
                    {error,Reason} ->
                        io:format("InfoError: ~s [~p]\n", [Attr,Reason]);
                    {ok,Value} ->
                        io:format(" ~s: ~p\n", [Attr,Value])
                end
        end,
    lists:foreach(ShowAttr, cl:program_info()).

%% Print the build information of Program for Device. When {1,2} is among
%% cl:versions() the binary_type attribute is queried and printed as well.
build_info(Program, Device) ->
    io:format("BuildInfo @ ~w\n", [Device]),
    {ok,BuildInfo} = cl:get_program_build_info(Program,Device),
    ShowPair = fun({Attr,Value}) -> io:format(" ~s: ~p\n", [Attr,Value]) end,
    lists:foreach(ShowPair, BuildInfo),
    HasV12 = lists:member({1,2}, cl:versions()),
    if
        HasV12 ->
            %% fixme: version handle program_build_info
            case cl:get_program_build_info(Program,Device,binary_type) of
                {error,Reason} ->
                    io:format("InfoError: ~s [~p]\n", [binary_type,Reason]);
                {ok,BinaryInfo} ->
                    io:format(" ~s: ~p\n", [binary_type,BinaryInfo])
            end;
        true ->
            ok
    end.

%% Create a command queue for Device in the context of E, print the queue
%% and its info, then release it. Returns ok.
test_queue(E, Device) ->
    Context = E#cl.context,
    {ok,Queue} = cl:create_queue(Context,Device,[]),
    io:format("Queue: ~p\n", [Queue]),
    {ok,QueueInfo} = cl:get_queue_info(Queue),
    io:format("QueueInfo: ~p\n", [QueueInfo]),
    cl:release_queue(Queue),
    ok.

%% Create, inspect and release two buffers in the context of E: a 1024
%% byte read_write buffer and a read_only buffer created from a binary.
%% Returns ok.
test_buffer(E) ->
    Context = E#cl.context,

    %% Read/Write buffer
    {ok,Buffer} = cl:create_buffer(Context,[read_write],1024),
    io:format("Buffer: ~p\n", [Buffer]),
    {ok,BufferInfo} = cl:get_mem_object_info(Buffer),
    io:format("BufferInfo: ~p\n", [BufferInfo]),
    cl:release_mem_object(Buffer),

    %% Read only buffer
    Contents = <<"Hello brave new world">>,
    {ok,Buffer2} = cl:create_buffer(Context,[read_only],0,Contents),
    io:format("Buffer2: ~p\n", [Buffer2]),
    {ok,Buffer2Info} = cl:get_mem_object_info(Buffer2),
    io:format("Buffer2Info: ~p\n", [Buffer2Info]),
    cl:release_mem_object(Buffer2),
    ok.
test_sampler(E, Device) -> {ok,DeviceInfo} = cl:get_device_info(Device), Name = proplists:get_value(name, DeviceInfo), case proplists:get_value(image_support, DeviceInfo) of true -> %% Sampler1 {ok,Sampler1} = cl:create_sampler(E#cl.context,true,clamp,nearest), io:format("Sampler1: ~p\n", [Sampler1]), {ok,Sampler1Info} = cl:get_sampler_info(Sampler1), io:format("Sampler1Info: ~p\n", [Sampler1Info]), cl:release_sampler(Sampler1), %% Sampler2 {ok,Sampler2} = cl:create_sampler(E#cl.context,false,repeat,linear), io:format("Sampler2: ~p\n", [Sampler2]), {ok,Sampler2Info} = cl:get_sampler_info(Sampler2), io:format("Sampler2Info: ~p\n", [Sampler2Info]), cl:release_sampler(Sampler2), ok; false -> io:format("No image support for device ~s ~n",[Name]) end. cl-cl-1.2.3/test/cl_binary_test.erl000066400000000000000000000011641301041406700171710ustar00rootroot00000000000000%%% File : cl_binary_test.erl %%% Author : Tony Rogvall %%% Description : test build of binary programs %%% Created : 7 Nov 2009 by Tony Rogvall -module(cl_binary_test). -export([test/0, ct_test/1, init_per_suite/1]). init_per_suite(Config) -> cl_SUITE:init_per_suite(Config). ct_test(_) -> test(). test() -> E = clu:setup(), {ok,P1} = clu:build_source(E, "__kernel void foo(int n) { int x; x = n; }"), {ok,B} = clu:get_program_binaries(P1), ok = cl:release_program(P1), {ok,P2} = clu:build_binary(E, B), ok = cl:release_program(P2), ok. cl-cl-1.2.3/test/cl_buffer.erl000066400000000000000000000122741301041406700161230ustar00rootroot00000000000000%%% @author Tony Rogvall %%% @copyright (C) 2014, Tony Rogvall %%% @doc %%% Buffer test/example %%% @end %%% Created : 8 May 2014 by Tony Rogvall -module(cl_buffer). -compile(export_all). init_per_suite(Config) -> cl_SUITE:init_per_suite(Config). all() -> [copy, read_rect, write_rect, sub, fill, migrate]. 
%% test write/copy/read
%% test of copy buffer, require version 1.0
%%
%% Writes 1024 bytes to one buffer, copies them to a second buffer and
%% reads the second buffer back. The test case crashes with badmatch if
%% the data read back differs from the data written; returns true otherwise.
copy(Config) ->
    C = clu:setup(proplists:get_value(type, Config, gpu)),
    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),
    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 1024),
    {ok,Buf2} = cl:create_buffer(clu:context(C),[read_write], 1024),
    Data1 = erlang:iolist_to_binary(lists:duplicate(4,lists:seq(0,255))),
    %% Each command waits for the event of the previous one.
    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 1024, Data1, []),
    {ok,E2} = cl:enqueue_copy_buffer(Q, Buf1, Buf2, 0, 0, 1024, [E1]),
    {ok,E3} = cl:enqueue_read_buffer(Q, Buf2, 0, 1024, [E2]),
    cl:flush(Q),
    cl:wait_for_events([E1,E2]),
    {ok,Data2} = cl:wait(E3),
    clu:teardown(C),
    %% A plain boolean return value is not a failure for Common Test, so
    %% the comparison is asserted with a match.
    Data1 = Data2,
    true.

%% read rectangular area, require version 1.1
%%
%% Writes an 8x8 byte buffer that holds a 4x2 block of the values 1..8 at
%% column 2, row 2, and reads that block back with a rectangular read.
%% Crashes with badmatch if the block read back is not <<1,...,8>>;
%% returns true otherwise.
read_rect(Config) ->
    C = clu:setup(proplists:get_value(type, Config, gpu)),
    true = lists:member({1,1},cl:versions()),
    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),
    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 8*8),
    Data1 = <<0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,
              0,0,1,2,3,4,0,0,
              0,0,5,6,7,8,0,0,
              0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0>>,
    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 64, Data1, []),
    {ok,E2} = cl:enqueue_read_buffer_rect(Q, Buf1,
                                          [2,2,0], [0,0,0], [4,2,1],
                                          8, 0, 4, 0, [E1]),
    cl:wait_for_events([E1]),
    {ok,Data2} = cl:wait(E2),
    clu:teardown(C),
    %% Assert the result; a false return value would still pass the test.
    <<1,2,3,4,5,6,7,8>> = Data2,
    true.
%% write rectangular area, require version 1.1
%%
%% Fills an 8x8 byte buffer with 9, overwrites a 4x2 block at column 2,
%% row 2 with the values 1..8 using a rectangular write, and reads the
%% whole buffer back. Crashes with badmatch if the buffer contents are not
%% as expected; returns true otherwise.
write_rect(Config) ->
    C = clu:setup(proplists:get_value(type, Config, gpu)),
    true = lists:member({1,1},cl:versions()),
    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),
    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 8*8),
    Data0 = <<9,9,9,9,9,9,9,9,
              9,9,9,9,9,9,9,9,
              9,9,9,9,9,9,9,9,
              9,9,9,9,9,9,9,9,
              9,9,9,9,9,9,9,9,
              9,9,9,9,9,9,9,9,
              9,9,9,9,9,9,9,9,
              9,9,9,9,9,9,9,9>>,
    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 64, Data0, []),
    Data1 = <<1,2,3,4,
              5,6,7,8>>,
    {ok,E2} = cl:enqueue_write_buffer_rect(Q, Buf1,
                                           [2,2,0], [0,0,0], [4,2,1],
                                           8, 0, 4, 0,
                                           Data1, [E1]),
    {ok,E3} = cl:enqueue_read_buffer(Q, Buf1, 0, 64, [E2]),
    cl:flush(Q),
    cl:wait_for_events([E1,E2]),
    {ok,Data3} = cl:wait(E3),
    clu:teardown(C),
    %% A plain boolean return value is not a failure for Common Test, so
    %% the expected contents are asserted with a match.
    <<9,9,9,9,9,9,9,9,
      9,9,9,9,9,9,9,9,
      9,9,1,2,3,4,9,9,
      9,9,5,6,7,8,9,9,
      9,9,9,9,9,9,9,9,
      9,9,9,9,9,9,9,9,
      9,9,9,9,9,9,9,9,
      9,9,9,9,9,9,9,9>> = Data3,
    true.

%% create sub buffer, require version 1.1
%%
%% Writes an 8x8 byte buffer, creates a sub buffer with the region
%% [18,14] and reads 12 bytes from the start of the sub buffer.
%% Crashes with badmatch if the bytes read differ from bytes 18..29 of the
%% parent data; returns true otherwise.
sub(Config) ->
    C = clu:setup(proplists:get_value(type, Config, gpu)),
    true = lists:member({1,1},cl:versions()),
    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),
    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 8*8),
    Data1 = <<0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,
              0,0,1,2,3,4,0,0,
              0,0,5,6,7,8,0,0,
              0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0,
              0,0,0,0,0,0,0,0>>,
    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 64, Data1, []),
    {ok,Buf2} = cl:create_sub_buffer(Buf1,[read_write],region,[18,14]),
    {ok,E2} = cl:enqueue_read_buffer(Q, Buf2, 0, 12, [E1]),
    cl:flush(Q),
    cl:wait_for_events([E1]),
    {ok,Data2} = cl:wait(E2),
    clu:teardown(C),
    %% Assert the result; a false return value would still pass the test.
    <<1,2,3,4,0,0,0,0,5,6,7,8>> = Data2,
    true.
%% fill buffer, require version 1.2 fill(Config) -> C = clu:setup(proplists:get_value(type, Config, gpu)), true = lists:member({1,2},cl:versions()), {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]), {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 8*8), {ok,E1} = cl:enqueue_fill_buffer(Q, Buf1, <<9>>, 0, 64, []), {ok,E2} = cl:enqueue_fill_buffer(Q, Buf1, <<1,2,3,4>>, 12, 4, [E1]), {ok,E3} = cl:enqueue_fill_buffer(Q, Buf1, <<5,6,7,8>>, 20, 4, [E2]), {ok,E4} = cl:enqueue_read_buffer(Q, Buf1, 0, 64, [E3]), cl:flush(Q), cl:wait_for_events([E1,E2,E3]), {ok,Data1} = cl:wait(E4), clu:teardown(C), Data1 =:= <<9,9,9,9,9,9,9,9, 9,9,9,9,1,2,3,4, 9,9,9,9,5,6,7,8, 9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9>>. migrate(_) -> C = clu:setup(all), true = lists:member({1,2},cl:versions()), case clu:device_list(C) of [D1,D2|_] -> {ok,Q1} = cl:create_queue(clu:context(C),D1,[]), {ok,Q2} = cl:create_queue(clu:context(C),D2,[]), {ok,B1} = cl:create_buffer(clu:context(C),[read_write], 8*8), {ok,E1} = cl:enqueue_fill_buffer(Q1, B1, <<9>>, 0, 64, []), cl:flush(Q1), {ok,completed} = cl:wait(E1), {ok,E2} = cl:enqueue_migrate_mem_objects(Q2, [B1], [], []), cl:flush(Q2), %% fixme: add a kernel to check that the data was migrated cl:wait(E2); _ -> ignore end. cl-cl-1.2.3/test/cl_image.erl000066400000000000000000000133431301041406700157320ustar00rootroot00000000000000%%% @author Tony Rogvall %%% @copyright (C) 2014, Tony Rogvall %%% @doc %%% cl_image test %%% @end %%% Created : 9 May 2014 by Tony Rogvall -module(cl_image). -compile(export_all). -include_lib("cl/include/cl.hrl"). init_per_suite(Config) -> cl_SUITE:init_per_suite(Config). all() -> [create_image2d_a, create_image2d_b, create_image2d_c, create_image2d_d, create_image3d_a, create_image3d_b, create_image3d_c, create_image3d_d, pixop ]. 
%% Create a 64x64 rgba/unorm_int8 2D image without initial data, using
%% cl:create_image2d/7. Returns the result of that call.
create_image2d_a(Config) ->
    DevType = proplists:get_value(type,Config,gpu),
    Clu = clu:setup(DevType),
    Format = #cl_image_format { cl_channel_order = rgba,
                                cl_channel_type = unorm_int8 },
    cl:create_image2d(clu:context(Clu), [read_write], Format,
                      64, 64, 0, <<>>).

%% Create a 64x64 rgba/unorm_int8 2D image from generated pixel data with
%% a row pitch of 64*4 bytes, using cl:create_image2d/7.
create_image2d_b(Config) ->
    DevType = proplists:get_value(type,Config,gpu),
    Clu = clu:setup(DevType),
    Pixels = create_image2d_data(64, 64, 4),
    Format = #cl_image_format { cl_channel_order = rgba,
                                cl_channel_type = unorm_int8 },
    cl:create_image2d(clu:context(Clu), [read_write], Format,
                      64, 64, 64*4, Pixels).

%% Same image as create_image2d_a/1, but described with a #cl_image_desc{}
%% and created with cl:create_image/5. Requires {1,2} in cl:versions().
create_image2d_c(Config) ->
    true = lists:member({1,2},cl:versions()),
    DevType = proplists:get_value(type,Config,gpu),
    Clu = clu:setup(DevType),
    Format = #cl_image_format { cl_channel_order = rgba,
                                cl_channel_type = unorm_int8 },
    Desc = #cl_image_desc { image_type = image2d,
                            image_width = 64,
                            image_height = 64,
                            image_depth = 1,
                            image_array_size = 1,
                            image_row_pitch = 0 },
    cl:create_image(clu:context(Clu), [read_write], Format, Desc, <<>>).

%% Same image as create_image2d_b/1, but described with a #cl_image_desc{}
%% and created with cl:create_image/5. Requires {1,2} in cl:versions().
create_image2d_d(Config) ->
    true = lists:member({1,2},cl:versions()),
    DevType = proplists:get_value(type,Config,gpu),
    Clu = clu:setup(DevType),
    Pixels = create_image2d_data(64, 64, 4),
    Format = #cl_image_format { cl_channel_order = rgba,
                                cl_channel_type = unorm_int8 },
    Desc = #cl_image_desc { image_type = image2d,
                            image_width = 64,
                            image_height = 64,
                            image_depth = 1,
                            image_array_size = 1,
                            image_row_pitch = 64*4 },
    cl:create_image(clu:context(Clu), [read_write], Format, Desc, Pixels).

%% Build W*H pixels of BytesPerPixel bytes each; every pixel holds the
%% integer 1234.
create_image2d_data(W,H,BytesPerPixel) ->
    << <<1234:BytesPerPixel/unit:8>> ||
        _ <- lists:seq(1,W),
        _ <- lists:seq(1,H) >>.

%% Create a 64x64x64 rgba/unorm_int8 3D image without initial data, using
%% cl:create_image3d/9. Returns the result of that call.
create_image3d_a(Config) ->
    DevType = proplists:get_value(type,Config,gpu),
    Clu = clu:setup(DevType),
    Format = #cl_image_format { cl_channel_order = rgba,
                                cl_channel_type = unorm_int8 },
    cl:create_image3d(clu:context(Clu), [read_write], Format,
                      64, 64, 64, 0, 0, <<>>).

%% Create a 64x64x64 rgba/unorm_int8 3D image from generated pixel data
%% with a row pitch of 64*4 and a slice pitch of 64*64*4 bytes, using
%% cl:create_image3d/9.
create_image3d_b(Config) ->
    DevType = proplists:get_value(type,Config,gpu),
    Clu = clu:setup(DevType),
    Voxels = create_image3d_data(64, 64, 64, 4),
    Format = #cl_image_format { cl_channel_order = rgba,
                                cl_channel_type = unorm_int8 },
    cl:create_image3d(clu:context(Clu), [read_write], Format,
                      64, 64, 64, 64*4, 64*64*4, Voxels).
%% Create a 64x64x64 rgba/unorm_int8 3D image without initial data,
%% described with a #cl_image_desc{} and created with cl:create_image/5.
%% Requires {1,2} in cl:versions(). Returns the result of that call.
create_image3d_c(Config) ->
    true = lists:member({1,2},cl:versions()),
    C = clu:setup(proplists:get_value(type,Config,gpu)),
    cl:create_image(clu:context(C),[read_write],
                    #cl_image_format { cl_channel_order = rgba,
                                       cl_channel_type = unorm_int8 },
                    #cl_image_desc {
                       image_type = image3d,
                       image_width = 64,
                       image_height = 64,
                       image_depth = 64,
                       image_array_size = 1,
                       image_row_pitch = 0,
                       image_slice_pitch = 0
                      },
                    <<>>).

%% Create a 64x64x64 rgba/unorm_int8 3D image from generated pixel data
%% with a row pitch of 64*4 and a slice pitch of 64*64*4 bytes, described
%% with a #cl_image_desc{} and created with cl:create_image/5.
%% Requires {1,2} in cl:versions(). Returns the result of that call.
create_image3d_d(Config) ->
    true = lists:member({1,2},cl:versions()),
    C = clu:setup(proplists:get_value(type,Config,gpu)),
    ImageData = create_image3d_data(64, 64, 64, 4),
    cl:create_image(clu:context(C),[read_write],
                    #cl_image_format { cl_channel_order = rgba,
                                       cl_channel_type = unorm_int8 },
                    #cl_image_desc {
                       image_type = image3d,
                       image_width = 64,
                       image_height = 64,
                       image_depth = 64,
                       image_array_size = 1,
                       image_row_pitch = 64*4,
                       image_slice_pitch = 64*64*4
                      },
                    ImageData).

%% Build W*H*D pixels of BytesPerPixel bytes each, slice by slice.
%% Every pixel of slice Di (1..D) holds the integer Di.
create_image3d_data(W,H,D,BytesPerPixel) ->
    %% The pixel segment was lost in this copy of the file (only "<>" was
    %% left, which does not compile); Di is the only value bound by the
    %% generators, so it is the value of the restored segment.
    << <<Di:BytesPerPixel/unit:8>> ||
        Di <- lists:seq(1,D),
        _ <- lists:seq(1,H),
        _ <- lists:seq(1,W) >>.
%% test image pixel operations pixop(Config) -> exit({skip, "Fails on linux machine"}), Clu = clu:setup(proplists:get_value(type,Config,cpu)), {ok,A} = cl:create_image2d(clu:context(Clu),[read_write], #cl_image_format { cl_channel_order = rgba, cl_channel_type = unorm_int8 }, 2, 2, 2*4, <<100,200,50,127, 25,255,50,100, 30,64,10,20, 3,2,1,220>> ), %% {ok,E1} = cl:enqueue_write_image(Q, A, [0,0], [2,2], 2*4, 0, Data, []), {ok,B} = cl:create_image2d(clu:context(Clu),[read_write], #cl_image_format { cl_channel_order = rgba, cl_channel_type = unorm_int8 }, 2, 2, 2*4, <<50,100,25,255, 100,100,100,127, 100,200,50,127, 1,2,3,20>>), {ok,C} = cl:create_image2d(clu:context(Clu),[read_write], #cl_image_format { cl_channel_order = rgba, cl_channel_type = unorm_int8 }, 2, 2, 0, <<>>), {ok,Q} = cl:create_queue(clu:context(Clu),clu:device(Clu),[]), File = case proplists:get_value(data_dir, Config) of false -> "pixop.cl"; Dir -> filename:join(filename:dirname(filename:dirname(Dir)), "pixop.cl") end, io:format("File: ~p~n", [File]), {ok,Program} = clu:build_source_file(Clu, File, ""), {ok,Kernel} = cl:create_kernel(Program, "pixmap_blend"), clu:apply_kernel_args(Kernel, [A,B,C,2,2]), {ok,E1} = cl:enqueue_nd_range_kernel(Q, Kernel, [2,2], [], []), cl:flush(Q), {ok,completed} = cl:wait(E1), {ok,E2} = cl:enqueue_read_image(Q, C, [0,0], [2,2], 2*4, 0, []), cl:flush(Q), {ok,Data} = cl:wait(E2), Data. cl-cl-1.2.3/test/cl_test.erl000066400000000000000000000053331301041406700156270ustar00rootroot00000000000000%%% @author Tony Rogvall %%% @copyright (C) 2010, Tony Rogvall %%% @doc %%% %%% @end %%% Created : 25 Dec 2010 by Tony Rogvall -module(cl_test). -compile(export_all). -import(lists, [foreach/2]). -define(BUFFER_SIZE, 1024*256). init_per_suite(Config) -> cl_SUITE:init_per_suite(Config). all() -> [ct_test0, ct_test1]. test0() -> test0(cpu, ?BUFFER_SIZE). ct_test0(Config) -> test0(proplists:get_value(type, Config, gpu), ?BUFFER_SIZE). 
%% Write/read round trip on a buffer of Size bytes.
%% The first device of type Type on the first platform is used. Size div 2
%% bytes of generated data are written to both halves of the buffer, then
%% the first half is read back and compared with the data written.
%% Returns ok when the data is verified, and raises
%% {read_buffer_failed,Type} otherwise.
test0(Type, Size) ->
    {ok,[PI|_]} = cl:get_platform_ids(),
    %% Take the first device; matching on [D] would crash with badmatch on
    %% a machine that has several devices of the requested type.
    {ok,[D|_]} = cl:get_device_ids(PI, Type),
    {ok,C} = cl:create_context([D]),
    {ok,Q} = cl:create_queue(C, D, []),
    {ok,Buf} = cl:create_buffer(C, [read_only], Size),
    N = Size div 2,
    Data = make_buffer(N),
    {ok,E1} = cl:enqueue_write_buffer(Q, Buf, 0, N, Data, []),
    {ok,E2} = cl:enqueue_write_buffer(Q, Buf, N, N, Data, []),
    Res1 = cl:wait(E1,3000),
    io:format("Res1 = ~p\n", [Res1]),
    Res2 = cl:wait(E2,3000),
    io:format("Res2 = ~p\n", [Res2]),
    {ok,E3} = cl:enqueue_read_buffer(Q, Buf, 0, N, []),
    case cl:wait(E3,3000) of
        {ok, Data} ->
            io:format("read_buffer: verified\n"),
            ok;
        Res3 ->
            io:format("Res3 = ~p\n", [Res3]),
            %% Fail the test; returning normally here would report a
            %% failed verification as a passed test case.
            erlang:error({read_buffer_failed,Type})
    end.

%% OpenCL source used by test1/2: program(ok) is a valid kernel,
%% program(error) refers to an undeclared identifier (k).
program(ok) ->
    " __kernel void program1(int n, int m) { int result = n + m; } ";
program(error) ->
    " __kernel void program1(int n, int m) { int result = n + k; } ".

%% Common Test entry point: build the valid program for the device type
%% given by the type entry in Config (default gpu).
ct_test1(Config) ->
    test1(proplists:get_value(type, Config, gpu), ok).

%% Build the valid program for cpu devices.
test1() ->
    test1(cpu, ok).

%% Create a context for all devices of type Type on the first platform,
%% create the program selected by Prog (see program/1) and build it.
%% Program information is printed first, and per-device build information
%% is printed before and after the build. A build error is printed as
%% well; the function returns ok in both cases.
test1(Type, Prog) ->
    {ok,[PI|_]} = cl:get_platform_ids(),
    {ok,DeviceList} = cl:get_device_ids(PI, Type),
    {ok,C} = cl:create_context(DeviceList),
    {ok,P} = cl:create_program_with_source(C, program(Prog)),
    io:format("Program: ~p\n", [P]),
    {ok,Info} = cl:get_program_info(P),
    io:format("ProgramInfo: ~p\n", [Info]),
    foreach(
      fun(D) ->
              {ok,BuildInfo} = cl:get_program_build_info(P,D),
              io:format("BuildInfo @ ~w: ~p\n", [D,BuildInfo])
      end, DeviceList),
    case cl:build_program(P, DeviceList, "-Dhello=1 -Dtest") of
        ok ->
            foreach(
              fun(D) ->
                      {ok,BuildInfo} = cl:get_program_build_info(P,D),
                      io:format("BuildInfo @ ~w: ~p\n", [D,BuildInfo])
              end, DeviceList),
            ok;
        Error ->
            io:format("\n\nBuild Error: ~p\n\n", [Error]),
            foreach(
              fun(D) ->
                      {ok,BuildInfo} = cl:get_program_build_info(P,D),
                      io:format("BuildInfo @ ~w: ~p\n", [D,BuildInfo])
              end, DeviceList)
    end.

%% Build a binary of exactly N bytes (N >= 0) by doubling the binary for
%% N div 2; one extra leading byte 1 is added when N is odd.
make_buffer(0) ->
    <<>>;
make_buffer(1) ->
    <<1>>;
make_buffer(2) ->
    <<1,2>>;
make_buffer(N) ->
    Bin = make_buffer(N div 2),
    if N band 1 =:= 1 ->
            list_to_binary([1,Bin,Bin]);
       true ->
            list_to_binary([Bin,Bin])
    end.
cl-cl-1.2.3/test/pixop.cl000066400000000000000000000020061301041406700151370ustar00rootroot00000000000000/* -*- c -*- * */ float4 pixel_over(float4 a, float4 b); float4 pixel_blend(float4 a, float4 b); float4 pixel_blend(float4 a, float4 b) { return a.w*a + (1-a.w)*b; } float4 pixel_over(float4 a, float4 b) { return a.w*a + (1-a.w)*b.w*b; } kernel void pixmap_over(read_write image2d_t a, read_write image2d_t b, read_write image2d_t c, uint w, uint h) { int x = get_global_id(0); int y = get_global_id(1); if ((x < (int)w) && (y < (int)h)) { int2 coord = {x,y}; float4 ap = read_imagef(a, coord); float4 bp = read_imagef(b, coord); float4 cp = pixel_over(ap, bp); write_imagef(c, coord, cp); } } kernel void pixmap_blend(read_write image2d_t a, read_write image2d_t b, read_write image2d_t c, uint w, uint h) { int x = get_global_id(0); int y = get_global_id(1); if ((x < (int)w) && (y < (int)h)) { int2 coord = {x,y}; float4 ap = read_imagef(a, coord); float4 bp = read_imagef(b, coord); float4 cp = pixel_blend(ap, bp); write_imagef(c, coord, cp); } }