Beignet-1.1.1-Source/examples/v4l2_buffer_sharing/v4l2_buffer_sharing.cpp000664 001750 001750 00000042301 12576733264 025504 0ustar00yryr000000 000000 /*
** Copyright (c) 2012, 2015 Intel Corporation. All Rights Reserved.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and associated documentation files (the
** "Software"), to deal in the Software without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sub license, and/or sell copies of the Software, and to
** permit persons to whom the Software is furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice (including the
** next paragraph) shall be included in all copies or substantial portions
** of the Software.
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
** IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
** ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**/

/* Standard, V4L2, libva and OpenCL headers that the code below relies on. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/select.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>
#include <va/va.h>
#include <CL/cl.h>
#include <CL/cl_intel.h>
#include "va_display.h"
#include "utest_helper.hpp"

using namespace std;

#define BUFFER_NUM_DEFAULT 5
#define VIDEO_NODE_DEFAULT "/dev/video0"
#define WIDTH_DEFAULT 640
#define HEIGHT_DEFAULT 480

#define CHECK_VASTATUS(va_status,func) \
  if (va_status != VA_STATUS_SUCCESS) { \
    fprintf(stderr, "status = %d, %s: %s(line %d) failed, exit\n",va_status, __func__, func, __LINE__); \
    exit(1); \
  }

#define CHECK_CLSTATUS(status,func) \
  if (status != CL_SUCCESS) { \
    fprintf(stderr, "status = %d, %s: %s(line %d) failed, exit\n", status, __func__, func, __LINE__); \
    exit(1); \
  }

#define CHECK_V4L2ERROR(ret, STR) \
  if (ret){ \
    fprintf(stderr, STR); \
    perror(" "); \
    fprintf(stderr, "ret = %d, %s: %s(line %d) failed, exit\n", ret, __func__, STR, __LINE__); \
    exit(1); \
  }

VADisplay va_dpy;
cl_int cl_status;
VAStatus va_status;
VASurfaceID nv12_surface_id;
VAImage nv12_image;
int dev_fd;
uint64_t image_size;
unsigned int pitch;
cl_mem *import_buf = NULL;
typedef cl_int (OCLGETMEMOBJECTFD)(cl_context, cl_mem, int *);
OCLGETMEMOBJECTFD *oclGetMemObjectFd = NULL;
int frame_count = 0;

struct v4l2_options{
  const char *dev_name;
  unsigned int width, height;
  unsigned int spec_res;
  unsigned int buffer_num;
  unsigned int do_list;
} vo;
int *import_buf_fd = NULL;

static const char short_options[] = "d:r:b:lh";

static const struct option long_options[] = {
  { "device", required_argument, NULL, 'd' },
  { "help", no_argument, NULL, 'h' },
  { "resolution", required_argument, NULL, 'r' },
  { "buffer_num", required_argument, NULL, 'b' },
  { "list", no_argument, NULL, 'l' },
  { 0, 0, 0, 0 }
};

static void usage(FILE *fp, int argc, char **argv)
{
  fprintf(fp,
          "This example aims to demonstrate the usage of DMABUF buffer sharing between v4l2 and Beignet.\n"
          "For more details, please read docs/howto/v4l2-buffer-sharing-howto.mdwn.\n"
          "Usage: %s [options]\n\n"
          "Options:\n"
          "-d | --device= Specify the video device to use instead of /dev/video0\n"
          "-h | --help Print this message\n"
          "-r | --resolution= Set image resolution\n"
          "-b | --buffer_num= Set number of buffers\n"
          "-l | --list List available resolutions of format 'V4L2_PIX_FMT_YUYV'\n"
          "", argv[0]);
}

static void list_resolution(){
  int ret;
  struct v4l2_capability cap;
  struct v4l2_frmsizeenum frm_sz;

  dev_fd = open(vo.dev_name, O_RDWR | O_NONBLOCK, 0);
  if (dev_fd < 0) {
    fprintf(stderr, "Can not open %s: %s\n", vo.dev_name, strerror(errno));
    exit(1);
  }

  memset(&cap, 0, sizeof(cap));
  ret = ioctl(dev_fd, VIDIOC_QUERYCAP, &cap);
  CHECK_V4L2ERROR(ret, "VIDIOC_QUERYCAP");
  if(!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)){
    fprintf(stderr, "The device is not a video capture device\n");
    exit(1);
  }
  if(!(cap.capabilities & V4L2_CAP_STREAMING)){
    fprintf(stderr, "The device does not support streaming i/o\n");
    exit(1);
  }

  printf("Supported resolutions under pixel format 'V4L2_PIX_FMT_YUYV':\n");
  frm_sz.pixel_format = V4L2_PIX_FMT_YUYV;
  frm_sz.index = 0;
  bool extra_info = true;
  while (ioctl(dev_fd, VIDIOC_ENUM_FRAMESIZES, &frm_sz) == 0) {
    if (frm_sz.type == V4L2_FRMSIZE_TYPE_DISCRETE) {
      if(extra_info){
        printf("(width, height) = \n");
        extra_info = false;
      }
      printf("(%u, %u)", frm_sz.discrete.width, frm_sz.discrete.height);
      printf("\n");
    }
    else if (frm_sz.type == V4L2_FRMSIZE_TYPE_STEPWISE) {
      printf("(width, height) from (%u, %u) to (%u, %u) with step (%u, %u)\n",
             frm_sz.stepwise.min_width, frm_sz.stepwise.min_height,
             frm_sz.stepwise.max_width, frm_sz.stepwise.max_height,
             frm_sz.stepwise.step_width, frm_sz.stepwise.step_height);
    }
    /* Always advance the index so the loop terminates once the driver
       returns EINVAL for the next entry. */
    frm_sz.index++;
  }

  ret = close(dev_fd);
  if (ret) {
    fprintf(stderr, "Failed to close %s: %s\n", vo.dev_name, strerror(errno));
    exit(1);
  }
}

static void analyse_args(int argc, char *argv[])
{
  vo.dev_name = NULL;
  vo.width = 0;
  vo.height = 0;
  vo.spec_res = 0;
  vo.buffer_num = BUFFER_NUM_DEFAULT;
  vo.do_list = 0;

  int c, idx;
  for (;;) {
    c = getopt_long(argc, argv, short_options, long_options, &idx);
    if (-1 == c)
      break;
    switch (c) {
    case 0:
      break;
    case 'd':
      vo.dev_name = optarg;
      break;
    case '?':
    case 'h':
      usage(stdout, argc, argv);
      exit(0);
    case 'r':
      sscanf(optarg, "%u,%u", &vo.width, &vo.height);
      vo.spec_res = 1;
      break;
    case 'b':
      vo.buffer_num = strtoul(optarg, NULL, 0);
      break;
    case 'l':
      vo.do_list = 1;
      break;
    default:
      usage(stderr, argc, argv);
      exit(1);
    }
  }

  if(!vo.dev_name){
    printf("No device specified, using default device: %s\n", VIDEO_NODE_DEFAULT);
    vo.dev_name = VIDEO_NODE_DEFAULT;
  }
  if(vo.do_list){
    list_resolution();
    exit(0);
  }
  if(!vo.spec_res){
    printf("No resolution specified, using default resolution: (width,height) = (%d, %d)\n",
           WIDTH_DEFAULT, HEIGHT_DEFAULT);
    vo.width = WIDTH_DEFAULT;
    vo.height = HEIGHT_DEFAULT;
  }
  return;
}

static void initialize_va_ocl(){
  int major_ver, minor_ver;

  printf("\n***********************libva info: ***********************\n");
  fflush(stdout);
  va_dpy = va_open_display();
  va_status = vaInitialize(va_dpy, &major_ver, &minor_ver);
  CHECK_VASTATUS(va_status, "vaInitialize");

  VASurfaceAttrib forcc;
  forcc.type = VASurfaceAttribPixelFormat;
  forcc.flags = VA_SURFACE_ATTRIB_SETTABLE;
  forcc.value.type = VAGenericValueTypeInteger;
  forcc.value.value.i = VA_FOURCC_NV12;
  va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_YUV420, vo.width, vo.height,
                               &nv12_surface_id, 1, &forcc, 1);
  CHECK_VASTATUS(va_status, "vaCreateSurfaces");

  VAImageFormat image_fmt;
  image_fmt.fourcc = VA_FOURCC_NV12;
  image_fmt.byte_order = VA_LSB_FIRST;
  image_fmt.bits_per_pixel = 12;
  va_status = vaCreateImage(va_dpy, &image_fmt, vo.width, vo.height, &nv12_image);
  CHECK_VASTATUS(va_status, "vaCreateImage");

  //ocl initialization: basic & create kernel & get extension
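  /* Note on the extension lookup below: clGetMemObjectFdIntel is a
   * Beignet-specific extension, so it is not part of the core OpenCL
   * dispatch table. It has to be resolved by name at runtime -- through
   * clGetExtensionFunctionAddressForPlatform() on OpenCL 1.2, or the older
   * clGetExtensionFunctionAddress() otherwise -- and the program fails
   * early if the runtime does not expose it. */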
printf("\n***********************OpenCL info: ***********************\n"); if ((cl_status = cl_test_init("runtime_yuy2_processing.cl", "runtime_yuy2_processing", SOURCE)) != 0){ fprintf(stderr, "cl_test_init error\n"); exit(1); } #ifdef CL_VERSION_1_2 oclGetMemObjectFd = (OCLGETMEMOBJECTFD *)clGetExtensionFunctionAddressForPlatform(platform, "clGetMemObjectFdIntel"); #else oclGetMemObjectFd = (OCLGETMEMOBJECTFD *)clGetExtensionFunctionAddress("clGetMemObjectFdIntel"); #endif if(!oclGetMemObjectFd){ fprintf(stderr, "Failed to get extension clGetMemObjectFdIntel\n"); exit(1); } printf("\n***********************************************************\n"); } static void create_dmasharing_buffers() { if(import_buf_fd == NULL) import_buf_fd = (int *)malloc(sizeof(int) * vo.buffer_num); if(import_buf == NULL){ import_buf = (cl_mem *)malloc(sizeof(cl_mem) * vo.buffer_num); } for (unsigned int i = 0; i < vo.buffer_num; ++i){ import_buf[i] = clCreateBuffer(ctx, CL_MEM_READ_WRITE, image_size, NULL, &cl_status); CHECK_CLSTATUS(cl_status, "clCreateBuffer"); //get cl buffer object's fd cl_status = oclGetMemObjectFd(ctx, import_buf[i], &import_buf_fd[i]); CHECK_CLSTATUS(cl_status, "clGetMemObjectFdIntel"); } } static void release_va_ocl(){ va_status = vaDestroySurfaces(va_dpy,&nv12_surface_id,1); CHECK_VASTATUS(va_status, "vaDestroySurfaces"); va_status = vaDestroyImage(va_dpy, nv12_image.image_id); CHECK_VASTATUS(va_status, "vaDestroyImage"); va_status = vaTerminate(va_dpy); CHECK_VASTATUS(va_status, "vaTerminate"); va_close_display(va_dpy); int ret; for (unsigned int i = 0; i < vo.buffer_num; ++i) { ret = close(import_buf_fd[i]); if (ret) { fprintf(stderr, "Failed to close import_buf[%u]'s fd: %s\n", i, strerror(errno)); } cl_status = clReleaseMemObject(import_buf[i]); CHECK_CLSTATUS(cl_status, "clReleaseMemObject"); } } static void process_show_frame(int index) { //process import_buf[index] by ocl size_t global_size[2]; global_size[0] = vo.width * 2 / 4; global_size[1] = vo.height; cl_status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &import_buf[index]); CHECK_CLSTATUS(cl_status, "clSetKernelArg"); cl_status = clSetKernelArg(kernel, 1, sizeof(int), &vo.height); CHECK_CLSTATUS(cl_status, "clSetKernelArg"); cl_status = clSetKernelArg(kernel, 2, sizeof(int), &pitch); CHECK_CLSTATUS(cl_status, "clSetKernelArg"); cl_status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL); CHECK_CLSTATUS(cl_status, "clEnqueueNDRangeKernel"); cl_status = clFinish(queue); CHECK_CLSTATUS(cl_status, "clFinish"); //create corresponding VASurface VASurfaceID yuy2_surface_id; VASurfaceAttrib sa[2]; sa[0].type = VASurfaceAttribMemoryType; sa[0].flags = VA_SURFACE_ATTRIB_SETTABLE; sa[0].value.type = VAGenericValueTypeInteger; sa[0].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; sa[1].type = VASurfaceAttribExternalBufferDescriptor; sa[1].flags = VA_SURFACE_ATTRIB_SETTABLE; sa[1].value.type = VAGenericValueTypePointer; VASurfaceAttribExternalBuffers sa_eb; sa_eb.pixel_format = VA_FOURCC_YUY2; sa_eb.width = vo.width; sa_eb.height = vo.height; sa_eb.data_size = image_size; sa_eb.num_planes = 1; sa_eb.pitches[0] = pitch; sa_eb.offsets[0] = 0; sa_eb.num_buffers = 1; sa_eb.buffers = (unsigned long *)malloc(sizeof(unsigned long) * sa_eb.num_buffers); sa_eb.buffers[0] = import_buf_fd[index]; sa_eb.flags = 0; sa[1].value.value.p = &sa_eb; va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_YUV422, vo.width, vo.height, &yuy2_surface_id, 1, sa, 2); CHECK_VASTATUS(va_status, "vaCreateSurfaces"); //convert to 
NV12 format
  va_status = vaGetImage(va_dpy, yuy2_surface_id, 0, 0,
                         vo.width, vo.height, nv12_image.image_id);
  CHECK_VASTATUS(va_status, "vaGetImage");
  va_status = vaPutImage(va_dpy, nv12_surface_id, nv12_image.image_id,
                         0, 0, vo.width, vo.height,
                         0, 0, vo.width, vo.height);
  CHECK_VASTATUS(va_status, "vaPutImage");

  //show on screen via vaPutSurface
  VARectangle src_rect, dst_rect;
  src_rect.x = 0;
  src_rect.y = 0;
  src_rect.width = vo.width;
  src_rect.height = vo.height;
  dst_rect = src_rect;
  va_status = va_put_surface(va_dpy, nv12_surface_id, &src_rect, &dst_rect);
  CHECK_VASTATUS(va_status, "vaPutSurface");

  va_status = vaDestroySurfaces(va_dpy, &yuy2_surface_id, 1);
  CHECK_VASTATUS(va_status, "vaDestroySurfaces");
  free(sa_eb.buffers);
  return;
}

static void init_dmabuf(void){
  int ret;
  struct v4l2_requestbuffers reqbuf;

  memset(&reqbuf, 0, sizeof(reqbuf));
  reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  reqbuf.memory = V4L2_MEMORY_DMABUF;
  reqbuf.count = vo.buffer_num;
  ret = ioctl(dev_fd, VIDIOC_REQBUFS, &reqbuf);
  if(ret == -1 && errno == EINVAL){
    fprintf(stderr, "Video capturing or DMABUF streaming is not supported\n");
    exit(1);
  }
  else
    CHECK_V4L2ERROR(ret, "VIDIOC_REQBUFS");

  create_dmasharing_buffers();
  printf("Succeeded in creating %u DMA buffers\n", vo.buffer_num);
}

static void init_device(void){
  int ret;
  struct v4l2_capability cap;
  struct v4l2_format format;

  dev_fd = open(vo.dev_name, O_RDWR | O_NONBLOCK, 0);
  if (dev_fd < 0) {
    fprintf(stderr, "Can not open %s: %s\n", vo.dev_name, strerror(errno));
    exit(1);
  }

  memset(&cap, 0, sizeof(cap));
  ret = ioctl(dev_fd, VIDIOC_QUERYCAP, &cap);
  CHECK_V4L2ERROR(ret, "VIDIOC_QUERYCAP");
  if(!(cap.capabilities & V4L2_CAP_STREAMING)){
    fprintf(stderr, "The device does not support streaming i/o\n");
    exit(1);
  }

  memset(&format, 0, sizeof(format));
  format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  format.fmt.pix.width = vo.width;
  format.fmt.pix.height = vo.height;
  format.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
  format.fmt.pix.field = V4L2_FIELD_ANY;
  ret = ioctl(dev_fd, VIDIOC_S_FMT, &format);
  CHECK_V4L2ERROR(ret, "VIDIOC_S_FMT");

  ret = ioctl(dev_fd, VIDIOC_G_FMT, &format);
  CHECK_V4L2ERROR(ret, "VIDIOC_G_FMT");
  if(format.fmt.pix.pixelformat != V4L2_PIX_FMT_YUYV){
    fprintf(stderr, "V4L2_PIX_FMT_YUYV format is not supported by %s\n", vo.dev_name);
    exit(1);
  }
  if(format.fmt.pix.width != vo.width || format.fmt.pix.height != vo.height){
    fprintf(stderr, "This resolution is not supported; list the supported resolutions with the '-l' option\n");
    exit(1);
  }
  printf("Input image format: (width, height) = (%u, %u), pixel format = %.4s\n",
         format.fmt.pix.width, format.fmt.pix.height, (char*)&format.fmt.pix.pixelformat);

  image_size = format.fmt.pix.sizeimage;
  pitch = format.fmt.pix.bytesperline;
}

static void start_capturing(void){
  int ret;

  for (unsigned int i = 0; i < vo.buffer_num; ++i) {
    struct v4l2_buffer buf;
    memset(&buf, 0, sizeof(buf));
    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_DMABUF;
    buf.index = i;
    buf.m.fd = import_buf_fd[i];
    ret = ioctl(dev_fd, VIDIOC_QBUF, &buf);
    CHECK_V4L2ERROR(ret, "VIDIOC_QBUF");
  }

  int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
  ret = ioctl(dev_fd, VIDIOC_STREAMON, &type);
  CHECK_V4L2ERROR(ret, "VIDIOC_STREAMON");
}

static void mainloop(void){
  int ret;
  struct v4l2_buffer buf;
  int index;

  while (1) {
    frame_count++;
    printf("******************Frame %d\n", frame_count);

    fd_set fds;
    struct timeval tv;
    int r;

    FD_ZERO(&fds);
    FD_SET(dev_fd, &fds);

    /* Timeout.
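       Wait up to two seconds for the driver to mark a capture buffer as
       ready; if select() reports nothing in that window, the loop treats
       the pipeline as stalled and aborts instead of blocking forever.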
*/ tv.tv_sec = 2; tv.tv_usec = 0; r = select(dev_fd + 1, &fds, NULL, NULL, &tv); if (-1 == r) { if (EINTR == errno) continue; perror("select"); } if(r == 0){ fprintf(stderr, "Select timeout\n"); exit(1); } memset(&buf, 0, sizeof(buf)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.memory = V4L2_MEMORY_DMABUF; ret = ioctl(dev_fd, VIDIOC_DQBUF, &buf); CHECK_V4L2ERROR(ret, "VIDIOC_DQBUF"); index = buf.index; //process by ocl and show on screen by libva process_show_frame(index); //Then queue this buffer(buf.index) by QBUF buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.memory = V4L2_MEMORY_DMABUF; buf.m.fd = import_buf_fd[index]; buf.index = index; ret = ioctl(dev_fd, VIDIOC_QBUF, &buf); CHECK_V4L2ERROR(ret, "VIDIOC_QBUF"); } } static void stop_capturing(void) { int ret; int type = V4L2_BUF_TYPE_VIDEO_CAPTURE; ret = ioctl(dev_fd, VIDIOC_STREAMOFF, &type); CHECK_V4L2ERROR(ret, "VIDIOC_STREAMOFF"); } static void uninit_device(void){ free(import_buf_fd); free(import_buf); int ret = close(dev_fd); if (ret) { fprintf(stderr, "Failed to close %s: %s\n", vo.dev_name, strerror(errno)); exit(1); } } int main(int argc, char *argv[]) { analyse_args(argc, argv); init_device(); initialize_va_ocl(); init_dmabuf(); start_capturing(); mainloop(); stop_capturing(); release_va_ocl(); uninit_device(); return 0; } Beignet-1.1.1-Source/examples/libva_buffer_sharing/libva_buffer_sharing.cpp000664 001750 001750 00000037732 12576733264 026334 0ustar00yryr000000 000000 /* * Copyright (c) 2012, 2015 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* Standard, libva and OpenCL headers used by the code below. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <va/va.h>
#include <CL/cl.h>
#include "va_display.h"
#include <CL/cl_intel.h>
#include "utest_helper.hpp"
#include <getopt.h> /* getopt_long() */

typedef cl_mem (OCLCREATEIMAGEFROMLIBVAINTEL)(cl_context, const cl_libva_image *, cl_int *);
OCLCREATEIMAGEFROMLIBVAINTEL *oclCreateImageFromLibvaIntel = NULL;

const char *input_nv12;
const char *output_nv12;
int frame_size;
int picture_width, picture_height;
unsigned char *newImageBuffer;

VADisplay va_dpy;
cl_int cl_status;
VAStatus va_status;
bool putsurface=true;

static const char short_options[] = "i:r:o:h";

static const struct option long_options[] = {
  { "input", required_argument, NULL, 'i' },
  { "help", no_argument, NULL, 'h' },
  { "resolution", required_argument, NULL, 'r' },
  { "output", required_argument, NULL, 'o' },
  { 0, 0, 0, 0 }
};

#define WIDTH_DEFAULT 256
#define HEIGHT_DEFAULT 128

#define CHECK_VASTATUS(va_status,func) \
  if (va_status != VA_STATUS_SUCCESS) { \
    fprintf(stderr, "status = %d, %s:%s (%d) failed,exit\n",va_status, __func__, func, __LINE__); \
    exit(1); \
  }

#define CHECK_CLSTATUS(status,func) \
  if (status != CL_SUCCESS) { \
    fprintf(stderr, "status = %d, %s:%s (%d) failed,exit\n", status, __func__, func, __LINE__); \
    exit(1); \
  }

static void usage(FILE *fp, int argc, char **argv)
{
  fprintf(fp,
          "\n"
          "This example aims to demonstrate the usage of gpu buffer sharing between libva and Beignet.\n"
          "The result will be shown on screen if you haven't specified -o option.\n"
          "The input and output files are in NV12 format.\n"
          "Please use the following command to see these files:\n"
          "gst-launch-1.0 filesrc location=file_name ! videoparse format=nv12 width=xxx height=xxx ! imagefreeze ! videoconvert ! video/x-raw, format=BGRx ! ximagesink\n"
          "(Please install gstreamer1.0-plugins-base, gstreamer1.0-plugins-bad, \n"
          " gstreamer1.0-x by apt on Ubuntu, in order to use gst-launch-1.0)\n"
          "For more details, please read docs/howto/libva-buffer-sharing-howto.mdwn.\n"
          "\nUsage: %s [options]\n\n"
          "Options:\n"
          "-i | --input= Specify input nv12 file name like /home/xxx/in.nv12\n"
          "-h | --help Print this message\n"
          "-r | --resolution= Set input resolution\n"
          "-o | --output= Specify output nv12 file name like /home/xxx/out.nv12\n"
          "", argv[0]);
}

static void analyse_args(int argc, char *argv[])
{
  input_nv12 = NULL;
  picture_width = 0;
  picture_height = 0;
  output_nv12 = NULL;
  putsurface = true;

  int c, idx;
  for (;;) {
    c = getopt_long(argc, argv, short_options, long_options, &idx);
    if (-1 == c)
      break;
    switch (c) {
    case 0: /* getopt_long() flag */
      break;
    case 'i':
      input_nv12 = optarg;
      break;
    case '?':
    case 'h':
      usage(stdout, argc, argv);
      exit(0);
    case 'r':
      sscanf(optarg, "%d,%d", &picture_width, &picture_height);
      break;
    case 'o':
      output_nv12 = optarg;
      putsurface = false;
      break;
    default:
      usage(stderr, argc, argv);
      exit(1);
    }
  }

  if(!input_nv12){
    input_nv12 = INPUT_NV12_DEFAULT;
  }
  if(picture_width == 0 && picture_height == 0){
    picture_width = WIDTH_DEFAULT;
    picture_height = HEIGHT_DEFAULT;
  }
  return;
}

static void initialize_va_ocl(){
  int major_ver, minor_ver;

  printf("\n***********************libva info: ***********************\n");
  fflush(stdout);
  va_dpy = va_open_display();
  va_status = vaInitialize(va_dpy, &major_ver, &minor_ver);
  CHECK_VASTATUS(va_status, "vaInitialize");

  //ocl initialization: basic & create kernel & get extension
  printf("\n***********************OpenCL info: ***********************\n");
  if ((cl_status = cl_test_init("runtime_mirror_effect.cl", "runtime_mirror_effect", SOURCE)) != 0){
    fprintf(stderr, "cl_test_init error\n");
    exit(1);
  }
#ifdef CL_VERSION_1_2
  oclCreateImageFromLibvaIntel = (OCLCREATEIMAGEFROMLIBVAINTEL *)clGetExtensionFunctionAddressForPlatform(platform, "clCreateImageFromLibvaIntel");
#else
  oclCreateImageFromLibvaIntel = (OCLCREATEIMAGEFROMLIBVAINTEL *)clGetExtensionFunctionAddress("clCreateImageFromLibvaIntel");
#endif
  if(!oclCreateImageFromLibvaIntel){
    fprintf(stderr, "Failed to get extension clCreateImageFromLibvaIntel\n");
    exit(1);
  }
}

static void upload_nv12_to_surface(FILE *nv12_fp, VASurfaceID surface_id)
{
  VAImage surface_image;
  void *surface_p = NULL;
  unsigned char *y_src, *u_src;
  unsigned char *y_dst, *u_dst;
  int y_size = picture_width * picture_height;
  int row, col;
  size_t n_items;

  n_items = fread(newImageBuffer, frame_size, 1, nv12_fp);
  if(n_items != 1){
    fprintf(stderr, "Failed to read a full frame from the input file\n");
    exit(1);
  }

  va_status = vaDeriveImage(va_dpy, surface_id, &surface_image);
  CHECK_VASTATUS(va_status,"vaDeriveImage");
  va_status = vaMapBuffer(va_dpy, surface_image.buf, &surface_p);
  CHECK_VASTATUS(va_status,"vaMapBuffer");

  y_src = newImageBuffer;
  u_src = newImageBuffer + y_size; /* UV plane offset for NV12 */

  y_dst = (unsigned char *)surface_p + surface_image.offsets[0];
  u_dst = (unsigned char *)surface_p + surface_image.offsets[1]; /* UV plane offset for NV12 */

  /* Y plane */
  for (row = 0; row < surface_image.height; row++) {
    memcpy(y_dst, y_src, surface_image.width);
    y_dst += surface_image.pitches[0];
    y_src += picture_width;
  }

  assert(surface_image.format.fourcc == VA_FOURCC_NV12);
  /* UV plane */
  for (row = 0; row < surface_image.height / 2; row++) {
    for (col = 0; col < surface_image.width / 2; col++) {
      u_dst[col * 2] = u_src[col * 2];
      u_dst[col * 2 + 1] = u_src[col * 2 + 1];
    }
    u_dst += surface_image.pitches[1];
    u_src += picture_width;
  }

  vaUnmapBuffer(va_dpy, surface_image.buf);
  vaDestroyImage(va_dpy, surface_image.image_id);
}

static void create_y_image_object_from_libva(VAImage *surface_image, VABufferInfo *buf_info, cl_mem *yio_p)
{
  cl_libva_image info_image;
  info_image.bo_name = buf_info->handle;
  info_image.offset = surface_image->offsets[0];
  info_image.width = surface_image->width;
  info_image.height = surface_image->height;
  info_image.fmt.image_channel_order = CL_R;
  info_image.fmt.image_channel_data_type = CL_UNSIGNED_INT8;
  info_image.row_pitch = surface_image->pitches[0];
  *yio_p = oclCreateImageFromLibvaIntel(ctx, &info_image, &cl_status);
  CHECK_CLSTATUS(cl_status, "oclCreateImageFromLibvaIntel");
  printf("\nSuccessfully created an OpenCL image object from the Y plane of the VASurface...\n");
}

static void create_uv_image_object_from_libva(VAImage *surface_image, VABufferInfo *buf_info, cl_mem *uvio_p)
{
  cl_libva_image info_image;
  info_image.bo_name = buf_info->handle;
  info_image.offset = surface_image->offsets[1];
  info_image.width = surface_image->width / 2;
  info_image.height = surface_image->height / 2;
  info_image.fmt.image_channel_order = CL_R;
  info_image.fmt.image_channel_data_type = CL_UNSIGNED_INT16;
  info_image.row_pitch = surface_image->pitches[1];
  *uvio_p = oclCreateImageFromLibvaIntel(ctx, &info_image, &cl_status);
  CHECK_CLSTATUS(cl_status, "oclCreateImageFromLibvaIntel");
  printf("\nSuccessfully created an OpenCL image object from the UV plane of the VASurface...\n");
}

static void store_surface_to_nv12(VASurfaceID surface_id, FILE *nv12_fp)
{
  VAImage surface_image;
  void *surface_p = NULL;
  unsigned char *y_src, *u_src;
  unsigned char *y_dst, *u_dst;
  int y_size = picture_width * picture_height;
  int row, col;

  va_status = vaDeriveImage(va_dpy, surface_id, &surface_image);
  CHECK_VASTATUS(va_status,"vaDeriveImage");
  va_status = vaMapBuffer(va_dpy, surface_image.buf, &surface_p);
  CHECK_VASTATUS(va_status,"vaMapBuffer");

  y_src = (unsigned char *)surface_p + surface_image.offsets[0];
  u_src = (unsigned char *)surface_p + surface_image.offsets[1]; /* UV plane offset for NV12 */

  y_dst = newImageBuffer;
  u_dst = newImageBuffer + y_size; /* UV plane offset for NV12 */

  /* Y plane */
  for (row = 0; row < surface_image.height; row++) {
    memcpy(y_dst, y_src, surface_image.width);
    y_src += surface_image.pitches[0];
    y_dst += picture_width;
  }

  assert(surface_image.format.fourcc == VA_FOURCC_NV12);
  /* UV plane */
  for (row = 0; row < surface_image.height / 2; row++) {
    for (col = 0; col < surface_image.width / 2; col++) {
      u_dst[col * 2] = u_src[col * 2];
      u_dst[col * 2 + 1] = u_src[col * 2 + 1];
    }
    u_src += surface_image.pitches[1];
    u_dst += picture_width;
  }

  fwrite(newImageBuffer, frame_size, 1, nv12_fp);

  vaUnmapBuffer(va_dpy, surface_image.buf);
  vaDestroyImage(va_dpy, surface_image.image_id);
}

static void load_process_store_nv12()
{
  /* NV12 holds 1.5 bytes per pixel: a full-size Y plane plus a half-size
     interleaved UV plane. */
  frame_size = picture_width * picture_height + ((picture_width * picture_height) >> 1);
  newImageBuffer = (unsigned char *)malloc(frame_size);

  VASurfaceID src_surface_id;
  VASurfaceAttrib forcc;
  forcc.type = VASurfaceAttribPixelFormat;
  forcc.flags = VA_SURFACE_ATTRIB_SETTABLE;
  forcc.value.type = VAGenericValueTypeInteger;
  forcc.value.value.i = VA_FOURCC_NV12;
  va_status = vaCreateSurfaces(va_dpy, VA_RT_FORMAT_YUV420, picture_width, picture_height,
                               &src_surface_id, 1, &forcc, 1);
  CHECK_VASTATUS(va_status, "vaCreateSurfaces");

  //load
  FILE *in_nv12_fp;
  in_nv12_fp = fopen(input_nv12, "rb");
  if (in_nv12_fp == NULL){
    fprintf(stderr, "Can't open input nv12 file\n");
    exit(1);
  }
  fseek(in_nv12_fp, 0l, SEEK_END);
  off_t file_size = ftell(in_nv12_fp);
  if ((file_size < frame_size) || (file_size % frame_size) ) {
    fclose(in_nv12_fp);
    fprintf(stderr, "The nv12 file's size does not match the given resolution\n");
    exit(1);
  }
  fseek(in_nv12_fp, 0l, SEEK_SET);
  upload_nv12_to_surface(in_nv12_fp, src_surface_id);
  fclose(in_nv12_fp);
  printf("\nSuccessfully loaded the source nv12 file(\"%s\") into a VASurface...\n", input_nv12);

  //create two corresponding ocl image objects from source VASurface
  VAImage src_surface_image;
  va_status = vaDeriveImage(va_dpy, src_surface_id, &src_surface_image);
  CHECK_VASTATUS(va_status,"vaDeriveImage");
  VABufferInfo buf_info;
  buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM;
  va_status = vaAcquireBufferHandle(va_dpy, src_surface_image.buf, &buf_info);
  CHECK_VASTATUS(va_status,"vaAcquireBufferHandle");
  cl_mem src_y, src_uv;
  create_y_image_object_from_libva(&src_surface_image, &buf_info, &src_y);
  OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &src_y);
  create_uv_image_object_from_libva(&src_surface_image, &buf_info, &src_uv);
  OCL_CALL (clSetKernelArg, kernel, 1, sizeof(cl_mem), &src_uv);

  //create one target VASurface & create corresponding target ocl image object from it
  VASurfaceID dst_surface_id;
  va_status = vaCreateSurfaces(va_dpy,VA_RT_FORMAT_YUV420, picture_width,picture_height,
                               &dst_surface_id, 1, &forcc, 1);
  CHECK_VASTATUS(va_status, "vaCreateSurfaces");
  VAImage dst_surface_image;
  va_status = vaDeriveImage(va_dpy, dst_surface_id, &dst_surface_image);
  CHECK_VASTATUS(va_status,"vaDeriveImage");
  va_status = vaAcquireBufferHandle(va_dpy, dst_surface_image.buf, &buf_info);
  CHECK_VASTATUS(va_status,"vaAcquireBufferHandle");
  cl_mem dst_y, dst_uv;
  create_y_image_object_from_libva(&dst_surface_image, &buf_info, &dst_y);
  OCL_CALL (clSetKernelArg, kernel, 2, sizeof(cl_mem), &dst_y);
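  /* Kernel argument layout of runtime_mirror_effect, as set here and just
   * below: args 0/1 are the source Y and UV image objects, args 2/3 the
   * destination Y and UV image objects, and arg 4 the picture height. */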
  create_uv_image_object_from_libva(&dst_surface_image, &buf_info, &dst_uv);
  OCL_CALL (clSetKernelArg, kernel, 3, sizeof(cl_mem), &dst_uv);
  OCL_CALL (clSetKernelArg, kernel, 4, sizeof(int), &picture_height);

  size_t global_size[2];
  global_size[0] = picture_width;
  global_size[1] = picture_height;
  OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL);
  OCL_CALL (clFinish, queue);
  printf("\nSuccessfully used OpenCL to process the image...\n");

  va_status = vaReleaseBufferHandle(va_dpy, src_surface_image.buf);
  CHECK_VASTATUS(va_status,"vaReleaseBufferHandle");
  va_status = vaReleaseBufferHandle(va_dpy, dst_surface_image.buf);
  CHECK_VASTATUS(va_status,"vaReleaseBufferHandle");
  OCL_CALL (clReleaseMemObject, src_y);
  OCL_CALL (clReleaseMemObject, src_uv);
  OCL_CALL (clReleaseMemObject, dst_y);
  OCL_CALL (clReleaseMemObject, dst_uv);
  vaDestroyImage(va_dpy, src_surface_image.image_id);
  vaDestroyImage(va_dpy, dst_surface_image.image_id);
  cl_kernel_destroy();
  cl_ocl_destroy();

  if (putsurface) {
    VARectangle src_rect, dst_rect;
    src_rect.x = 0;
    src_rect.y = 0;
    src_rect.width = picture_width;
    src_rect.height = picture_height;
    dst_rect = src_rect;
    //XXX There is a bug in the X server that makes va_put_surface show an
    //incorrect result, so call va_put_surface twice to work around it.
    va_status = va_put_surface(va_dpy, dst_surface_id, &src_rect, &dst_rect);
    va_status = va_put_surface(va_dpy, dst_surface_id, &src_rect, &dst_rect);
    CHECK_VASTATUS(va_status, "vaPutSurface");
    printf("press any key to exit\n");
    getchar();
  }
  else{
    //store
    FILE *out_nv12_fp;
    out_nv12_fp = fopen(output_nv12,"wb");
    if ( out_nv12_fp == NULL){
      fprintf(stderr, "Can't open output nv12 file\n");
      exit(1);
    }
    store_surface_to_nv12(dst_surface_id, out_nv12_fp);
    fclose(out_nv12_fp);
    printf("\nSuccessfully stored the VASurface to the dst nv12 file(\"%s\")...\n", output_nv12);
    printf("\nNote: The input and output files are in NV12 format.\n");
    printf("Please use the following command to see the result:\n");
    printf("gst-launch-1.0 filesrc location=%s ! videoparse format=nv12 width=%d height=%d ! imagefreeze ! videoconvert ! video/x-raw, format=BGRx ! ximagesink\n",
           output_nv12, picture_width, picture_height);
    printf("(Please install gstreamer1.0-plugins-base, gstreamer1.0-plugins-bad,\ngstreamer1.0-x by apt on Ubuntu, in order to use gst-launch-1.0)\n");
  }

  //release resources
  vaDestroySurfaces(va_dpy,&src_surface_id,1);
  vaDestroySurfaces(va_dpy,&dst_surface_id,1);
  vaTerminate(va_dpy);
  va_close_display(va_dpy);
}

int main(int argc, char *argv[])
{
  analyse_args(argc, argv);
  initialize_va_ocl();
  load_process_store_nv12();
  printf("\nExample ran successfully!\n");
  return 0;
}
Beignet-1.1.1-Source/examples/libva_buffer_sharing/256_128.nv12000664 001750 001750 00000140000 12576733264 023064 0ustar00yryr000000 000000 [raw binary NV12 image data omitted]
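(The .nv12 file above is raw 256x128 NV12 video data: a full-resolution Y
plane followed by a half-height plane of interleaved UV pairs, i.e. 1.5 bytes
per pixel. A minimal standalone sketch of that size arithmetic follows; the
variable names are illustrative only and appear nowhere in the sources above.)

#include <cstdio>

int main() {
  const int width = 256, height = 128;
  const int y_size = width * height;  // luma plane: 32768 bytes
  const int uv_size = y_size / 2;     // interleaved UV plane: 16384 bytes
  // 32768 + 16384 = 49152 bytes, which matches the 00000140000 (octal) size
  // field in the tar header of 256_128.nv12 above.
  std::printf("NV12 frame size: %d bytes\n", y_size + uv_size);
  return 0;
}

(load_process_store_nv12() in libva_buffer_sharing.cpp computes the same
quantity as width * height + ((width * height) >> 1).)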
Beignet-1.1.1-Source/examples/CMakeLists.txt000664 001750 001750 00000003514 12576733264 020055 0ustar00yryr000000 000000 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}
                    ${CMAKE_CURRENT_SOURCE_DIR}/../utests
                    ${CMAKE_CURRENT_SOURCE_DIR}/../include
                    ${X11_INCLUDE_DIR})

IF(LIBVA_BUF_SH_DEP OR V4L2_BUF_SH_DEP)
  EXECUTE_PROCESS(COMMAND ls "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva" OUTPUT_VARIABLE LS_RESULT)
  IF ("${LS_RESULT}" STREQUAL "")
    EXECUTE_PROCESS(COMMAND git submodule init WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
    EXECUTE_PROCESS(COMMAND git submodule update WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
    EXECUTE_PROCESS(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva)
  ENDIF ("${LS_RESULT}" STREQUAL "")
  INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/va
                      ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/test/common)
  link_directories (${LIBVA_LIBDIR} ${LIBVA-X11_LIBDIR})
  set (va_ocl_basic_sources
       ../utests/utest_error.c
       ../utests/utest_assert.cpp
       ../utests/utest_file_map.cpp
       ../utests/utest_helper.cpp
       ./thirdparty/libva/test/common/va_display.c
       ./thirdparty/libva/test/common/va_display_x11.c)
  ADD_DEFINITIONS(-DHAVE_VA_X11)
  ADD_LIBRARY(va_ocl_basic SHARED ${va_ocl_basic_sources})
  TARGET_LINK_LIBRARIES(va_ocl_basic cl m va va-x11 ${X11_X11_LIB})

  IF(LIBVA_BUF_SH_DEP)
    ADD_DEFINITIONS(-DINPUT_NV12_DEFAULT="${CMAKE_CURRENT_SOURCE_DIR}/libva_buffer_sharing/256_128.nv12")
    ADD_EXECUTABLE(example-libva_buffer_sharing ./libva_buffer_sharing/libva_buffer_sharing.cpp)
    TARGET_LINK_LIBRARIES(example-libva_buffer_sharing va_ocl_basic)
  ENDIF(LIBVA_BUF_SH_DEP)

  IF(V4L2_BUF_SH_DEP)
    ADD_EXECUTABLE(example-v4l2_buffer_sharing ./v4l2_buffer_sharing/v4l2_buffer_sharing.cpp)
    TARGET_LINK_LIBRARIES(example-v4l2_buffer_sharing va_ocl_basic)
  ENDIF(V4L2_BUF_SH_DEP)
ENDIF(LIBVA_BUF_SH_DEP OR V4L2_BUF_SH_DEP)
Beignet-1.1.1-Source/intel-beignet.icd.in000664 001750 001750 00000000037 12576733264 017310 0ustar00yryr000000 000000 @BEIGNET_INSTALL_DIR@/libcl.so
Beignet-1.1.1-Source/CMake/FindMesaSrc.cmake000664 001750 001750 00000001724 12576733264 017620 0ustar00yryr000000 000000 #
# Try to find mesa source code
# Once done this will define
#
# MESA_SOURCE_FOUND
# MESA_SOURCE_INCLUDES
#

# Find mesa source code.
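# The probe below looks, in order, at the MESA_SOURCE_DIR environment
# variable, a mesa checkout placed next to this source tree, and ~/mesa.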
FIND_PATH(MESA_SOURCE_PREFIX src/mesa/main/texobj.c
  $ENV{MESA_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../mesa
  ~/mesa
  DOC "The mesa source directory which is needed for cl_khr_gl_sharing.")

IF(MESA_SOURCE_PREFIX)
  SET(MESA_SOURCE_INCLUDES ${MESA_SOURCE_PREFIX}/src/mesa
                           ${MESA_SOURCE_PREFIX}/include
                           ${MESA_SOURCE_PREFIX}/src/mapi
                           ${MESA_SOURCE_PREFIX}/src/mesa/drivers/dri/i965/
                           ${MESA_SOURCE_PREFIX}/src/mesa/drivers/dri/i915/
                           ${MESA_SOURCE_PREFIX}/src/mesa/drivers/dri/common/)
  SET(MESA_SOURCE_FOUND 1 CACHE STRING "Set to 1 if mesa source code is found, 0 otherwise")
ELSE(MESA_SOURCE_PREFIX)
  SET(MESA_SOURCE_FOUND 0 CACHE STRING "Set to 1 if mesa source code is found, 0 otherwise")
ENDIF(MESA_SOURCE_PREFIX)
Beignet-1.1.1-Source/CMake/FindOCLIcd.cmake000664 001750 001750 00000001140 12576733264 017310 0ustar00yryr000000 000000 #
# Try to find ocl_icd library and include path.
# Once done this will define
#
# OCLIcd_FOUND
# OCLIcd_INCLUDE_PATH
#

FIND_PATH(OCLIcd_INCLUDE_PATH ocl_icd.h
  ~/include/
  /usr/include/
  /usr/local/include/
  /sw/include/
  /opt/local/include/
  DOC "The directory where ocl_icd.h resides")

IF(OCLIcd_INCLUDE_PATH)
  INCLUDE_DIRECTORIES(${OCLIcd_INCLUDE_PATH})
  SET(OCLIcd_FOUND 1 CACHE STRING "Set to 1 if OCLIcd is found, 0 otherwise")
ELSE(OCLIcd_INCLUDE_PATH)
  SET(OCLIcd_FOUND 0 CACHE STRING "Set to 1 if OCLIcd is found, 0 otherwise")
ENDIF(OCLIcd_INCLUDE_PATH)

MARK_AS_ADVANCED(OCLIcd_FOUND)
Beignet-1.1.1-Source/CMake/CMakeConfigTemplate.hpp000664 001750 001750 00000001702 12576733264 020767 0ustar00yryr000000 000000 /*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Benjamin Segovia */ #ifndef CMAKE_CONFIG_HPP #define CMAKE_CONFIG_HPP #define ON true #define OFF false #define GEN_INSTALLATION_PATH "${CMAKE_INSTALL_PREFIX}/lib/i965/" #endif /* CMAKE_CONFIG_HPP */ Beignet-1.1.1-Source/CMake/FindStandaloneGbeCompiler.cmake000664 001750 001750 00000002656 12576733264 022471 0ustar00yryr000000 000000 # Find the standalone gbe compiler # # STANDALONE_GBE_COMPILER_DIR - base path of standalone compiler # STANDALONE_GBE_COMPILER - full file name of standalone compiler # GEN_PCI_ID - Gen's PCI ID IF (STANDALONE_GBE_COMPILER_DIR) FIND_PROGRAM(STANDALONE_GBE_COMPILER NAMES gbe_bin_generater DOC "standalone gbe compiler executable" PATHS ${STANDALONE_GBE_COMPILER_DIR} NO_DEFAULT_PATH) ELSE (STANDALONE_GBE_COMPILER_DIR) FIND_PROGRAM(STANDALONE_GBE_COMPILER NAMES gbe_bin_generater DOC "standalone gbe compiler executable" PATHS /usr/local/lib/beignet/) ENDIF (STANDALONE_GBE_COMPILER_DIR) IF (STANDALONE_GBE_COMPILER) MESSAGE(STATUS "Looking for standalone gbe compiler - found at ${STANDALONE_GBE_COMPILER}") STRING(REGEX REPLACE "(.*)/.*" "\\1" STANDALONE_GBE_COMPILER_DIR ${STANDALONE_GBE_COMPILER}) IF (NOT GEN_PCI_ID) Find_Program(LSPCI lspci) IF (LSPCI) MESSAGE(STATUS "Looking for lspci - found") ELSE (LSPCI) MESSAGE(FATAL_ERROR "Looking for lspci - not found") ENDIF (LSPCI) EXECUTE_PROCESS(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh" OUTPUT_VARIABLE GEN_PCI_ID) MESSAGE(STATUS "Platform Gen PCI id is " ${GEN_PCI_ID}) ENDIF (NOT GEN_PCI_ID) ELSE (STANDALONE_GBE_COMPILER) MESSAGE(FATAL_ERROR "Looking for standalone gbe compiler - not found") ENDIF (STANDALONE_GBE_COMPILER)Beignet-1.1.1-Source/CMake/FindLLVM.cmake000664 001750 001750 00000011755 12576733264 017042 0ustar00yryr000000 000000 # Find the native LLVM includes and library # # LLVM_INCLUDE_DIR - where to find llvm include files # LLVM_LIBRARY_DIR - where to find llvm libs # LLVM_CFLAGS - llvm compiler flags # LLVM_LDFLAGS - llvm linker flags # LLVM_MODULE_LIBS - list of llvm libs for working with modules. # LLVM_FOUND - True if llvm found. 
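# A typical way to point the build at a specific LLVM when llvm-config is not
# on PATH (the path below is illustrative only):
#   cmake -DLLVM_INSTALL_DIR=/opt/llvm-3.5/bin ..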
if (LLVM_INSTALL_DIR)
  find_program(LLVM_CONFIG_EXECUTABLE
               NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 llvm-config-3.6 llvm-config-33 llvm-config-3.3 llvm-config-34 llvm-config-3.4 llvm-config
               DOC "llvm-config executable"
               PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
else (LLVM_INSTALL_DIR)
  find_program(LLVM_CONFIG_EXECUTABLE
               NAMES llvm-config-35 llvm-config-3.5 llvm-config-36 llvm-config-3.6 llvm-config-33 llvm-config-3.3 llvm-config-34 llvm-config-3.4 llvm-config
               DOC "llvm-config executable")
endif (LLVM_INSTALL_DIR)

if (LLVM_CONFIG_EXECUTABLE)
  message(STATUS "LLVM llvm-config found at: ${LLVM_CONFIG_EXECUTABLE}")
else (LLVM_CONFIG_EXECUTABLE)
  message(FATAL_ERROR "Could NOT find LLVM executable, please add -DLLVM_INSTALL_DIR=/path/to/llvm-config/ in cmake command")
endif (LLVM_CONFIG_EXECUTABLE)

execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --version
  OUTPUT_VARIABLE LLVM_VERSION
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
string(REGEX REPLACE "([0-9])\\.([0-9]*).*" "\\1\\2" LLVM_VERSION_NODOT ${LLVM_VERSION})
string(REGEX REPLACE "([0-9])\\.([0-9]*).*" "\\1.\\2" LLVM_VERSION_NOPATCH ${LLVM_VERSION})

if (LLVM_FIND_VERSION_MAJOR AND LLVM_FIND_VERSION_MINOR)
  SET(LLVM_FIND_VERSION_NODOT "${LLVM_FIND_VERSION_MAJOR}${LLVM_FIND_VERSION_MINOR}")
  if (LLVM_VERSION_NODOT VERSION_LESS LLVM_FIND_VERSION_NODOT)
    message(FATAL_ERROR "incompatible LLVM version ${LLVM_VERSION}, required ${LLVM_FIND_VERSION}")
  else (LLVM_VERSION_NODOT VERSION_LESS LLVM_FIND_VERSION_NODOT)
    if (LLVM_VERSION_NODOT VERSION_EQUAL LLVM_FIND_VERSION_NODOT)
      message(STATUS "found stable LLVM version ${LLVM_VERSION}")
    else (LLVM_VERSION_NODOT VERSION_EQUAL LLVM_FIND_VERSION_NODOT)
      message(STATUS "found unstable LLVM version ${LLVM_VERSION}")
    endif (LLVM_VERSION_NODOT VERSION_EQUAL LLVM_FIND_VERSION_NODOT)
    add_definitions("-DLLVM_${LLVM_VERSION_NODOT}")
  endif (LLVM_VERSION_NODOT VERSION_LESS LLVM_FIND_VERSION_NODOT)
endif (LLVM_FIND_VERSION_MAJOR AND LLVM_FIND_VERSION_MINOR)

if (LLVM_INSTALL_DIR)
  find_program(CLANG_EXECUTABLE
               NAMES clang-${LLVM_VERSION_NODOT} clang-${LLVM_VERSION_NOPATCH} clang
               PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
  find_program(LLVM_AS_EXECUTABLE
               NAMES llvm-as-${LLVM_VERSION_NODOT} llvm-as-${LLVM_VERSION_NOPATCH} llvm-as
               PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
  find_program(LLVM_LINK_EXECUTABLE
               NAMES llvm-link-${LLVM_VERSION_NODOT} llvm-link-${LLVM_VERSION_NOPATCH} llvm-link
               PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
else (LLVM_INSTALL_DIR)
  find_program(CLANG_EXECUTABLE
               NAMES clang-${LLVM_VERSION_NODOT} clang-${LLVM_VERSION_NOPATCH} clang)
  find_program(LLVM_AS_EXECUTABLE
               NAMES llvm-as-${LLVM_VERSION_NODOT} llvm-as-${LLVM_VERSION_NOPATCH} llvm-as)
  find_program(LLVM_LINK_EXECUTABLE
               NAMES llvm-link-${LLVM_VERSION_NODOT} llvm-link-${LLVM_VERSION_NOPATCH} llvm-link)
endif (LLVM_INSTALL_DIR)

execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --includedir
  OUTPUT_VARIABLE LLVM_INCLUDE_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --libdir
  OUTPUT_VARIABLE LLVM_LIBRARY_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --cppflags
  OUTPUT_VARIABLE LLVM_CFLAGS
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --ldflags
  OUTPUT_VARIABLE LLVM_LDFLAGS
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE} --libs
  OUTPUT_VARIABLE LLVM_MODULE_LIBS
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

if (LLVM_VERSION_NODOT VERSION_GREATER 34)
execute_process(
  COMMAND ${LLVM_CONFIG_EXECUTABLE}
  --system-libs
  OUTPUT_VARIABLE LLVM_SYSTEM_LIBS_ORIG
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
string(REGEX REPLACE " *\n" "" LLVM_SYSTEM_LIBS ${LLVM_SYSTEM_LIBS_ORIG})
endif (LLVM_VERSION_NODOT VERSION_GREATER 34)

macro(add_one_lib name)
  FIND_LIBRARY(CLANG_LIB
    NAMES ${name}
    PATHS ${LLVM_LIBRARY_DIR} NO_DEFAULT_PATH)
  set(CLANG_LIBRARIES ${CLANG_LIBRARIES} ${CLANG_LIB})
  unset(CLANG_LIB CACHE)
endmacro()

# Assume the clang lib path is the same as the llvm lib path
add_one_lib("clangFrontend")
add_one_lib("clangSerialization")
add_one_lib("clangDriver")
add_one_lib("clangCodeGen")
add_one_lib("clangSema")
add_one_lib("clangStaticAnalyzerFrontend")
add_one_lib("clangStaticAnalyzerCheckers")
add_one_lib("clangStaticAnalyzerCore")
add_one_lib("clangAnalysis")
add_one_lib("clangEdit")
add_one_lib("clangAST")
add_one_lib("clangParse")
add_one_lib("clangSema")
add_one_lib("clangLex")
add_one_lib("clangBasic")
Beignet-1.1.1-Source/GetGenID.sh000775 001750 001750 00000001112 12576733264 015414 0ustar00yryr000000 000000 #!/bin/bash
genpciid=(0152 0162 0156 0166 015a 016a 0f31 0402 0412 0422 040a 041a 042a 0406 0416 0426 0c02 0c12 0c22 0c0a 0c1a 0c2a 0c06 0c16 0c26 0a02 0a12 0a22 0a0a 0a1a 0a2a 0a06 0a16 0a26 0d02 0d12 0d22 0d0a 0d1a 0d2a 0d06 0d16 0d26)
pciid=($(lspci -nn | grep "\[8086:.*\]" -o | awk -F : '{print $2}' | awk -F ] '{print $1}'))
n=${#pciid[*]}
i=0
m=${#genpciid[*]}
j=0
while [ $i -lt $n ]
do
  id1=${pciid[$i]}
  let j=0
  while [ $j -lt $m ]
  do
    id2=${genpciid[$j]}
    if [ ${id1} == ${id2} ]
    then
      echo ${id1}
      exit 0
    fi
    let j=j+1
  done
  let i=i+1
done
Beignet-1.1.1-Source/src/cl_gl_api.c000664 001750 001750 00000010677 12576733264 016343 0ustar00yryr000000 000000 /*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Zhigang Gong */ #include #include #include #ifdef HAS_EGL #include #endif #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_command_queue.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_mem.h" #include "cl_image.h" #include "cl_sampler.h" #include "cl_alloc.h" #include "cl_utils.h" #include "CL/cl.h" #include "CL/cl_gl.h" #include "CL/cl_intel.h" #include "cl_mem_gl.h" #define CHECK_GL_CONTEXT(CTX) \ do { \ if (UNLIKELY(CTX->props.gl_type == CL_GL_NOSHARE)) { \ err = CL_INVALID_CONTEXT; \ goto error; \ } \ } while (0) cl_mem clCreateFromGLBuffer(cl_context context, cl_mem_flags flags, GLuint bufobj, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_buffer(context, flags, bufobj, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_texture(context, flags, texture_target, miplevel, texture, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateFromGLTexture3D(cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_texture(context, flags, texture_target, miplevel, texture, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateFromGLTexture(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_GL_CONTEXT (context); mem = cl_mem_new_gl_texture(context, flags, target, miplevel, texture, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } /* XXX NULL function currently. */ cl_int clEnqueueAcquireGLObjects (cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; return err; } /* XXX NULL function currently. */ cl_int clEnqueueReleaseGLObjects (cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; return err; } Beignet-1.1.1-Source/src/performance.h000664 001750 001750 00000000514 12576733264 016735 0ustar00yryr000000 000000 #ifndef __PERFORMANCE_H__ #define __PERFORMANCE_H__ #include "CL/cl.h" extern int b_output_kernel_perf; void time_start(cl_context context, const char * kernel_name, cl_command_queue cq); void time_end(cl_context context, const char * kernel_name, const char * build_opt, cl_command_queue cq); void initialize_env_var(); #endif Beignet-1.1.1-Source/src/cl_driver_type.h000664 001750 001750 00000001702 12576733264 017446 0ustar00yryr000000 000000 /************************************************************************** * cl_driver: * Hide behind some call backs the buffer allocation / deallocation ... 
This * will allow us to make the use of a software performance simulator easier and * to minimize the code specific for the HW and for the simulator **************************************************************************/ #ifndef __CL_DRIVER_TYPE_H__ #define __CL_DRIVER_TYPE_H__ /* Encapsulates command buffer / data buffer / kernels */ typedef struct _cl_buffer *cl_buffer; /* Encapsulates buffer manager */ typedef struct _cl_buffer_mgr *cl_buffer_mgr; /* Encapsulates the driver backend functionalities */ typedef struct _cl_driver *cl_driver; /* Encapsulates the gpgpu stream of commands */ typedef struct _cl_gpgpu *cl_gpgpu; /* Encapsulates the event of a command stream */ typedef struct _cl_gpgpu_event *cl_gpgpu_event; typedef struct _cl_context_prop *cl_context_prop; #endif Beignet-1.1.1-Source/src/cl_thread.h000664 001750 001750 00000003447 12576733264 016365 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * */ #ifndef __CL_THREAD_H__ #define __CL_THREAD_H__ #include #include "cl_internals.h" #include "cl_command_queue.h" /* Create the thread specific data. */ void* cl_thread_data_create(void); /* The destructor for cleaning up the thread specific data. */ void cl_thread_data_destroy(cl_command_queue queue); /* Used to get the gpgpu struct of each thread. */ cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue); /* Used to release the gpgpu struct of each thread. */ void cl_invalid_thread_gpgpu(cl_command_queue queue); /* Used to set the batch buffer of each thread. */ void cl_set_thread_batch_buf(cl_command_queue queue, void* buf); /* Used to get the batch buffer of each thread. */ void* cl_get_thread_batch_buf(cl_command_queue queue); /* Take the current gpgpu from the thread gpgpu pool. */ cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue); cl_event get_current_event(cl_command_queue queue); cl_event get_last_event(cl_command_queue queue); void set_current_event(cl_command_queue queue, cl_event e); void set_last_event(cl_command_queue queue, cl_event e); #endif /* __CL_THREAD_H__ */ Beignet-1.1.1-Source/src/cl_gen7_device.h000664 001750 001750 00000002057 12576733264 017275 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Benjamin Segovia */ /* Common fields for both IVB devices (either GT1 or GT2) */ .max_parameter_size = 1024, .global_mem_cache_line_size = 64, /* XXX */ .global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, .scratch_mem_size = 12 << 10, #include "cl_gt_device.h" Beignet-1.1.1-Source/src/cl_gen75_device.h000664 001750 001750 00000002057 12576733264 017362 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ /* Common fields for both HSW devices (either GT1 or GT2) */ .max_parameter_size = 1024, .global_mem_cache_line_size = 64, /* XXX */ .global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, .scratch_mem_size = 2 << 20, #include "cl_gt_device.h" Beignet-1.1.1-Source/src/cl_event.c000664 001750 001750 00000054542 12600662242 016221 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Rong Yang */ #include "cl_event.h" #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include "cl_command_queue.h" #include #include inline cl_bool cl_event_is_gpu_command_type(cl_command_type type) { switch(type) { case CL_COMMAND_COPY_BUFFER: case CL_COMMAND_FILL_BUFFER: case CL_COMMAND_COPY_IMAGE: case CL_COMMAND_COPY_IMAGE_TO_BUFFER: case CL_COMMAND_COPY_BUFFER_TO_IMAGE: case CL_COMMAND_COPY_BUFFER_RECT: case CL_COMMAND_TASK: case CL_COMMAND_NDRANGE_KERNEL: return CL_TRUE; default: return CL_FALSE; } } int cl_event_flush(cl_event event) { int err = CL_SUCCESS; assert(event->gpgpu_event != NULL); if (event->gpgpu) { err = cl_command_queue_flush_gpgpu(event->queue, event->gpgpu); cl_gpgpu_delete(event->gpgpu); event->gpgpu = NULL; } cl_gpgpu_event_flush(event->gpgpu_event); set_last_event(event->queue, event); return err; } cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict) { cl_event event = NULL; GET_QUEUE_THREAD_GPGPU(queue); /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event)); SET_ICD(event->dispatch) event->magic = CL_MAGIC_EVENT_HEADER; event->ref_n = 1; /* Append the event in the context event list */ pthread_mutex_lock(&ctx->event_lock); event->next = ctx->events; if (ctx->events != NULL) ctx->events->prev = event; ctx->events = event; pthread_mutex_unlock(&ctx->event_lock); event->ctx = ctx; cl_context_add_ref(ctx); /* Initialize all members and create GPGPU event object */ event->queue = queue; event->type = type; event->gpgpu_event = NULL; if(type == CL_COMMAND_USER) { event->status = CL_SUBMITTED; } else { event->status = CL_QUEUED; if(cl_event_is_gpu_command_type(event->type)) event->gpgpu_event = cl_gpgpu_event_new(gpgpu); } cl_event_add_ref(event); //dec when complete event->user_cb = NULL; event->enqueue_cb = NULL; event->waits_head = NULL; event->emplict = emplict; exit: return event; error: cl_event_delete(event); event = NULL; goto exit; } void cl_event_delete(cl_event event) { if (UNLIKELY(event == NULL)) return; cl_event_update_status(event, 0); if (atomic_dec(&event->ref_n) > 1) return; if(event->queue && get_last_event(event->queue) == event) set_last_event(event->queue, NULL); /* Call all user callbacks that haven't executed yet */ cl_event_call_callback(event, CL_COMPLETE, CL_TRUE); // CL_COMPLETE status will force all callbacks that are not executed to run /* delete gpgpu event object */ if(event->gpgpu_event) cl_gpgpu_event_delete(event->gpgpu_event); /* Remove it from the list */ assert(event->ctx); pthread_mutex_lock(&event->ctx->event_lock); if (event->prev) event->prev->next = event->next; if (event->next) event->next->prev = event->prev; /* if this is the head, update head pointer ctx->events */ if (event->ctx->events == event) event->ctx->events = event->next; pthread_mutex_unlock(&event->ctx->event_lock); cl_context_delete(event->ctx); if (event->gpgpu) { fprintf(stderr, "Warning: an event is deleted with a pending enqueued task.\n"); cl_gpgpu_delete(event->gpgpu); event->gpgpu = NULL; } cl_free(event); } void cl_event_add_ref(cl_event event) { assert(event); atomic_inc(&event->ref_n); } cl_int cl_event_set_callback(cl_event event , cl_int command_exec_callback_type, EVENT_NOTIFY pfn_notify, void* user_data) { assert(event); assert(pfn_notify); cl_int err = CL_SUCCESS; user_callback *cb; TRY_ALLOC(cb, CALLOC(user_callback)); cb->pfn_notify = pfn_notify; cb->user_data = user_data;
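/* Illustrative usage sketch (not part of the original file): this function
 * backs the standard OpenCL 1.1 entry point clSetEventCallback(). From
 * application code the registration typically looks like:
 *
 *   void CL_CALLBACK on_done(cl_event e, cl_int status, void *data) {
 *     // runs once the event reaches CL_COMPLETE (or an error status)
 *   }
 *   clSetEventCallback(event, CL_COMPLETE, on_done, NULL);
 *
 * The logic below must cope with a callback registered *after* the event
 * already reached the requested status (e.g. a synchronous
 * clEnqueueReadBuffer): in that case the callback fires immediately instead
 * of being queued on event->user_cb. */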
cb->status = command_exec_callback_type; cb->executed = CL_FALSE; // It is possible that the event enqueued is already completed. // clEnqueueReadBuffer can be synchronous and when the callback // is registered after, it still needs to get executed. pthread_mutex_lock(&event->ctx->event_lock); // Thread safety required: operations on the event->status can be made from many different threads if(event->status <= command_exec_callback_type) { /* Call user callback */ pthread_mutex_unlock(&event->ctx->event_lock); // pfn_notify can call CL functions that take the event_lock, so it must not be held here cb->pfn_notify(event, event->status, cb->user_data); cl_free(cb); } else { // Enqueue to callback list cb->next = event->user_cb; event->user_cb = cb; pthread_mutex_unlock(&event->ctx->event_lock); } exit: return err; error: err = CL_OUT_OF_HOST_MEMORY; cl_free(cb); goto exit; }; cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event,cl_context ctx) { cl_int err = CL_SUCCESS; cl_int i; /* check the event_wait_list and num_events_in_wait_list */ if((event_wait_list == NULL) && (num_events_in_wait_list > 0)) goto error; if ((event_wait_list != NULL) && (num_events_in_wait_list == 0)){ goto error; } /* check the event and context */ for(i=0; i<num_events_in_wait_list; i++) { if(event_wait_list[i]->status < CL_COMPLETE) { err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; goto exit; } if(event && event == &event_wait_list[i]) goto error; if(event_wait_list[i]->ctx != ctx) goto error; } exit: return err; error: err = CL_INVALID_EVENT_WAIT_LIST; //reset error goto exit; } cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_command_queue queue) { cl_int i; /* Check whether we wait on user events */ for(i=0; i<num_events_in_wait_list; i++) { if(event_wait_list[i]->status <= CL_COMPLETE) continue; /* Need to wait on a user event: return and defer the enqueue */ if((event_wait_list[i]->type == CL_COMMAND_USER) || (event_wait_list[i]->enqueue_cb && (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){ return CL_ENQUEUE_EXECUTE_DEFER; } } if(queue && queue->barrier_events_num ) return CL_ENQUEUE_EXECUTE_DEFER; /* No user events, or all user events finished: wait for all enqueued events to finish */ for(i=0; i<num_events_in_wait_list; i++) { if(event_wait_list[i]->status <= CL_COMPLETE) continue; //enqueue callback hasn't finished in another thread, wait if(event_wait_list[i]->enqueue_cb != NULL) return CL_ENQUEUE_EXECUTE_DEFER; if(event_wait_list[i]->gpgpu_event) cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1); cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback } return CL_ENQUEUE_EXECUTE_IMM; } void cl_event_new_enqueue_callback(cl_event event, enqueue_data *data, cl_uint num_events_in_wait_list, const cl_event *event_wait_list) { enqueue_callback *cb, *node; user_event *user_events, *u_ev; cl_command_queue queue = event->queue; cl_int i; cl_int err = CL_SUCCESS; /* Allocate and initialize the structure itself */ TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback)); cb->num_events = 0; TRY_ALLOC_NO_ERR (cb->wait_list, CALLOC_ARRAY(cl_event, num_events_in_wait_list)); for(i=0; i<num_events_in_wait_list; i++) { //user event will insert to cb->wait_user_events, need not in wait list, avoid ref twice if(event_wait_list[i]->type != CL_COMMAND_USER) { cb->wait_list[cb->num_events++] = event_wait_list[i]; cl_event_add_ref(event_wait_list[i]); //add defer enqueue's wait event reference } } cb->event = event; cb->next = NULL; cb->wait_user_events = NULL; if(queue && queue->barrier_events_num > 0) { for(i=0; i<queue->barrier_events_num; i++) { /* Insert the enqueue_callback to user event list */ node =
queue->wait_events[i]->waits_head; if(node == NULL) queue->wait_events[i]->waits_head = cb; else{ while((node != cb) && node->next) node = node->next; if(node == cb) //wait on dup user event continue; node->next = cb; } /* Insert the user event to enqueue_callback's wait_user_events */ TRY(cl_event_insert_user_event, &cb->wait_user_events, queue->wait_events[i]); cl_event_add_ref(queue->wait_events[i]); } } /* Find out all user events that event_wait_list waits on */ for(i=0; i<num_events_in_wait_list; i++) { if(event_wait_list[i]->status <= CL_COMPLETE) continue; if(event_wait_list[i]->type == CL_COMMAND_USER) { /* Insert the enqueue_callback to user event list */ node = event_wait_list[i]->waits_head; if(node == NULL) event_wait_list[i]->waits_head = cb; else { while((node != cb) && node->next) node = node->next; if(node == cb) //wait on dup user event continue; node->next = cb; } /* Insert the user event to enqueue_callback's wait_user_events */ TRY(cl_event_insert_user_event, &cb->wait_user_events, event_wait_list[i]); cl_event_add_ref(event_wait_list[i]); cl_command_queue_insert_event(event->queue, event_wait_list[i]); if(data->type == EnqueueBarrier){ cl_command_queue_insert_barrier_event(event->queue, event_wait_list[i]); } } else if(event_wait_list[i]->enqueue_cb != NULL) { user_events = event_wait_list[i]->enqueue_cb->wait_user_events; while(user_events != NULL) { /* Insert the enqueue_callback to user event's waits_tail */ node = user_events->event->waits_head; if(node == NULL) event_wait_list[i]->waits_head = cb; else{ while((node != cb) && node->next) node = node->next; if(node == cb) { //wait on dup user event user_events = user_events->next; continue; } node->next = cb; } /* Insert the user event to enqueue_callback's wait_user_events */ TRY(cl_event_insert_user_event, &cb->wait_user_events, user_events->event); cl_event_add_ref(user_events->event); cl_command_queue_insert_event(event->queue, user_events->event); if(data->type == EnqueueBarrier){ cl_command_queue_insert_barrier_event(event->queue, user_events->event); } user_events = user_events->next; } } } if(data->queue != NULL && event->gpgpu_event != NULL) { event->gpgpu = cl_thread_gpgpu_take(event->queue); data->ptr = (void *)event->gpgpu_event; } cb->data = *data; event->enqueue_cb = cb; exit: return; error: if(cb) { while(cb->wait_user_events) { u_ev = cb->wait_user_events; cb->wait_user_events = cb->wait_user_events->next; cl_event_delete(u_ev->event); cl_free(u_ev); } for(i=0; i<cb->num_events; i++) { if(cb->wait_list[i]) { cl_event_delete(cb->wait_list[i]); } } cl_free(cb); } goto exit; } void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb) { user_callback *user_cb = NULL; user_callback *queue_cb = NULL; // For thread safety, we create a queue that holds user_callback's pfn_notify contents user_callback *temp_cb = NULL; user_cb = event->user_cb; pthread_mutex_lock(&event->ctx->event_lock); while(user_cb) { if(user_cb->status >= status && user_cb->executed == CL_FALSE) { // Added check to not execute a callback when it was already handled user_cb->executed = CL_TRUE; temp_cb = cl_malloc(sizeof(user_callback)); if(!temp_cb) { break; // Out of memory } temp_cb->pfn_notify = user_cb->pfn_notify; // Minor struct copy to call pfn_notify out of the pthread_mutex temp_cb->user_data = user_cb->user_data; if(free_cb) { cl_free(user_cb); } if(!queue_cb) { queue_cb = temp_cb; queue_cb->next = NULL; } else { // Enqueue First temp_cb->next = queue_cb; queue_cb = temp_cb; } } user_cb = user_cb->next; } pthread_mutex_unlock(&event->ctx->event_lock); // Calling the
callbacks outside of the event_lock is required because the callback can call cl_api functions and get deadlocked while(queue_cb) { // For each callback queued, actually execute the callback queue_cb->pfn_notify(event, event->status, queue_cb->user_data); temp_cb = queue_cb; queue_cb = queue_cb->next; cl_free(temp_cb); } } void cl_event_set_status(cl_event event, cl_int status) { cl_int ret, i; cl_event evt; pthread_mutex_lock(&event->ctx->event_lock); if(status >= event->status) { pthread_mutex_unlock(&event->ctx->event_lock); return; } if(event->status <= CL_COMPLETE) { event->status = status; //enqueue already done earlier, or being done in another thread pthread_mutex_unlock(&event->ctx->event_lock); return; } if(status <= CL_COMPLETE) { if(event->enqueue_cb) { if(status == CL_COMPLETE) { cl_enqueue_handle(event, &event->enqueue_cb->data); if(event->gpgpu_event) cl_gpgpu_event_update_status(event->gpgpu_event, 1); //now set complete, needs refinement } else { if(event->gpgpu_event) { // Error then cancel the enqueued event. cl_gpgpu_delete(event->gpgpu); event->gpgpu = NULL; } } event->status = status; //Change the event status after enqueue and before unlock pthread_mutex_unlock(&event->ctx->event_lock); for(i=0; i<event->enqueue_cb->num_events; i++) cl_event_delete(event->enqueue_cb->wait_list[i]); pthread_mutex_lock(&event->ctx->event_lock); if(event->enqueue_cb->wait_list) cl_free(event->enqueue_cb->wait_list); cl_free(event->enqueue_cb); event->enqueue_cb = NULL; } } if(event->status >= status) //maybe changed in other threads event->status = status; pthread_mutex_unlock(&event->ctx->event_lock); /* Call user callback */ cl_event_call_callback(event, status, CL_FALSE); if(event->type == CL_COMMAND_USER) { /* Check all defer enqueue */ enqueue_callback *cb, *enqueue_cb = event->waits_head; while(enqueue_cb) { /* Remove this user event in enqueue_cb, update the header if needed. */ cl_event_remove_user_event(&enqueue_cb->wait_user_events, event); cl_event_delete(event); /* Still wait on other user events */ if(enqueue_cb->wait_user_events != NULL) { enqueue_cb = enqueue_cb->next; continue; } //remove user event from enqueue_cb's ctx cl_command_queue_remove_event(enqueue_cb->event->queue, event); cl_command_queue_remove_barrier_event(enqueue_cb->event->queue, event); /* All user events complete, now wait enqueue events */ ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list, enqueue_cb->event->queue); assert(ret != CL_ENQUEUE_EXECUTE_DEFER); ret = ~ret; cb = enqueue_cb; enqueue_cb = enqueue_cb->next; /* Call the pending operation */ evt = cb->event; /* TODO: if this event waits on several events and one event's status is an error while the others are complete, what is the status of this event? Can't find the description in the OpenCL spec.
Simply update to the latest finished wait event. */ cl_event_set_status(cb->event, status); if(evt->emplict == CL_FALSE) { cl_event_delete(evt); } } event->waits_head = NULL; } if(event->status <= CL_COMPLETE) cl_event_delete(event); } void cl_event_update_status(cl_event event, int wait) { if(event->status <= CL_COMPLETE) return; if((event->gpgpu_event) && (cl_gpgpu_event_update_status(event->gpgpu_event, wait) == command_complete)) cl_event_set_status(event, CL_COMPLETE); } cl_int cl_event_marker_with_wait_list(cl_command_queue queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event* event) { enqueue_data data = { 0 }; cl_event e; e = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); if(e == NULL) return CL_OUT_OF_HOST_MEMORY; if(event != NULL ){ *event = e; } /* enqueues a marker command which waits for either a list of events to complete, or if the list is empty it waits for all commands previously enqueued in command_queue to complete before it completes. */ if(num_events_in_wait_list > 0){ if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) { data.type = EnqueueMarker; cl_event_new_enqueue_callback(e, &data, num_events_in_wait_list, event_wait_list); /* use e, not *event: the out parameter may be NULL */ return CL_SUCCESS; } } else if(queue->wait_events_num > 0) { data.type = EnqueueMarker; cl_event_new_enqueue_callback(e, &data, queue->wait_events_num, queue->wait_events); return CL_SUCCESS; } cl_event last_event = get_last_event(queue); if(last_event && last_event->gpgpu_event) cl_gpgpu_event_update_status(last_event->gpgpu_event, 1); cl_event_set_status(e, CL_COMPLETE); return CL_SUCCESS; } cl_int cl_event_barrier_with_wait_list(cl_command_queue queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event* event) { enqueue_data data = { 0 }; cl_event e; e = cl_event_new(queue->ctx, queue, CL_COMMAND_BARRIER, CL_TRUE); if(e == NULL) return CL_OUT_OF_HOST_MEMORY; if(event != NULL ){ *event = e; } /* enqueues a barrier command which waits for either a list of events to complete, or if the list is empty it waits for all commands previously enqueued in command_queue to complete before it completes. */
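/* Illustrative usage sketch (not part of the original file): this function
 * backs the OpenCL 1.2 entry point clEnqueueBarrierWithWaitList(). A typical
 * call from application code, with two hypothetical events:
 *
 *   cl_event deps[2] = { write_done, kernel_done };
 *   clEnqueueBarrierWithWaitList(queue, 2, deps, NULL);
 *   // commands enqueued after this point wait for both deps to complete
 *
 * Unlike the marker above, a barrier also blocks later commands in the queue,
 * which is why its deferred callbacks are tagged EnqueueBarrier below. */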
if(num_events_in_wait_list > 0){ if(cl_event_wait_events(num_events_in_wait_list, event_wait_list, queue) == CL_ENQUEUE_EXECUTE_DEFER) { data.type = EnqueueBarrier; cl_event_new_enqueue_callback(e, &data, num_events_in_wait_list, event_wait_list); return CL_SUCCESS; } } else if(queue->wait_events_num > 0) { data.type = EnqueueBarrier; cl_event_new_enqueue_callback(e, &data, queue->wait_events_num, queue->wait_events); return CL_SUCCESS; } cl_event last_event = get_last_event(queue); if(last_event && last_event->gpgpu_event) cl_gpgpu_event_update_status(last_event->gpgpu_event, 1); cl_event_set_status(e, CL_COMPLETE); return CL_SUCCESS; } cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time) { struct timespec ts; if(clock_gettime(CLOCK_MONOTONIC_RAW,&ts) != 0){ printf("CPU timer error\n"); return CL_FALSE; } *cpu_time = (1000000000.0) * (cl_ulong) ts.tv_sec + (cl_ulong) ts.tv_nsec; return CL_SUCCESS; } cl_int cl_event_get_queued_cpu_timestamp(cl_event event) { cl_int ret_val; ret_val = cl_event_get_cpu_timestamp(&event->queued_timestamp); return ret_val; } cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp) { cl_ulong ret_val; if(end_timestamp > start_timestamp){ ret_val = end_timestamp - start_timestamp; } else { /*if the start timestamp is greater than the end timestamp, set the return value to max*/ ret_val = ((cl_ulong) 1 << 32); } return ret_val; } cl_ulong cl_event_get_start_timestamp(cl_event event) { cl_ulong ret_val; ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[2]); return ret_val; } cl_ulong cl_event_get_end_timestamp(cl_event event) { cl_ulong ret_val; ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[3]); return ret_val; } cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name) { cl_ulong ret_val = 0; GET_QUEUE_THREAD_GPGPU(event->queue); if (!event->gpgpu_event) { cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val); event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; return CL_SUCCESS; } if(param_name == CL_PROFILING_COMMAND_SUBMIT || param_name == CL_PROFILING_COMMAND_QUEUED) { cl_gpgpu_event_get_gpu_cur_timestamp(gpgpu, &ret_val); event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; return CL_SUCCESS; } else if(param_name == CL_PROFILING_COMMAND_START) { cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 0, &ret_val); event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; return CL_SUCCESS; } else if (param_name == CL_PROFILING_COMMAND_END) { cl_gpgpu_event_get_exec_timestamp(gpgpu, event->gpgpu_event, 1, &ret_val); event->timestamp[param_name - CL_PROFILING_COMMAND_QUEUED] = ret_val; return CL_SUCCESS; } return CL_INVALID_VALUE; } cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event) { user_event * u_iter = *p_u_ev; user_event * u_ev; while(u_iter) { if(u_iter->event == event) return CL_SUCCESS; u_iter = u_iter->next; } TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); u_ev->event = event; u_ev->next = *p_u_ev; *p_u_ev = u_ev; return CL_SUCCESS; error: return CL_FALSE; } cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event) { user_event * u_iter = *p_u_ev; user_event * u_prev = *p_u_ev; while(u_iter){ if(u_iter->event == event ){ if(u_iter == *p_u_ev){ *p_u_ev = u_iter->next; }else{ u_prev->next = u_iter->next; } cl_free(u_iter); break; } u_prev = u_iter; u_iter = u_iter->next; } return CL_SUCCESS; } Beignet-1.1.1-Source/src/cl_enqueue.c000664 001750 001750 00000033724 12576744576
016575 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Rong Yang */ #include #include #include #include #include "cl_enqueue.h" #include "cl_image.h" #include "cl_driver.h" #include "cl_event.h" #include "cl_command_queue.h" #include "cl_utils.h" cl_int cl_enqueue_read_buffer(enqueue_data* data) { cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; //cl_buffer_get_subdata sometime is very very very slow in linux kernel, in skl and chv, //and it is randomly. So temporary disable it, use map/copy/unmap to read. //Should re-enable it after find root cause. if (0 && !mem->is_userptr) { if (cl_buffer_get_subdata(mem->bo, data->offset + buffer->sub_offset, data->size, data->ptr) != 0) err = CL_MAP_FAILURE; } else { void* src_ptr = cl_mem_map_auto(mem, 0); if (src_ptr == NULL) err = CL_MAP_FAILURE; else { memcpy(data->ptr, (char*)src_ptr + data->offset + buffer->sub_offset, data->size); cl_mem_unmap_auto(mem); } } return err; } cl_int cl_enqueue_read_buffer_rect(enqueue_data* data) { cl_int err = CL_SUCCESS; void* src_ptr; void* dst_ptr; const size_t* origin = data->origin; const size_t* host_origin = data->host_origin; const size_t* region = data->region; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if (!(src_ptr = cl_mem_map_auto(mem, 0))) { err = CL_MAP_FAILURE; goto error; } size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2]; src_ptr = (char*)src_ptr + offset + buffer->sub_offset; offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; dst_ptr = (char *)data->ptr + offset; if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? 
data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = dst_ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, region[0]); src += data->row_pitch; dst += data->host_row_pitch; } src_ptr = (char*)src_ptr + data->slice_pitch; dst_ptr = (char*)dst_ptr + data->host_slice_pitch; } } err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_write_buffer(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if (mem->is_userptr) { void* dst_ptr = cl_mem_map_auto(mem, 1); if (dst_ptr == NULL) err = CL_MAP_FAILURE; else { memcpy((char*)dst_ptr + data->offset + buffer->sub_offset, data->const_ptr, data->size); cl_mem_unmap_auto(mem); } } else { if (cl_buffer_subdata(mem->bo, data->offset + buffer->sub_offset, data->size, data->const_ptr) != 0) err = CL_MAP_FAILURE; } return err; } cl_int cl_enqueue_write_buffer_rect(enqueue_data *data) { cl_int err = CL_SUCCESS; void* src_ptr; void* dst_ptr; const size_t* origin = data->origin; const size_t* host_origin = data->host_origin; const size_t* region = data->region; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if (!(dst_ptr = cl_mem_map_auto(mem, 1))) { err = CL_MAP_FAILURE; goto error; } size_t offset = origin[0] + data->row_pitch*origin[1] + data->slice_pitch*origin[2]; dst_ptr = (char *)dst_ptr + offset + buffer->sub_offset; offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; src_ptr = (char*)data->const_ptr + offset; if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = dst_ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, region[0]); src += data->host_row_pitch; dst += data->row_pitch; } src_ptr = (char*)src_ptr + data->host_slice_pitch; dst_ptr = (char*)dst_ptr + data->slice_pitch; } } err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_read_image(enqueue_data *data) { cl_int err = CL_SUCCESS; void* src_ptr; cl_mem mem = data->mem_obj; CHECK_IMAGE(mem, image); const size_t* origin = data->origin; const size_t* region = data->region; if (!(src_ptr = cl_mem_map_auto(mem, 0))) { err = CL_MAP_FAILURE; goto error; } size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; src_ptr = (char*)src_ptr + offset; if (!origin[0] && region[0] == image->w && data->row_pitch == image->row_pitch && (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch))) { memcpy(data->ptr, src_ptr, region[2] == 1 ? 
data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src = src_ptr; char* dst = data->ptr; for (y = 0; y < region[1]; y++) { memcpy(dst, src, image->bpp*region[0]); src += image->row_pitch; dst += data->row_pitch; } src_ptr = (char*)src_ptr + image->slice_pitch; data->ptr = (char*)data->ptr + data->slice_pitch; } } err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_write_image(enqueue_data *data) { cl_int err = CL_SUCCESS; void* dst_ptr; cl_mem mem = data->mem_obj; CHECK_IMAGE(mem, image); if (!(dst_ptr = cl_mem_map_auto(mem, 1))) { err = CL_MAP_FAILURE; goto error; } //dst need to add offset cl_mem_copy_image_region(data->origin, data->region, dst_ptr, image->row_pitch, image->slice_pitch, data->const_ptr, data->row_pitch, data->slice_pitch, image, CL_TRUE, CL_FALSE); err = cl_mem_unmap_auto(mem); error: return err; } cl_int cl_enqueue_map_buffer(enqueue_data *data) { void *ptr = NULL; cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; assert(mem->type == CL_MEM_BUFFER_TYPE || mem->type == CL_MEM_SUBBUFFER_TYPE); struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if (mem->is_userptr) ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0); else { if(data->unsync_map == 1) //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here ptr = cl_mem_map_gtt(mem); else ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0); } if (ptr == NULL) { err = CL_MAP_FAILURE; goto error; } data->ptr = ptr; if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) { assert(mem->host_ptr); ptr = (char*)ptr + data->offset + buffer->sub_offset; memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size); } error: return err; } cl_int cl_enqueue_map_image(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; void *ptr = NULL; size_t row_pitch = 0; CHECK_IMAGE(mem, image); if(data->unsync_map == 1) //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here ptr = cl_mem_map_gtt(mem); else ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0); if (ptr == NULL) { err = CL_MAP_FAILURE; goto error; } data->ptr = ptr; if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) row_pitch = image->slice_pitch; else row_pitch = image->row_pitch; if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); //src and dst need add offset in function cl_mem_copy_image_region cl_mem_copy_image_region(data->origin, data->region, mem->host_ptr, image->host_row_pitch, image->host_slice_pitch, data->ptr, row_pitch, image->slice_pitch, image, CL_TRUE, CL_TRUE); } error: return err; } cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) { cl_int err = CL_SUCCESS; int i, j; size_t mapped_size = 0; size_t origin[3], region[3]; void * v_ptr = NULL; void * mapped_ptr = data->ptr; cl_mem memobj = data->mem_obj; size_t row_pitch = 0; assert(memobj->mapped_ptr_sz >= memobj->map_ref); INVALID_VALUE_IF(!mapped_ptr); for (i = 0; i < memobj->mapped_ptr_sz; i++) { if (memobj->mapped_ptr[i].ptr == mapped_ptr) { memobj->mapped_ptr[i].ptr = NULL; mapped_size = memobj->mapped_ptr[i].size; v_ptr = memobj->mapped_ptr[i].v_ptr; for(j=0; j<3; j++) { region[j] = memobj->mapped_ptr[i].region[j]; origin[j] = memobj->mapped_ptr[i].origin[j]; memobj->mapped_ptr[i].region[j] = 0; memobj->mapped_ptr[i].origin[j] = 0; } memobj->mapped_ptr[i].size = 0; memobj->mapped_ptr[i].v_ptr = NULL; memobj->map_ref--; break; } } /* can not find a mapped address? 
*/ INVALID_VALUE_IF(i == memobj->mapped_ptr_sz); if (memobj->flags & CL_MEM_USE_HOST_PTR) { if(memobj->type == CL_MEM_BUFFER_TYPE || memobj->type == CL_MEM_SUBBUFFER_TYPE) { assert(mapped_ptr >= memobj->host_ptr && mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size); /* Sync the data. */ if (!memobj->is_userptr) memcpy(v_ptr, mapped_ptr, mapped_size); } else { CHECK_IMAGE(memobj, image); if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) row_pitch = image->slice_pitch; else row_pitch = image->row_pitch; //v_ptr has the offset added; host_ptr has not. cl_mem_copy_image_region(origin, region, v_ptr, row_pitch, image->slice_pitch, memobj->host_ptr, image->host_row_pitch, image->host_slice_pitch, image, CL_FALSE, CL_TRUE); } } else { assert(v_ptr == mapped_ptr); } cl_mem_unmap_auto(memobj); /* shrink the mapped slot. */ if (memobj->mapped_ptr_sz/2 > memobj->map_ref) { int j = 0; cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2)); if (!new_ptr) { /* Just do nothing. */ goto error; } memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr)); for (i = 0; i < memobj->mapped_ptr_sz; i++) { if (memobj->mapped_ptr[i].ptr) { new_ptr[j] = memobj->mapped_ptr[i]; j++; assert(j < memobj->mapped_ptr_sz/2); } } memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2; free(memobj->mapped_ptr); memobj->mapped_ptr = new_ptr; } error: return err; } cl_int cl_enqueue_native_kernel(enqueue_data *data) { cl_int err = CL_SUCCESS; cl_uint num_mem_objects = (cl_uint)data->offset; const cl_mem *mem_list = data->mem_list; const void **args_mem_loc = (const void **)data->const_ptr; cl_uint i; for (i=0; i<num_mem_objects; i++) { const cl_mem buffer = mem_list[i]; CHECK_MEM(buffer); *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer, 0); } data->user_func(data->ptr); for (i=0; i<num_mem_objects; i++) { cl_mem_unmap_auto(mem_list[i]); } free(data->ptr); error: return err; } cl_int cl_enqueue_handle(cl_event event, enqueue_data* data) { /* if need profiling, add the submit timestamp here. */ if (event && event->type != CL_COMMAND_USER && event->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(event, CL_PROFILING_COMMAND_SUBMIT); } switch(data->type) { case EnqueueReadBuffer: return cl_enqueue_read_buffer(data); case EnqueueReadBufferRect: return cl_enqueue_read_buffer_rect(data); case EnqueueWriteBuffer: return cl_enqueue_write_buffer(data); case EnqueueWriteBufferRect: return cl_enqueue_write_buffer_rect(data); case EnqueueReadImage: return cl_enqueue_read_image(data); case EnqueueWriteImage: return cl_enqueue_write_image(data); case EnqueueMapBuffer: return cl_enqueue_map_buffer(data); case EnqueueMapImage: return cl_enqueue_map_image(data); case EnqueueUnmapMemObject: return cl_enqueue_unmap_mem_object(data); case EnqueueCopyBufferRect: case EnqueueCopyBuffer: case EnqueueCopyImage: case EnqueueCopyBufferToImage: case EnqueueCopyImageToBuffer: case EnqueueNDRangeKernel: case EnqueueFillBuffer: case EnqueueFillImage: return cl_event_flush(event); case EnqueueNativeKernel: return cl_enqueue_native_kernel(data); case EnqueueMigrateMemObj: default: return CL_SUCCESS; } } Beignet-1.1.1-Source/src/cl_program.h000664 001750 001750 00000012775 12576733264 016565 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #ifndef __CL_PROGRAM_H__ #define __CL_PROGRAM_H__ #include "cl_internals.h" #include "cl_gbe_loader.h" #include "CL/cl.h" #include #include // This is the structure output by the compiler struct _gbe_program; enum { FROM_SOURCE = 0, FROM_LLVM = 1, FROM_BINARY = 2, FROM_LLVM_SPIR = 3 }; /* This maps an OCL file containing some kernels */ struct _cl_program { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a program */ volatile int ref_n; /* We reference count this object */ gbe_program opaque; /* (Opaque) program as output by the compiler */ cl_kernel *ker; /* All kernels included by the OCL file */ cl_program prev, next; /* We chain the programs together */ cl_context ctx; /* Its parent context */ char *bin; /* The program copied verbatim */ size_t bin_sz; /* Its size in memory */ char *source; /* Program sources */ char *binary; /* Program binary. */ size_t binary_sz; /* The binary size. */ uint32_t binary_type; /* binary type: COMPILED_OBJECT(LLVM IR), LIBRARY(LLVM IR with option "-create-library"), or EXECUTABLE(GEN binary). */ uint32_t ker_n; /* Number of declared kernels */ uint32_t source_type:2; /* Built from binary, source or LLVM */ uint32_t is_built:1; /* Did we call clBuildProgram on it? */ int32_t build_status; /* build status. */ char *build_opts; /* The build options for this program */ size_t build_log_max_sz; /*build log maximum size in bytes.*/ char *build_log; /* The build log for this program. */ size_t build_log_sz; /* The actual build log size.*/ }; /* Create an empty program */ extern cl_program cl_program_new(cl_context); /* Destroy and deallocate a program */ extern void cl_program_delete(cl_program); /* Add one more reference to the object (to defer its deletion) */ extern void cl_program_add_ref(cl_program); /* Create a kernel for the OCL user */ extern cl_kernel cl_program_create_kernel(cl_program, const char*, cl_int*); /* Creates kernel objects for all kernel functions in the program.
*/ extern cl_int cl_program_create_kernels_in_program(cl_program, cl_kernel*); /* Create a program from OCL source */ extern cl_program cl_program_create_from_source(cl_context ctx, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret); /* Directly create a program from a blob */ extern cl_program cl_program_create_from_binary(cl_context context, cl_uint num_devices, const cl_device_id * devices, const size_t * lengths, const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret); /* Create a program with built-in kernels*/ extern cl_program cl_program_create_with_built_in_kernles(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const char * kernel_names, cl_int * errcode_ret); /* Directly create a program from a LLVM source file */ extern cl_program cl_program_create_from_llvm(cl_context context, cl_uint num_devices, const cl_device_id * devices, const char * fileName, cl_int * errcode_ret); /* Build the program as specified by OCL */ extern cl_int cl_program_build(cl_program p, const char* options); /* Compile the program as specified by OCL */ extern cl_int cl_program_compile(cl_program p, cl_uint num_input_headers, const cl_program * input_headers, const char ** header_include_names, const char* options); /* link the program as specified by OCL */ extern cl_program cl_program_link(cl_context context, cl_uint num_input_programs, const cl_program * input_programs, const char * options, cl_int* errcode_ret); /* Get the kernel names in program */ extern void cl_program_get_kernel_names(cl_program p, size_t size, char *names, size_t *size_ret); #endif /* __CL_PROGRAM_H__ */ Beignet-1.1.1-Source/src/cl_command_queue.h000664 001750 001750 00000010410 12576733264 017730 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_COMMAND_QUEUE_H__ #define __CL_COMMAND_QUEUE_H__ #include "cl_internals.h" #include "cl_driver.h" #include "cl_thread.h" #include "CL/cl.h" #include struct intel_gpgpu; /* Basically, this is a (kind-of) batch buffer */ struct _cl_command_queue { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a command queue */ volatile int ref_n; /* We reference count this object */ cl_context ctx; /* Its parent context */ cl_event* barrier_events; /* Point to array of non-complete user events that block this command queue */ cl_int barrier_events_num; /* Number of Non-complete user events */ cl_int barrier_events_size; /* The size of array that wait_events point to */ cl_event* wait_events; /* Point to array of non-complete user events that block this command queue */ cl_int wait_events_num; /* Number of Non-complete user events */ cl_int wait_events_size; /* The size of array that wait_events point to */ cl_command_queue_properties props; /* Queue properties */ cl_command_queue prev, next; /* We chain the command queues together */ void *thread_data; /* Used to store thread context data */ cl_mem perf; /* Where to put the perf counters */ }; /* The macro to get the thread specified gpgpu struct. */ #define GET_QUEUE_THREAD_GPGPU(queue) \ cl_gpgpu gpgpu = queue ? cl_get_thread_gpgpu(queue) : NULL; \ if (queue) \ assert(gpgpu); /* Allocate and initialize a new command queue. Also insert it in the list of * command queue in the associated context */ extern cl_command_queue cl_command_queue_new(cl_context); /* Destroy and deallocate the command queue */ extern void cl_command_queue_delete(cl_command_queue); /* Keep one more reference on the queue */ extern void cl_command_queue_add_ref(cl_command_queue); /* Map ND range kernel from OCL API */ extern cl_int cl_command_queue_ND_range(cl_command_queue queue, cl_kernel ker, const uint32_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size); /* The memory object where to report the performance */ extern cl_int cl_command_queue_set_report_buffer(cl_command_queue, cl_mem); /* Flush for the command queue */ extern cl_int cl_command_queue_flush(cl_command_queue); /* Flush for the specified gpgpu */ extern int cl_command_queue_flush_gpgpu(cl_command_queue, cl_gpgpu); /* Wait for the completion of the command queue */ extern cl_int cl_command_queue_finish(cl_command_queue); /* Bind all the surfaces in the GPGPU state */ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel); /* Bind all the image surfaces in the GPGPU state */ extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel); /* Insert a user event to command's wait_events */ extern void cl_command_queue_insert_event(cl_command_queue, cl_event); /* Remove a user event from command's wait_events */ extern void cl_command_queue_remove_event(cl_command_queue, cl_event); extern void cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event); extern void cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event); #endif /* __CL_COMMAND_QUEUE_H__ */ Beignet-1.1.1-Source/src/cl_context.h000664 001750 001750 00000017627 12576733264 016613 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) 
any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #ifndef __CL_CONTEXT_H__ #define __CL_CONTEXT_H__ #include "CL/cl.h" #include "cl_internals.h" #include "cl_driver.h" #include "cl_khr_icd.h" #include #include /* DRI device created at context creation */ struct intel_driver; enum _cl_gl_context_type { CL_GL_NOSHARE, CL_GL_EGL_DISPLAY, CL_GL_GLX_DISPLAY, CL_GL_WGL_HDC, CL_GL_CGL_SHAREGROUP }; enum _cl_internal_ker_type { CL_INTERNAL_KERNEL_MIN = 0, CL_ENQUEUE_COPY_BUFFER_ALIGN4 = 0, CL_ENQUEUE_COPY_BUFFER_ALIGN16, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, CL_ENQUEUE_COPY_BUFFER_RECT, CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4, CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d array CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image 2d CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image 3d CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d to buffer CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1-aligned pattern, pattern size=1 CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2-aligned pattern, pattern size=2 CL_ENQUEUE_FILL_BUFFER_ALIGN4, //fill buffer with 4-aligned pattern, pattern size=4 CL_ENQUEUE_FILL_BUFFER_ALIGN8_8, //fill buffer with 8-aligned pattern, pattern size=8 CL_ENQUEUE_FILL_BUFFER_ALIGN8_16, //fill buffer with 16-aligned pattern, pattern size=16 CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, //fill buffer with 16-aligned pattern, pattern size=32 CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, //fill buffer with 16-aligned pattern, pattern size=64 CL_ENQUEUE_FILL_BUFFER_ALIGN128, //fill buffer with 128-aligned pattern, pattern size=128 CL_ENQUEUE_FILL_IMAGE_1D, //fill image 1d CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, //fill image 1d array CL_ENQUEUE_FILL_IMAGE_2D, //fill image 2d CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, //fill image 2d array CL_ENQUEUE_FILL_IMAGE_3D, //fill image 3d CL_INTERNAL_KERNEL_MAX }; struct _cl_context_prop { cl_context_properties platform_id; enum _cl_gl_context_type gl_type; cl_context_properties gl_context; union { cl_context_properties egl_display; cl_context_properties glx_display; cl_context_properties wgl_hdc; cl_context_properties cgl_sharegroup; }; }; #define IS_EGL_CONTEXT(ctx) (ctx->props.gl_type == CL_GL_EGL_DISPLAY) #define EGL_DISP(ctx) (EGLDisplay)(ctx->props.egl_display) #define EGL_CTX(ctx)
(EGLContext)(ctx->props.gl_context) /* Encapsulate the whole device */ struct _cl_context { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a context */ volatile int ref_n; /* We reference count this object */ cl_driver drv; /* Handles HW or simulator */ cl_device_id device; /* All information about the GPU device */ cl_command_queue queues; /* All command queues currently allocated */ cl_program programs; /* All programs currently allocated */ cl_mem buffers; /* All memory object currently allocated */ cl_sampler samplers; /* All sampler object currently allocated */ cl_event events; /* All event object currently allocated */ pthread_mutex_t queue_lock; /* To allocate and deallocate queues */ pthread_mutex_t program_lock; /* To allocate and deallocate programs */ pthread_mutex_t buffer_lock; /* To allocate and deallocate buffers */ pthread_mutex_t sampler_lock; /* To allocate and deallocate samplers */ pthread_mutex_t event_lock; /* To allocate and deallocate events */ cl_program internal_prgs[CL_INTERNAL_KERNEL_MAX]; /* All programs internal used, for example clEnqueuexxx api use */ cl_kernel internal_kernels[CL_INTERNAL_KERNEL_MAX]; /* All kernels for clenqueuexxx api, for example clEnqueuexxx api use */ cl_program built_in_prgs; /*all built-in kernels belongs to this program only*/ cl_kernel built_in_kernels[CL_INTERNAL_KERNEL_MAX]; uint32_t ver; /* Gen version */ struct _cl_context_prop props; cl_context_properties * prop_user; /* a copy of user passed context properties when create context */ cl_uint prop_len; /* count of the properties */ void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *); /* User's callback when error occur in context */ void *user_data; /* A pointer to user supplied data */ }; /* Implement OpenCL function */ extern cl_context cl_create_context(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK * pfn_notify) (const char*, const void*, size_t, void*), void *, cl_int*); /* Allocate and initialize a context */ extern cl_context cl_context_new(struct _cl_context_prop *); /* Destroy and deallocate a context */ extern void cl_context_delete(cl_context); /* Increment the context reference counter */ extern void cl_context_add_ref(cl_context); /* Create the command queue from the given context and device */ extern cl_command_queue cl_context_create_queue(cl_context, cl_device_id, cl_command_queue_properties, cl_int*); /* Enqueue a ND Range kernel */ extern cl_int cl_context_ND_kernel(cl_context, cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*); /* Used for allocation */ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx); /* Get the internal used kernel from binary*/ extern cl_kernel cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index, const char * str_kernel, size_t size, const char * str_option); #endif /* __CL_CONTEXT_H__ */ Beignet-1.1.1-Source/src/cl_gbe_loader.cpp000664 001750 001750 00000033541 12576744576 017546 0ustar00yryr000000 000000 /* * Copyright © 2014 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * */ #include #include #include #include #include "cl_gbe_loader.h" #include "backend/src/GBEConfig.h" //function pointer from libgbe.so gbe_program_new_from_source_cb *compiler_program_new_from_source = NULL; gbe_program_compile_from_source_cb *compiler_program_compile_from_source = NULL; gbe_program_new_gen_program_cb *compiler_program_new_gen_program = NULL; gbe_program_link_program_cb *compiler_program_link_program = NULL; gbe_program_check_opt_cb *compiler_program_check_opt = NULL; gbe_program_build_from_llvm_cb *compiler_program_build_from_llvm = NULL; gbe_program_new_from_llvm_binary_cb *compiler_program_new_from_llvm_binary = NULL; gbe_program_serialize_to_binary_cb *compiler_program_serialize_to_binary = NULL; gbe_program_new_from_llvm_cb *compiler_program_new_from_llvm = NULL; gbe_program_clean_llvm_resource_cb *compiler_program_clean_llvm_resource = NULL; //function pointer from libgbeinterp.so gbe_program_new_from_binary_cb *interp_program_new_from_binary = NULL; gbe_program_get_global_constant_size_cb *interp_program_get_global_constant_size = NULL; gbe_program_get_global_constant_data_cb *interp_program_get_global_constant_data = NULL; gbe_program_delete_cb *interp_program_delete = NULL; gbe_program_get_kernel_num_cb *interp_program_get_kernel_num = NULL; gbe_program_get_kernel_by_name_cb *interp_program_get_kernel_by_name = NULL; gbe_program_get_kernel_cb *interp_program_get_kernel = NULL; gbe_kernel_get_name_cb *interp_kernel_get_name = NULL; gbe_kernel_get_attributes_cb *interp_kernel_get_attributes = NULL; gbe_kernel_get_code_cb *interp_kernel_get_code = NULL; gbe_kernel_get_code_size_cb *interp_kernel_get_code_size = NULL; gbe_kernel_get_arg_num_cb *interp_kernel_get_arg_num = NULL; gbe_kernel_get_arg_size_cb *interp_kernel_get_arg_size = NULL; gbe_kernel_get_arg_bti_cb *interp_kernel_get_arg_bti = NULL; gbe_kernel_get_arg_type_cb *interp_kernel_get_arg_type = NULL; gbe_kernel_get_arg_align_cb *interp_kernel_get_arg_align = NULL; gbe_kernel_get_simd_width_cb *interp_kernel_get_simd_width = NULL; gbe_kernel_get_curbe_offset_cb *interp_kernel_get_curbe_offset = NULL; gbe_kernel_get_curbe_size_cb *interp_kernel_get_curbe_size = NULL; gbe_kernel_get_stack_size_cb *interp_kernel_get_stack_size = NULL; gbe_kernel_get_scratch_size_cb *interp_kernel_get_scratch_size = NULL; gbe_kernel_get_required_work_group_size_cb *interp_kernel_get_required_work_group_size = NULL; gbe_kernel_use_slm_cb *interp_kernel_use_slm = NULL; gbe_kernel_get_slm_size_cb *interp_kernel_get_slm_size = NULL; gbe_kernel_get_sampler_size_cb *interp_kernel_get_sampler_size = NULL; gbe_kernel_get_sampler_data_cb *interp_kernel_get_sampler_data = NULL; gbe_kernel_get_compile_wg_size_cb *interp_kernel_get_compile_wg_size = NULL; gbe_kernel_get_image_size_cb *interp_kernel_get_image_size = NULL; gbe_kernel_get_image_data_cb *interp_kernel_get_image_data = NULL; gbe_get_printf_num_cb* interp_get_printf_num = NULL; gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti = NULL; gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti = NULL; gbe_dup_printfset_cb* interp_dup_printfset = NULL; gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size = NULL; gbe_release_printf_info_cb* interp_release_printf_info = NULL; gbe_output_printf_cb* interp_output_printf = NULL; gbe_kernel_get_arg_info_cb *interp_kernel_get_arg_info = NULL; 
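/* A minimal sketch (illustration only, using a hypothetical callback type
 * gbe_foo_cb) of the lookup pattern used throughout this loader:
 * libgbe/libgbeinterp export *variables* holding function pointers, not the
 * functions themselves, so dlsym() returns the address of the variable,
 * which must be dereferenced once more before use:
 *
 *   gbe_foo_cb **slot = (gbe_foo_cb **)dlsym(handle, "gbe_foo");
 *   gbe_foo_cb *fn = slot ? *slot : NULL;  // NULL slot: symbol missing
 *
 * This mirrors the "*(type **)dlsym(...)" idiom in LoadInterp() below, which
 * assumes the symbol exists and only checks the resulting function pointer. */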
struct GbeLoaderInitializer { GbeLoaderInitializer() { LoadCompiler(); const char* path; if (!LoadInterp(path)) std::cerr << "unable to load " << path << " which is part of the driver, please check!" << std::endl; } bool LoadInterp(const char*& path) { const char* interpPath = getenv("OCL_INTERP_PATH"); if (interpPath == NULL) interpPath = INTERP_OBJECT_DIR; path = interpPath; dlhInterp = dlopen(interpPath, RTLD_LAZY | RTLD_LOCAL); if (dlhInterp == NULL) { return false; } interp_program_new_from_binary = *(gbe_program_new_from_binary_cb**)dlsym(dlhInterp, "gbe_program_new_from_binary"); if (interp_program_new_from_binary == NULL) return false; interp_program_get_global_constant_size = *(gbe_program_get_global_constant_size_cb**)dlsym(dlhInterp, "gbe_program_get_global_constant_size"); if (interp_program_get_global_constant_size == NULL) return false; interp_program_get_global_constant_data = *(gbe_program_get_global_constant_data_cb**)dlsym(dlhInterp, "gbe_program_get_global_constant_data"); if (interp_program_get_global_constant_data == NULL) return false; interp_program_delete = *(gbe_program_delete_cb**)dlsym(dlhInterp, "gbe_program_delete"); if (interp_program_delete == NULL) return false; interp_program_get_kernel_num = *(gbe_program_get_kernel_num_cb**)dlsym(dlhInterp, "gbe_program_get_kernel_num"); if (interp_program_get_kernel_num == NULL) return false; interp_program_get_kernel_by_name = *(gbe_program_get_kernel_by_name_cb**)dlsym(dlhInterp, "gbe_program_get_kernel_by_name"); if (interp_program_get_kernel_by_name == NULL) return false; interp_program_get_kernel = *(gbe_program_get_kernel_cb**)dlsym(dlhInterp, "gbe_program_get_kernel"); if (interp_program_get_kernel == NULL) return false; interp_kernel_get_name = *(gbe_kernel_get_name_cb**)dlsym(dlhInterp, "gbe_kernel_get_name"); if (interp_kernel_get_name == NULL) return false; interp_kernel_get_attributes = *(gbe_kernel_get_attributes_cb**)dlsym(dlhInterp, "gbe_kernel_get_attributes"); if (interp_kernel_get_attributes == NULL) return false; interp_kernel_get_code = *(gbe_kernel_get_code_cb**)dlsym(dlhInterp, "gbe_kernel_get_code"); if (interp_kernel_get_code == NULL) return false; interp_kernel_get_code_size = *(gbe_kernel_get_code_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_code_size"); if (interp_kernel_get_code_size == NULL) return false; interp_kernel_get_arg_num = *(gbe_kernel_get_arg_num_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_num"); if (interp_kernel_get_arg_num == NULL) return false; interp_kernel_get_arg_size = *(gbe_kernel_get_arg_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_size"); if (interp_kernel_get_arg_size == NULL) return false; interp_kernel_get_arg_bti = *(gbe_kernel_get_arg_bti_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_bti"); if (interp_kernel_get_arg_bti == NULL) return false; interp_kernel_get_arg_type = *(gbe_kernel_get_arg_type_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_type"); if (interp_kernel_get_arg_type == NULL) return false; interp_kernel_get_arg_align = *(gbe_kernel_get_arg_align_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_align"); if (interp_kernel_get_arg_align == NULL) return false; interp_kernel_get_simd_width = *(gbe_kernel_get_simd_width_cb**)dlsym(dlhInterp, "gbe_kernel_get_simd_width"); if (interp_kernel_get_simd_width == NULL) return false; interp_kernel_get_curbe_offset = *(gbe_kernel_get_curbe_offset_cb**)dlsym(dlhInterp, "gbe_kernel_get_curbe_offset"); if (interp_kernel_get_curbe_offset == NULL) return false; interp_kernel_get_curbe_size = 
*(gbe_kernel_get_curbe_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_curbe_size"); if (interp_kernel_get_curbe_size == NULL) return false; interp_kernel_get_stack_size = *(gbe_kernel_get_stack_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_stack_size"); if (interp_kernel_get_stack_size == NULL) return false; interp_kernel_get_scratch_size = *(gbe_kernel_get_scratch_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_scratch_size"); if (interp_kernel_get_scratch_size == NULL) return false; interp_kernel_get_required_work_group_size = *(gbe_kernel_get_required_work_group_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_required_work_group_size"); if (interp_kernel_get_required_work_group_size == NULL) return false; interp_kernel_use_slm = *(gbe_kernel_use_slm_cb**)dlsym(dlhInterp, "gbe_kernel_use_slm"); if (interp_kernel_use_slm == NULL) return false; interp_kernel_get_slm_size = *(gbe_kernel_get_slm_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_slm_size"); if (interp_kernel_get_slm_size == NULL) return false; interp_kernel_get_sampler_size = *(gbe_kernel_get_sampler_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_sampler_size"); if (interp_kernel_get_sampler_size == NULL) return false; interp_kernel_get_sampler_data = *(gbe_kernel_get_sampler_data_cb**)dlsym(dlhInterp, "gbe_kernel_get_sampler_data"); if (interp_kernel_get_sampler_data == NULL) return false; interp_kernel_get_compile_wg_size = *(gbe_kernel_get_compile_wg_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_compile_wg_size"); if (interp_kernel_get_compile_wg_size == NULL) return false; interp_kernel_get_image_size = *(gbe_kernel_get_image_size_cb**)dlsym(dlhInterp, "gbe_kernel_get_image_size"); if (interp_kernel_get_image_size == NULL) return false; interp_kernel_get_image_data = *(gbe_kernel_get_image_data_cb**)dlsym(dlhInterp, "gbe_kernel_get_image_data"); if (interp_kernel_get_image_data == NULL) return false; interp_get_printf_num = *(gbe_get_printf_num_cb**)dlsym(dlhInterp, "gbe_get_printf_num"); if (interp_get_printf_num == NULL) return false; interp_get_printf_buf_bti = *(gbe_get_printf_buf_bti_cb**)dlsym(dlhInterp, "gbe_get_printf_buf_bti"); if (interp_get_printf_buf_bti == NULL) return false; interp_get_printf_indexbuf_bti = *(gbe_get_printf_indexbuf_bti_cb**)dlsym(dlhInterp, "gbe_get_printf_indexbuf_bti"); if (interp_get_printf_indexbuf_bti == NULL) return false; interp_dup_printfset = *(gbe_dup_printfset_cb**)dlsym(dlhInterp, "gbe_dup_printfset"); if (interp_dup_printfset == NULL) return false; interp_get_printf_sizeof_size = *(gbe_get_printf_sizeof_size_cb**)dlsym(dlhInterp, "gbe_get_printf_sizeof_size"); if (interp_get_printf_sizeof_size == NULL) return false; interp_release_printf_info = *(gbe_release_printf_info_cb**)dlsym(dlhInterp, "gbe_release_printf_info"); if (interp_release_printf_info == NULL) return false; interp_output_printf = *(gbe_output_printf_cb**)dlsym(dlhInterp, "gbe_output_printf"); if (interp_output_printf == NULL) return false; interp_kernel_get_arg_info = *(gbe_kernel_get_arg_info_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_info"); if (interp_kernel_get_arg_info == NULL) return false; return true; } void LoadCompiler() { compilerLoaded = false; const char* nonCompiler = getenv("OCL_NON_COMPILER"); if (nonCompiler != NULL) { if (strcmp(nonCompiler, "1") == 0) return; } const char* gbePath = getenv("OCL_GBE_PATH"); if (gbePath == NULL) gbePath = GBE_OBJECT_DIR; dlhCompiler = dlopen(gbePath, RTLD_LAZY | RTLD_LOCAL); if (dlhCompiler != NULL) { compiler_program_new_from_source = *(gbe_program_new_from_source_cb **)dlsym(dlhCompiler, 
"gbe_program_new_from_source"); if (compiler_program_new_from_source == NULL) return; compiler_program_compile_from_source = *(gbe_program_compile_from_source_cb **)dlsym(dlhCompiler, "gbe_program_compile_from_source"); if (compiler_program_compile_from_source == NULL) return; compiler_program_new_gen_program = *(gbe_program_new_gen_program_cb **)dlsym(dlhCompiler, "gbe_program_new_gen_program"); if (compiler_program_new_gen_program == NULL) return; compiler_program_link_program = *(gbe_program_link_program_cb **)dlsym(dlhCompiler, "gbe_program_link_program"); if (compiler_program_link_program == NULL) return; compiler_program_check_opt = *(gbe_program_check_opt_cb **)dlsym(dlhCompiler, "gbe_program_check_opt"); if (compiler_program_check_opt == NULL) return; compiler_program_build_from_llvm = *(gbe_program_build_from_llvm_cb **)dlsym(dlhCompiler, "gbe_program_build_from_llvm"); if (compiler_program_build_from_llvm == NULL) return; compiler_program_new_from_llvm_binary = *(gbe_program_new_from_llvm_binary_cb **)dlsym(dlhCompiler, "gbe_program_new_from_llvm_binary"); if (compiler_program_new_from_llvm_binary == NULL) return; compiler_program_serialize_to_binary = *(gbe_program_serialize_to_binary_cb **)dlsym(dlhCompiler, "gbe_program_serialize_to_binary"); if (compiler_program_serialize_to_binary == NULL) return; compiler_program_new_from_llvm = *(gbe_program_new_from_llvm_cb **)dlsym(dlhCompiler, "gbe_program_new_from_llvm"); if (compiler_program_new_from_llvm == NULL) return; compiler_program_clean_llvm_resource = *(gbe_program_clean_llvm_resource_cb **)dlsym(dlhCompiler, "gbe_program_clean_llvm_resource"); if (compiler_program_clean_llvm_resource == NULL) return; compilerLoaded = true; } } ~GbeLoaderInitializer() { if (dlhCompiler != NULL) dlclose(dlhCompiler); if (dlhInterp != NULL) dlclose(dlhInterp); } bool compilerLoaded; void *dlhCompiler; void *dlhInterp; }; static struct GbeLoaderInitializer gbeLoader; int CompilerSupported() { if (gbeLoader.compilerLoaded) return 1; else return 0; } Beignet-1.1.1-Source/src/cl_device_id.c000664 001750 001750 00000126256 12605356050 017017 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_internals.h" #include "cl_utils.h" #include "cl_driver.h" #include "cl_device_data.h" #include "cl_khr_icd.h" #include "cl_thread.h" #include "CL/cl.h" #include "CL/cl_ext.h" #include "cl_gbe_loader.h" #include "cl_alloc.h" #include <assert.h> #include <stdio.h> #include <string.h> #include <stdlib.h> #include <sys/sysinfo.h> #ifndef CL_VERSION_1_2 #define CL_DEVICE_BUILT_IN_KERNELS 0x103F #endif static struct _cl_device_id intel_ivb_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 16, .max_thread_per_unit = 8, .sub_slice_count = 2, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen7_device.h" }; static struct _cl_device_id intel_ivb_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 6, .max_thread_per_unit = 6, .sub_slice_count = 1, .max_work_item_sizes = {256, 256, 256}, .max_work_group_size = 256, .max_clock_frequency = 1000, #include "cl_gen7_device.h" }; static struct _cl_device_id intel_baytrail_t_device = { INIT_ICD(dispatch) .max_compute_unit = 4, .max_thread_per_unit = 8, .sub_slice_count = 1, .max_work_item_sizes = {256, 256, 256}, .max_work_group_size = 256, .max_clock_frequency = 1000, #include "cl_gen7_device.h" }; /* XXX we clone IVB for HSW now */ static struct _cl_device_id intel_hsw_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 10, .max_thread_per_unit = 7, .sub_slice_count = 1, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; static struct _cl_device_id intel_hsw_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 20, .max_thread_per_unit = 7, .sub_slice_count = 2, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; static struct _cl_device_id intel_hsw_gt3_device = { INIT_ICD(dispatch) .max_compute_unit = 40, .max_thread_per_unit = 7, .sub_slice_count = 4, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; /* XXX we clone HSW for BRW now */ static struct _cl_device_id intel_brw_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 12, .max_thread_per_unit = 7, .sub_slice_count = 2, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; static struct _cl_device_id intel_brw_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 24, .max_thread_per_unit = 7, .sub_slice_count = 3, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; static struct _cl_device_id intel_brw_gt3_device = { INIT_ICD(dispatch) .max_compute_unit = 48, .max_thread_per_unit = 7, .sub_slice_count = 6, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; /* Cherryview parts share one PCI id; max_compute_unit and max_thread_per_unit must be queried from drm */ static struct _cl_device_id intel_chv_device = { INIT_ICD(dispatch) .max_compute_unit = 8, .max_thread_per_unit = 7, .sub_slice_count = 2, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; /* XXX we clone BRW for SKL now */ static struct _cl_device_id intel_skl_gt1_device = { INIT_ICD(dispatch) .max_compute_unit = 6, .max_thread_per_unit = 7, .sub_slice_count = 2, .max_work_item_sizes = {512, 512, 512},
.max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; static struct _cl_device_id intel_skl_gt2_device = { INIT_ICD(dispatch) .max_compute_unit = 24, .max_thread_per_unit = 7, .sub_slice_count = 3, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; static struct _cl_device_id intel_skl_gt3_device = { INIT_ICD(dispatch) .max_compute_unit = 48, .max_thread_per_unit = 7, .sub_slice_count = 6, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; static struct _cl_device_id intel_skl_gt4_device = { INIT_ICD(dispatch) .max_compute_unit = 72, .max_thread_per_unit = 7, .sub_slice_count = 9, .max_work_item_sizes = {512, 512, 512}, .max_work_group_size = 512, .max_clock_frequency = 1000, #include "cl_gen75_device.h" }; LOCAL cl_device_id cl_get_gt_device(void) { cl_device_id ret = NULL; const int device_id = cl_driver_get_device_id(); cl_device_id device = NULL; #define DECL_INFO_STRING(BREAK, STRUCT, FIELD, STRING) \ STRUCT.FIELD = STRING; \ STRUCT.JOIN(FIELD,_sz) = sizeof(STRING); \ device = &STRUCT; \ goto BREAK; switch (device_id) { case PCI_CHIP_HASWELL_D1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell GT1 Desktop"); case PCI_CHIP_HASWELL_D2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell GT2 Desktop"); case PCI_CHIP_HASWELL_D3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell GT3 Desktop"); case PCI_CHIP_HASWELL_S1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell GT1 Server"); case PCI_CHIP_HASWELL_S2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell GT2 Server"); case PCI_CHIP_HASWELL_S3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell GT3 Server"); case PCI_CHIP_HASWELL_M1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell GT1 Mobile"); case PCI_CHIP_HASWELL_M2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell GT2 Mobile"); case PCI_CHIP_HASWELL_M3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell GT3 Mobile"); case PCI_CHIP_HASWELL_B1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell GT1 reserved"); case PCI_CHIP_HASWELL_B2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell GT2 reserved"); case PCI_CHIP_HASWELL_B3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell GT3 reserved"); case PCI_CHIP_HASWELL_E1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell GT1 reserved"); case PCI_CHIP_HASWELL_E2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell GT2 reserved"); case PCI_CHIP_HASWELL_E3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell GT3 reserved"); case PCI_CHIP_HASWELL_SDV_D1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT1 Desktop"); case PCI_CHIP_HASWELL_SDV_D2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT2 Desktop"); case PCI_CHIP_HASWELL_SDV_D3: DECL_INFO_STRING(has_break, 
intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT3 Desktop"); case PCI_CHIP_HASWELL_SDV_S1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT1 Server"); case PCI_CHIP_HASWELL_SDV_S2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT2 Server"); case PCI_CHIP_HASWELL_SDV_S3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT3 Server"); case PCI_CHIP_HASWELL_SDV_M1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT1 Mobile"); case PCI_CHIP_HASWELL_SDV_M2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT2 Mobile"); case PCI_CHIP_HASWELL_SDV_M3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT3 Mobile"); case PCI_CHIP_HASWELL_SDV_B1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT1 reserved"); case PCI_CHIP_HASWELL_SDV_B2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT2 reserved"); case PCI_CHIP_HASWELL_SDV_B3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT3 reserved"); case PCI_CHIP_HASWELL_SDV_E1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT1 reserved"); case PCI_CHIP_HASWELL_SDV_E2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT2 reserved"); case PCI_CHIP_HASWELL_SDV_E3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell" " Software Development Vehicle device GT3 reserved"); case PCI_CHIP_HASWELL_ULT_D1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT1 Desktop"); case PCI_CHIP_HASWELL_ULT_D2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT2 Desktop"); case PCI_CHIP_HASWELL_ULT_D3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT3 Desktop"); case PCI_CHIP_HASWELL_ULT_S1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT1 Server"); case PCI_CHIP_HASWELL_ULT_S2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT2 Server"); case PCI_CHIP_HASWELL_ULT_S3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT3 Server"); case PCI_CHIP_HASWELL_ULT_M1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT1 Mobile"); case PCI_CHIP_HASWELL_ULT_M2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile"); case PCI_CHIP_HASWELL_ULT_M3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT3 Mobile"); case PCI_CHIP_HASWELL_ULT_B1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT1 
reserved"); case PCI_CHIP_HASWELL_ULT_B2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT2 reserved"); case PCI_CHIP_HASWELL_ULT_B3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT3 reserved"); case PCI_CHIP_HASWELL_ULT_E1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT1 reserved"); case PCI_CHIP_HASWELL_ULT_E2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT2 reserved"); case PCI_CHIP_HASWELL_ULT_E3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell Ultrabook GT3 reserved"); /* CRW */ case PCI_CHIP_HASWELL_CRW_D1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell CRW GT1 Desktop"); case PCI_CHIP_HASWELL_CRW_D2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell CRW GT2 Desktop"); case PCI_CHIP_HASWELL_CRW_D3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell CRW GT3 Desktop"); case PCI_CHIP_HASWELL_CRW_S1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell CRW GT1 Server"); case PCI_CHIP_HASWELL_CRW_S2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell CRW GT2 Server"); case PCI_CHIP_HASWELL_CRW_S3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell CRW GT3 Server"); case PCI_CHIP_HASWELL_CRW_M1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell CRW GT1 Mobile"); case PCI_CHIP_HASWELL_CRW_M2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell CRW GT2 Mobile"); case PCI_CHIP_HASWELL_CRW_M3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell CRW GT3 Mobile"); case PCI_CHIP_HASWELL_CRW_B1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell CRW GT1 reserved"); case PCI_CHIP_HASWELL_CRW_B2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell CRW GT2 reserved"); case PCI_CHIP_HASWELL_CRW_B3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell CRW GT3 reserved"); case PCI_CHIP_HASWELL_CRW_E1: DECL_INFO_STRING(has_break, intel_hsw_gt1_device, name, "Intel(R) HD Graphics Haswell CRW GT1 reserved"); case PCI_CHIP_HASWELL_CRW_E2: DECL_INFO_STRING(has_break, intel_hsw_gt2_device, name, "Intel(R) HD Graphics Haswell CRW GT2 reserved"); case PCI_CHIP_HASWELL_CRW_E3: DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell CRW GT3 reserved"); has_break: device->device_id = device_id; device->platform = cl_get_platform_default(); ret = device; cl_intel_platform_get_default_extension(ret); break; case PCI_CHIP_IVYBRIDGE_GT1: DECL_INFO_STRING(ivb_gt1_break, intel_ivb_gt1_device, name, "Intel(R) HD Graphics IvyBridge GT1"); case PCI_CHIP_IVYBRIDGE_M_GT1: DECL_INFO_STRING(ivb_gt1_break, intel_ivb_gt1_device, name, "Intel(R) HD Graphics IvyBridge M GT1"); case PCI_CHIP_IVYBRIDGE_S_GT1: DECL_INFO_STRING(ivb_gt1_break, intel_ivb_gt1_device, name, "Intel(R) HD Graphics IvyBridge S GT1"); ivb_gt1_break: intel_ivb_gt1_device.device_id = device_id; intel_ivb_gt1_device.platform = cl_get_platform_default(); ret = &intel_ivb_gt1_device; cl_intel_platform_get_default_extension(ret); break; case PCI_CHIP_IVYBRIDGE_GT2: 
DECL_INFO_STRING(ivb_gt2_break, intel_ivb_gt2_device, name, "Intel(R) HD Graphics IvyBridge GT2"); case PCI_CHIP_IVYBRIDGE_M_GT2: DECL_INFO_STRING(ivb_gt2_break, intel_ivb_gt2_device, name, "Intel(R) HD Graphics IvyBridge M GT2"); case PCI_CHIP_IVYBRIDGE_S_GT2: DECL_INFO_STRING(ivb_gt2_break, intel_ivb_gt2_device, name, "Intel(R) HD Graphics IvyBridge S GT2"); ivb_gt2_break: intel_ivb_gt2_device.device_id = device_id; intel_ivb_gt2_device.platform = cl_get_platform_default(); ret = &intel_ivb_gt2_device; cl_intel_platform_get_default_extension(ret); break; case PCI_CHIP_BAYTRAIL_T: DECL_INFO_STRING(baytrail_t_device_break, intel_baytrail_t_device, name, "Intel(R) HD Graphics Bay Trail-T"); baytrail_t_device_break: intel_baytrail_t_device.device_id = device_id; intel_baytrail_t_device.platform = cl_get_platform_default(); ret = &intel_baytrail_t_device; cl_intel_platform_get_default_extension(ret); break; case PCI_CHIP_BROADWLL_M_GT1: DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell Mobile GT1"); case PCI_CHIP_BROADWLL_D_GT1: DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell U-Processor GT1"); case PCI_CHIP_BROADWLL_S_GT1: DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell Server GT1"); case PCI_CHIP_BROADWLL_W_GT1: DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell Workstation GT1"); case PCI_CHIP_BROADWLL_U_GT1: DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell ULX GT1"); brw_gt1_break: /* For Gen8 and later, half float is suppported and we will enable cl_khr_fp16. */ intel_brw_gt1_device.device_id = device_id; intel_brw_gt1_device.platform = cl_get_platform_default(); ret = &intel_brw_gt1_device; cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_BROADWLL_M_GT2: DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5600 BroadWell Mobile GT2"); case PCI_CHIP_BROADWLL_D_GT2: DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2"); case PCI_CHIP_BROADWLL_S_GT2: DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Server GT2"); case PCI_CHIP_BROADWLL_W_GT2: DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Workstation GT2"); case PCI_CHIP_BROADWLL_U_GT2: DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5300 BroadWell ULX GT2"); brw_gt2_break: intel_brw_gt2_device.device_id = device_id; intel_brw_gt2_device.platform = cl_get_platform_default(); ret = &intel_brw_gt2_device; cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_BROADWLL_M_GT3: DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Pro Graphics 6200 BroadWell Mobile GT3"); case PCI_CHIP_BROADWLL_D_GT3: DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics 6000 BroadWell U-Processor GT3"); case PCI_CHIP_BROADWLL_UI_GT3: DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Graphics 6100 BroadWell U-Processor GT3"); case PCI_CHIP_BROADWLL_S_GT3: DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Pro Graphics P6300 BroadWell Server GT3"); case PCI_CHIP_BROADWLL_W_GT3: DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell Workstation GT3"); case PCI_CHIP_BROADWLL_U_GT3: 
DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell ULX GT3"); brw_gt3_break: intel_brw_gt3_device.device_id = device_id; intel_brw_gt3_device.platform = cl_get_platform_default(); ret = &intel_brw_gt3_device; cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_CHV_0: case PCI_CHIP_CHV_1: case PCI_CHIP_CHV_2: case PCI_CHIP_CHV_3: DECL_INFO_STRING(chv_break, intel_chv_device, name, "Intel(R) HD Graphics Cherryview"); chv_break: intel_chv_device.device_id = device_id; intel_chv_device.platform = cl_get_platform_default(); ret = &intel_chv_device; cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_SKYLAKE_ULT_GT1: DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULT GT1"); case PCI_CHIP_SKYLAKE_ULX_GT1: DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULX GT1"); case PCI_CHIP_SKYLAKE_DT_GT1: DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Desktop GT1"); case PCI_CHIP_SKYLAKE_HALO_GT1: DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Halo GT1"); case PCI_CHIP_SKYLAKE_SRV_GT1: DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1"); skl_gt1_break: intel_skl_gt1_device.device_id = device_id; intel_skl_gt1_device.platform = cl_get_platform_default(); ret = &intel_skl_gt1_device; cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_SKYLAKE_ULT_GT2: DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2"); case PCI_CHIP_SKYLAKE_ULT_GT2F: DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2F"); case PCI_CHIP_SKYLAKE_ULX_GT2: DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULX GT2"); case PCI_CHIP_SKYLAKE_DT_GT2: DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Desktop GT2"); case PCI_CHIP_SKYLAKE_HALO_GT2: DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Halo GT2"); case PCI_CHIP_SKYLAKE_SRV_GT2: DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2"); skl_gt2_break: intel_skl_gt2_device.device_id = device_id; intel_skl_gt2_device.platform = cl_get_platform_default(); ret = &intel_skl_gt2_device; cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_SKYLAKE_ULT_GT3: DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake ULT GT3"); case PCI_CHIP_SKYLAKE_HALO_GT3: DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Halo GT3"); case PCI_CHIP_SKYLAKE_SRV_GT3: DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3"); skl_gt3_break: intel_skl_gt3_device.device_id = device_id; intel_skl_gt3_device.platform = cl_get_platform_default(); ret = &intel_skl_gt3_device; cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_SKYLAKE_HALO_GT4: DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Halo GT4"); case PCI_CHIP_SKYLAKE_SRV_GT4: DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4"); skl_gt4_break: intel_skl_gt4_device.device_id = device_id; intel_skl_gt4_device.platform = cl_get_platform_default(); ret = &intel_skl_gt4_device; 
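/* Every per-GT label stamps the probed PCI id and the default platform
 * into the selected static table, then publishes the extension string;
 * Gen8+ parts (Broadwell, Cherryview, Skylake) additionally enable
 * cl_khr_fp16, since half floats are supported from Gen8 onward. */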
cl_intel_platform_enable_fp16_extension(ret); break; case PCI_CHIP_SANDYBRIDGE_BRIDGE: case PCI_CHIP_SANDYBRIDGE_GT1: case PCI_CHIP_SANDYBRIDGE_GT2: case PCI_CHIP_SANDYBRIDGE_GT2_PLUS: case PCI_CHIP_SANDYBRIDGE_BRIDGE_M: case PCI_CHIP_SANDYBRIDGE_M_GT1: case PCI_CHIP_SANDYBRIDGE_M_GT2: case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS: case PCI_CHIP_SANDYBRIDGE_BRIDGE_S: case PCI_CHIP_SANDYBRIDGE_S_GT: /* Intel(R) HD Graphics SandyBridge not supported yet */ ret = NULL; break; default: printf("cl_get_gt_device(): error, unknown device: %x\n", device_id); } if (ret == NULL) return NULL; if (!CompilerSupported()) { ret->compiler_available = CL_FALSE; /* ret->linker_available = CL_FALSE; */ ret->profile = "EMBEDDED_PROFILE"; ret->profile_sz = strlen(ret->profile) + 1; } /* Apply any driver-dependent updates to the device info */ cl_driver_update_device_info(ret); struct sysinfo info; if (sysinfo(&info) == 0) { uint64_t two_gb = 2 * 1024 * 1024 * 1024ul; uint64_t totalram = info.totalram * info.mem_unit; /* Clamp to 2GB; compare and assign in the same units (mem_unit may not be 1) */ ret->global_mem_size = (totalram > two_gb) ? two_gb : totalram; ret->max_mem_alloc_size = ret->global_mem_size / 2; } return ret; } /* Runs a small kernel to check that the device works; returns * SELF_TEST_PASS on success, * SELF_TEST_SLM_FAIL when the SLM results mismatch, * SELF_TEST_ATOMIC_FAIL when enqueueing the kernel fails on HSW, in which case the test must rerun with atomics in L3 disabled, * SELF_TEST_OTHER_FAIL for any other failure such as a runtime API error. */ LOCAL cl_self_test_res cl_self_test(cl_device_id device, cl_self_test_res atomic_in_l3_flag) { cl_int status; cl_context ctx; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_mem buffer; cl_event kernel_finished; size_t n = 3; cl_int test_data[3] = {3, 7, 5}; const char* kernel_source = "__kernel void self_test(__global int *buf) {" " __local int tmp[3];" " tmp[get_local_id(0)] = buf[get_local_id(0)];" " barrier(CLK_LOCAL_MEM_FENCE);" " buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];" "}"; /* using __local to catch the "no SLM on Haswell" problem */ static int tested = 0; static cl_self_test_res ret = SELF_TEST_OTHER_FAIL; if (tested != 0) return ret; tested = 1; ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &status); if (status == CL_SUCCESS) { /* Only touch the driver once the context actually exists */ cl_driver_set_atomic_flag(ctx->drv, atomic_in_l3_flag); queue = clCreateCommandQueue(ctx, device, 0, &status); if (status == CL_SUCCESS) { program = clCreateProgramWithSource(ctx, 1, &kernel_source, NULL, &status); if (status == CL_SUCCESS) { status = clBuildProgram(program, 1, &device, "", NULL, NULL); if (status == CL_SUCCESS) { kernel = clCreateKernel(program, "self_test", &status); if (status == CL_SUCCESS) { buffer = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n*4, test_data, &status); if (status == CL_SUCCESS) { status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer); if (status == CL_SUCCESS) { status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &n, &n, 0, NULL, &kernel_finished); if (status == CL_SUCCESS) { status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, test_data, 1, &kernel_finished, NULL); if (status == CL_SUCCESS) { /* The kernel adds the reversed vector: (3,7,5) + (5,7,3) = (8,14,8) */ if (test_data[0] == 8 && test_data[1] == 14 && test_data[2] == 8){ ret = SELF_TEST_PASS; } else { ret = SELF_TEST_SLM_FAIL; printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n" "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", test_data[0], test_data[1], test_data[2]); } } } else { ret = SELF_TEST_ATOMIC_FAIL; /* An atomic failure: the SLM test must run again with the atomics-in-L3 feature disabled. */
tested = 0; } } } clReleaseMemObject(buffer); } clReleaseKernel(kernel); } } clReleaseProgram(program); } clReleaseCommandQueue(queue); } clReleaseContext(ctx); return ret; } LOCAL cl_int cl_get_device_ids(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) { cl_device_id device; /* Do we have a usable device? */ device = cl_get_gt_device(); if (device) { cl_self_test_res ret = cl_self_test(device, SELF_TEST_PASS); if (ret == SELF_TEST_ATOMIC_FAIL) { device->atomic_test_result = ret; ret = cl_self_test(device, ret); printf("Beignet: warning - disable atomic in L3 feature.\n"); } if(ret == SELF_TEST_SLM_FAIL) { int disable_self_test = 0; // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++ const char *env = getenv("OCL_IGNORE_SELF_TEST"); if (env != NULL) { sscanf(env, "%i", &disable_self_test); } if (disable_self_test) { printf("Beignet: Warning - overriding self-test failure\n"); } else { printf("Beignet: disabling non-working device\n"); device = 0; } } } if (!device) { if (num_devices) *num_devices = 0; if (devices) *devices = 0; return CL_DEVICE_NOT_FOUND; } else { if (num_devices) *num_devices = 1; if (devices) { *devices = device; } return CL_SUCCESS; } } #define DECL_FIELD(CASE,FIELD) \ case JOIN(CL_DEVICE_,CASE): \ if (param_value_size_ret) { \ *param_value_size_ret = sizeof device->FIELD; \ if (!param_value) \ return CL_SUCCESS; \ } \ if (param_value_size < sizeof device->FIELD) \ return CL_INVALID_VALUE; \ memcpy(param_value, &device->FIELD, sizeof device->FIELD); \ return CL_SUCCESS; #define DECL_STRING_FIELD(CASE,FIELD) \ case JOIN(CL_DEVICE_,CASE): \ if (param_value_size_ret) { \ *param_value_size_ret = device->JOIN(FIELD,_sz); \ if (!param_value) \ return CL_SUCCESS; \ } \ if (param_value_size < device->JOIN(FIELD,_sz)) \ return CL_INVALID_VALUE; \ memcpy(param_value, device->FIELD, device->JOIN(FIELD,_sz)); \ return CL_SUCCESS; LOCAL cl_int cl_get_device_info(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (UNLIKELY(device != &intel_ivb_gt1_device && device != &intel_ivb_gt2_device && device != &intel_baytrail_t_device && device != &intel_hsw_gt1_device && device != &intel_hsw_gt2_device && device != &intel_hsw_gt3_device && device != &intel_brw_gt1_device && device != &intel_brw_gt2_device && device != &intel_brw_gt3_device && device != &intel_chv_device && device != &intel_skl_gt1_device && device != &intel_skl_gt2_device && device != &intel_skl_gt3_device && device != &intel_skl_gt4_device )) return CL_INVALID_DEVICE; /* Find the correct parameter */ switch (param_name) { DECL_FIELD(TYPE, device_type) DECL_FIELD(VENDOR_ID, vendor_id) DECL_FIELD(MAX_COMPUTE_UNITS, max_compute_unit) DECL_FIELD(MAX_WORK_ITEM_DIMENSIONS, max_work_item_dimensions) DECL_FIELD(MAX_WORK_ITEM_SIZES, max_work_item_sizes) DECL_FIELD(MAX_WORK_GROUP_SIZE, max_work_group_size) DECL_FIELD(PREFERRED_VECTOR_WIDTH_CHAR, preferred_vector_width_char) DECL_FIELD(PREFERRED_VECTOR_WIDTH_SHORT, preferred_vector_width_short) DECL_FIELD(PREFERRED_VECTOR_WIDTH_INT, preferred_vector_width_int) DECL_FIELD(PREFERRED_VECTOR_WIDTH_LONG, preferred_vector_width_long) DECL_FIELD(PREFERRED_VECTOR_WIDTH_FLOAT, preferred_vector_width_float) DECL_FIELD(PREFERRED_VECTOR_WIDTH_DOUBLE, preferred_vector_width_double) DECL_FIELD(PREFERRED_VECTOR_WIDTH_HALF, preferred_vector_width_half) DECL_FIELD(NATIVE_VECTOR_WIDTH_CHAR, native_vector_width_char) 
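/* Each DECL_FIELD case implements the standard OpenCL two-call query
 * protocol: the first call passes a NULL param_value to learn the size,
 * the second passes a buffer of at least that size. From application
 * code, e.g.:
 *   size_t sz;
 *   clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, 0, NULL, &sz);
 *   cl_uint cu;
 *   clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sz, &cu, NULL);
 */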
DECL_FIELD(NATIVE_VECTOR_WIDTH_SHORT, native_vector_width_short) DECL_FIELD(NATIVE_VECTOR_WIDTH_INT, native_vector_width_int) DECL_FIELD(NATIVE_VECTOR_WIDTH_LONG, native_vector_width_long) DECL_FIELD(NATIVE_VECTOR_WIDTH_FLOAT, native_vector_width_float) DECL_FIELD(NATIVE_VECTOR_WIDTH_DOUBLE, native_vector_width_double) DECL_FIELD(NATIVE_VECTOR_WIDTH_HALF, native_vector_width_half) DECL_FIELD(MAX_CLOCK_FREQUENCY, max_clock_frequency) DECL_FIELD(ADDRESS_BITS, address_bits) DECL_FIELD(MAX_MEM_ALLOC_SIZE, max_mem_alloc_size) DECL_FIELD(IMAGE_SUPPORT, image_support) DECL_FIELD(MAX_READ_IMAGE_ARGS, max_read_image_args) DECL_FIELD(MAX_WRITE_IMAGE_ARGS, max_write_image_args) DECL_FIELD(IMAGE_MAX_ARRAY_SIZE, image_max_array_size) DECL_FIELD(IMAGE2D_MAX_WIDTH, image2d_max_width) DECL_FIELD(IMAGE2D_MAX_HEIGHT, image2d_max_height) DECL_FIELD(IMAGE3D_MAX_WIDTH, image3d_max_width) DECL_FIELD(IMAGE3D_MAX_HEIGHT, image3d_max_height) DECL_FIELD(IMAGE3D_MAX_DEPTH, image3d_max_depth) DECL_FIELD(MAX_SAMPLERS, max_samplers) DECL_FIELD(MAX_PARAMETER_SIZE, max_parameter_size) DECL_FIELD(MEM_BASE_ADDR_ALIGN, mem_base_addr_align) DECL_FIELD(MIN_DATA_TYPE_ALIGN_SIZE, min_data_type_align_size) DECL_FIELD(SINGLE_FP_CONFIG, single_fp_config) DECL_FIELD(HALF_FP_CONFIG, half_fp_config) DECL_FIELD(DOUBLE_FP_CONFIG, double_fp_config) DECL_FIELD(GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type) DECL_FIELD(GLOBAL_MEM_CACHELINE_SIZE, global_mem_cache_line_size) DECL_FIELD(GLOBAL_MEM_CACHE_SIZE, global_mem_cache_size) DECL_FIELD(GLOBAL_MEM_SIZE, global_mem_size) DECL_FIELD(MAX_CONSTANT_BUFFER_SIZE, max_constant_buffer_size) DECL_FIELD(IMAGE_MAX_BUFFER_SIZE, image_mem_size) DECL_FIELD(MAX_CONSTANT_ARGS, max_constant_args) DECL_FIELD(LOCAL_MEM_TYPE, local_mem_type) DECL_FIELD(LOCAL_MEM_SIZE, local_mem_size) DECL_FIELD(ERROR_CORRECTION_SUPPORT, error_correction_support) DECL_FIELD(HOST_UNIFIED_MEMORY, host_unified_memory) DECL_FIELD(PROFILING_TIMER_RESOLUTION, profiling_timer_resolution) DECL_FIELD(ENDIAN_LITTLE, endian_little) DECL_FIELD(AVAILABLE, available) DECL_FIELD(COMPILER_AVAILABLE, compiler_available) DECL_FIELD(LINKER_AVAILABLE, linker_available) DECL_FIELD(EXECUTION_CAPABILITIES, execution_capabilities) DECL_FIELD(QUEUE_PROPERTIES, queue_properties) DECL_FIELD(PLATFORM, platform) DECL_FIELD(PRINTF_BUFFER_SIZE, printf_buffer_size) DECL_FIELD(PREFERRED_INTEROP_USER_SYNC, interop_user_sync) DECL_STRING_FIELD(NAME, name) DECL_STRING_FIELD(VENDOR, vendor) DECL_STRING_FIELD(VERSION, version) DECL_STRING_FIELD(PROFILE, profile) DECL_STRING_FIELD(OPENCL_C_VERSION, opencl_c_version) DECL_STRING_FIELD(EXTENSIONS, extensions); DECL_STRING_FIELD(BUILT_IN_KERNELS, built_in_kernels) DECL_FIELD(PARENT_DEVICE, parent_device) DECL_FIELD(PARTITION_MAX_SUB_DEVICES, partition_max_sub_device) DECL_FIELD(PARTITION_PROPERTIES, partition_property) DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain) DECL_FIELD(PARTITION_TYPE, partition_type) DECL_FIELD(REFERENCE_COUNT, device_reference_count) case CL_DRIVER_VERSION: if (param_value_size_ret) { *param_value_size_ret = device->driver_version_sz; if (!param_value) return CL_SUCCESS; } if (param_value_size < device->driver_version_sz) return CL_INVALID_VALUE; memcpy(param_value, device->driver_version, device->driver_version_sz); return CL_SUCCESS; default: return CL_INVALID_VALUE; }; } LOCAL cl_int cl_device_get_version(cl_device_id device, cl_int *ver) { if (UNLIKELY(device != &intel_ivb_gt1_device && device != &intel_ivb_gt2_device && device != &intel_baytrail_t_device && device != 
&intel_hsw_gt1_device && device != &intel_hsw_gt2_device && device != &intel_hsw_gt3_device && device != &intel_brw_gt1_device && device != &intel_brw_gt2_device && device != &intel_brw_gt3_device && device != &intel_chv_device && device != &intel_skl_gt1_device && device != &intel_skl_gt2_device && device != &intel_skl_gt3_device && device != &intel_skl_gt4_device)) return CL_INVALID_DEVICE; if (ver == NULL) return CL_SUCCESS; if (device == &intel_ivb_gt1_device || device == &intel_ivb_gt2_device || device == &intel_baytrail_t_device) { *ver = 7; } else if (device == &intel_hsw_gt1_device || device == &intel_hsw_gt2_device || device == &intel_hsw_gt3_device) { *ver = 75; } else if (device == &intel_brw_gt1_device || device == &intel_brw_gt2_device || device == &intel_brw_gt3_device || device == &intel_chv_device) { *ver = 8; } else if (device == &intel_skl_gt1_device || device == &intel_skl_gt2_device || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device) { *ver = 9; } else return CL_INVALID_VALUE; return CL_SUCCESS; } #undef DECL_FIELD #define _DECL_FIELD(FIELD) \ if (param_value && param_value_size < sizeof(FIELD)) \ return CL_INVALID_VALUE; \ if (param_value_size_ret != NULL) \ *param_value_size_ret = sizeof(FIELD); \ if (param_value) \ memcpy(param_value, &FIELD, sizeof(FIELD)); \ return CL_SUCCESS; #define DECL_FIELD(CASE,FIELD) \ case JOIN(CL_KERNEL_,CASE): \ _DECL_FIELD(FIELD) #include "cl_kernel.h" #include "cl_program.h" static int cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device) { const char * n = cl_kernel_get_name(kernel); const char * builtin_kernels_2d = "__cl_copy_image_2d_to_2d;__cl_copy_image_2d_to_buffer;__cl_copy_buffer_to_image_2d;__cl_fill_image_2d;__cl_fill_image_2d_array;"; const char * builtin_kernels_3d = "__cl_copy_image_3d_to_2d;__cl_copy_image_2d_to_3d;__cl_copy_image_3d_to_3d;__cl_copy_image_3d_to_buffer;__cl_copy_buffer_to_image_3d;__cl_fill_image_3d"; if (!strstr(device->built_in_kernels, n)){ return 0; }else if(strstr(builtin_kernels_2d, n)){ return 2; }else if(strstr(builtin_kernels_3d, n)){ return 3; }else return 1; } LOCAL size_t cl_get_kernel_max_wg_sz(cl_kernel kernel) { size_t work_group_size, thread_cnt; int simd_width = interp_kernel_get_simd_width(kernel->opaque); int device_id = kernel->program->ctx->device->device_id; if (!interp_kernel_use_slm(kernel->opaque)) { if (!IS_BAYTRAIL_T(device_id) || simd_width == 16) work_group_size = simd_width * 64; else work_group_size = kernel->program->ctx->device->max_compute_unit * kernel->program->ctx->device->max_thread_per_unit * simd_width; } else { thread_cnt = kernel->program->ctx->device->max_compute_unit * kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count; if(thread_cnt > 64) thread_cnt = 64; work_group_size = thread_cnt * simd_width; } if(work_group_size > kernel->program->ctx->device->max_work_group_size) work_group_size = kernel->program->ctx->device->max_work_group_size; return work_group_size; } LOCAL cl_int cl_get_kernel_workgroup_info(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret) { int err = CL_SUCCESS; int dimension = 0; if (UNLIKELY(device != &intel_ivb_gt1_device && device != &intel_ivb_gt2_device && device != &intel_baytrail_t_device && device != &intel_hsw_gt1_device && device != &intel_hsw_gt2_device && device != &intel_hsw_gt3_device && device != &intel_brw_gt1_device && device != 
&intel_brw_gt2_device && device != &intel_brw_gt3_device && device != &intel_chv_device && device != &intel_skl_gt1_device && device != &intel_skl_gt2_device && device != &intel_skl_gt3_device && device != &intel_skl_gt4_device)) return CL_INVALID_DEVICE; CHECK_KERNEL(kernel); switch (param_name) { case CL_KERNEL_WORK_GROUP_SIZE: { if (param_value && param_value_size < sizeof(size_t)) return CL_INVALID_VALUE; if (param_value_size_ret != NULL) *param_value_size_ret = sizeof(size_t); if (param_value) { size_t work_group_size = cl_get_kernel_max_wg_sz(kernel); *(size_t*)param_value = work_group_size; return CL_SUCCESS; } } DECL_FIELD(PREFERRED_WORK_GROUP_SIZE_MULTIPLE, device->preferred_wg_sz_mul) case CL_KERNEL_LOCAL_MEM_SIZE: { size_t local_mem_sz = interp_kernel_get_slm_size(kernel->opaque) + kernel->local_mem_sz; _DECL_FIELD(local_mem_sz) } DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz) DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size) case CL_KERNEL_GLOBAL_WORK_SIZE: dimension = cl_check_builtin_kernel_dimension(kernel, device); if ( !dimension ) return CL_INVALID_VALUE; if (param_value_size_ret != NULL) *param_value_size_ret = sizeof(device->max_1d_global_work_sizes); if (param_value) { if (dimension == 1) { memcpy(param_value, device->max_1d_global_work_sizes, sizeof(device->max_1d_global_work_sizes)); }else if(dimension == 2){ memcpy(param_value, device->max_2d_global_work_sizes, sizeof(device->max_2d_global_work_sizes)); }else if(dimension == 3){ memcpy(param_value, device->max_3d_global_work_sizes, sizeof(device->max_3d_global_work_sizes)); }else return CL_INVALID_VALUE; return CL_SUCCESS; } return CL_SUCCESS; default: return CL_INVALID_VALUE; }; error: return err; } Beignet-1.1.1-Source/src/cl_sampler.c000664 001750 001750 00000010207 12576733264 016550 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_context.h" #include "cl_sampler.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include <assert.h> uint32_t cl_to_clk(cl_bool normalized_coords, cl_addressing_mode address, cl_filter_mode filter) { int clk_address = CLK_ADDRESS_NONE; int clk_filter = CLK_FILTER_NEAREST; switch (address) { case CL_ADDRESS_NONE: clk_address = CLK_ADDRESS_NONE; break; case CL_ADDRESS_CLAMP: clk_address = CLK_ADDRESS_CLAMP; break; case CL_ADDRESS_CLAMP_TO_EDGE: clk_address = CLK_ADDRESS_CLAMP_TO_EDGE; break; case CL_ADDRESS_REPEAT: clk_address = CLK_ADDRESS_REPEAT; break; case CL_ADDRESS_MIRRORED_REPEAT: clk_address = CLK_ADDRESS_MIRRORED_REPEAT; break; default: assert(0); } switch(filter) { case CL_FILTER_NEAREST: clk_filter = CLK_FILTER_NEAREST; break; case CL_FILTER_LINEAR: clk_filter = CLK_FILTER_LINEAR; break; default: assert(0); } return (clk_address << __CLK_ADDRESS_BASE) | (normalized_coords << __CLK_NORMALIZED_BASE) | (clk_filter); } #define IS_SAMPLER_ARG(v) (v & __CLK_SAMPLER_ARG_KEY_BIT) #define SAMPLER_ARG_ID(v) ((v & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE) int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler) { int slot_id; for(slot_id = 0; slot_id < k->sampler_sz; slot_id++) { if (IS_SAMPLER_ARG(k->samplers[slot_id])) { if (SAMPLER_ARG_ID(k->samplers[slot_id]) == index) { k->samplers[slot_id] = (k->samplers[slot_id] & (~__CLK_SAMPLER_MASK)) | sampler->clkSamplerValue; return slot_id; } } } return -1; } LOCAL cl_sampler cl_sampler_new(cl_context ctx, cl_bool normalized_coords, cl_addressing_mode address, cl_filter_mode filter, cl_int *errcode_ret) { cl_sampler sampler = NULL; cl_int err = CL_SUCCESS; /* Allocate and initialize the structure itself */ TRY_ALLOC (sampler, CALLOC(struct _cl_sampler)); SET_ICD(sampler->dispatch) sampler->ref_n = 1; sampler->magic = CL_MAGIC_SAMPLER_HEADER; sampler->normalized_coords = normalized_coords; sampler->address = address; sampler->filter = filter; /* Append the sampler to the context sampler list */ pthread_mutex_lock(&ctx->sampler_lock); sampler->next = ctx->samplers; if (ctx->samplers != NULL) ctx->samplers->prev = sampler; ctx->samplers = sampler; pthread_mutex_unlock(&ctx->sampler_lock); sampler->ctx = ctx; cl_context_add_ref(ctx); sampler->clkSamplerValue = cl_to_clk(normalized_coords, address, filter); exit: if (errcode_ret) *errcode_ret = err; return sampler; error: cl_sampler_delete(sampler); sampler = NULL; goto exit; } LOCAL void cl_sampler_delete(cl_sampler sampler) { if (UNLIKELY(sampler == NULL)) return; if (atomic_dec(&sampler->ref_n) > 1) return; assert(sampler->ctx); pthread_mutex_lock(&sampler->ctx->sampler_lock); if (sampler->prev) sampler->prev->next = sampler->next; if (sampler->next) sampler->next->prev = sampler->prev; if (sampler->ctx->samplers == sampler) sampler->ctx->samplers = sampler->next; pthread_mutex_unlock(&sampler->ctx->sampler_lock); cl_context_delete(sampler->ctx); cl_free(sampler); } LOCAL void cl_sampler_add_ref(cl_sampler sampler) { assert(sampler); atomic_inc(&sampler->ref_n); }
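cl_sampler_new() is what backs the standard clCreateSampler() entry point, and clkSamplerValue is the packed CLK_* encoding that cl_set_sampler_arg_slot() later patches into a kernel's sampler slots. A minimal usage sketch from the application side (ctx and kernel are assumed to exist already; only the standard OpenCL 1.1 API is used):

    cl_int err;
    cl_sampler s = clCreateSampler(ctx,
                                   CL_TRUE,                   /* normalized coords */
                                   CL_ADDRESS_CLAMP_TO_EDGE,  /* addressing mode */
                                   CL_FILTER_LINEAR,          /* filter mode */
                                   &err);
    if (err == CL_SUCCESS)
      err = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &s);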
Beignet-1.1.1-Source/src/cl_alloc.h000664 001750 001750 00000002655 12576733264 016204 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #ifndef __CL_ALLOC_H__ #define __CL_ALLOC_H__ #include "cl_internals.h" #include <stdlib.h> /* Return a valid pointer for the requested memory block size */ extern void *cl_malloc(size_t sz); /* Aligned malloc */ extern void* cl_aligned_malloc(size_t sz, size_t align); /* malloc + memzero */ extern void *cl_calloc(size_t n, size_t elem_size); /* Regular realloc */ extern void *cl_realloc(void *ptr, size_t sz); /* Free a pointer allocated with cl_*alloc */ extern void cl_free(void *ptr); /* We count the number of allocations. This function reports the number of * allocations still unfreed */ extern size_t cl_report_unfreed(void); #endif /* __CL_ALLOC_H__ */
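cl_report_unfreed() implies that these wrappers maintain a live allocation count. A minimal sketch of such a counting wrapper, assuming a GCC-style atomic builtin (an illustration only, not the actual cl_alloc.c implementation):

    #include <stdlib.h>

    static long cl_alloc_n = 0;                  /* live allocation counter */

    void *cl_malloc_sketch(size_t sz)
    {
      void *p = malloc(sz);
      if (p != NULL)
        __sync_fetch_and_add(&cl_alloc_n, 1);
      return p;
    }

    void cl_free_sketch(void *p)
    {
      if (p == NULL)
        return;
      __sync_fetch_and_sub(&cl_alloc_n, 1);
      free(p);
    }

    size_t cl_report_unfreed_sketch(void)
    {
      return (size_t)cl_alloc_n;
    }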
Beignet-1.1.1-Source/src/cl_kernel.c000664 001750 001750 00000032766 12605356050 016366 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #include "cl_kernel.h" #include "cl_program.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_mem.h" #include "cl_alloc.h" #include "cl_utils.h" #include "cl_khr_icd.h" #include "CL/cl.h" #include "cl_sampler.h" #include <assert.h> #include <stdint.h> #include <string.h> #include <stdio.h> #include <stdlib.h> LOCAL void cl_kernel_delete(cl_kernel k) { uint32_t i; if (k == NULL) return; /* We are not done with the kernel */ if (atomic_dec(&k->ref_n) > 1) return; /* Release one reference on all bos we own */ if (k->bo) cl_buffer_unreference(k->bo); /* This will be true for kernels created by clCreateKernel */ if (k->ref_its_program) cl_program_delete(k->program); /* Release the curbe if allocated */ if (k->curbe) cl_free(k->curbe); /* Release the argument array if required */ if (k->args) { for (i = 0; i < k->arg_n; ++i) if (k->args[i].mem != NULL) cl_mem_delete(k->args[i].mem); cl_free(k->args); } if (k->image_sz) cl_free(k->images); k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(k); } LOCAL cl_kernel cl_kernel_new(cl_program p) { cl_kernel k = NULL; TRY_ALLOC_NO_ERR (k, CALLOC(struct _cl_kernel)); SET_ICD(k->dispatch) k->ref_n = 1; k->magic = CL_MAGIC_KERNEL_HEADER; k->program = p; exit: return k; error: cl_kernel_delete(k); k = NULL; goto exit; } LOCAL const char* cl_kernel_get_name(cl_kernel k) { if (UNLIKELY(k == NULL)) return NULL; return interp_kernel_get_name(k->opaque); } LOCAL const char* cl_kernel_get_attributes(cl_kernel k) { if (UNLIKELY(k == NULL)) return NULL; return interp_kernel_get_attributes(k->opaque); } LOCAL void cl_kernel_add_ref(cl_kernel k) { atomic_inc(&k->ref_n); } LOCAL cl_int cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) { uint32_t offset; /* where to patch */ enum gbe_arg_type arg_type; /* kind of argument */ size_t arg_sz; /* size of the argument */ cl_mem mem = NULL; /* for __global, __constant and image arguments */ cl_context ctx = k->program->ctx; if (UNLIKELY(index >= k->arg_n)) return CL_INVALID_ARG_INDEX; arg_type = interp_kernel_get_arg_type(k->opaque, index); arg_sz = interp_kernel_get_arg_size(k->opaque, index); if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) { if (arg_type != GBE_ARG_SAMPLER || (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler))) return CL_INVALID_ARG_SIZE; } if(UNLIKELY(arg_type == GBE_ARG_LOCAL_PTR && sz == 0)) return CL_INVALID_ARG_SIZE; if(arg_type == GBE_ARG_VALUE) { if(UNLIKELY(value == NULL)) return CL_INVALID_ARG_VALUE; } else if(arg_type == GBE_ARG_LOCAL_PTR) { if(UNLIKELY(value != NULL)) return CL_INVALID_ARG_VALUE; } else if(arg_type == GBE_ARG_SAMPLER) { if (UNLIKELY(value == NULL)) return CL_INVALID_ARG_VALUE; cl_sampler s = *(cl_sampler*)value; if(s->magic != CL_MAGIC_SAMPLER_HEADER) return CL_INVALID_SAMPLER; } else { /* should be image, GLOBAL_PTR, CONSTANT_PTR */ if (UNLIKELY(value == NULL && arg_type == GBE_ARG_IMAGE)) return CL_INVALID_ARG_VALUE; if(value != NULL) mem = *(cl_mem*)value; if(value != NULL && mem) { if( CL_SUCCESS != is_valid_mem(mem, ctx->buffers)) return CL_INVALID_MEM_OBJECT; if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !IS_IMAGE(mem)) || (arg_type != GBE_ARG_IMAGE && IS_IMAGE(mem)))) return CL_INVALID_ARG_VALUE; } } /* Copy the structure or the value directly into the curbe */ if (arg_type == GBE_ARG_VALUE) { offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index); assert(offset + sz <= k->curbe_sz); memcpy(k->curbe + offset, value, sz); k->args[index].local_sz = 0; k->args[index].is_set = 1; k->args[index].mem = NULL;
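/* For a by-value argument, the memcpy above is the whole story: the curbe
 * (Constant URB Entry) is the kernel's per-enqueue payload image, and
 * interp_kernel_get_curbe_offset() located this argument's slot inside it,
 * so nothing else has to be patched at enqueue time. */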
return CL_SUCCESS; } /* For a local pointer just save the size */ if (arg_type == GBE_ARG_LOCAL_PTR) { k->args[index].local_sz = sz; k->args[index].is_set = 1; k->args[index].mem = NULL; return CL_SUCCESS; } /* Is it a sampler*/ if (arg_type == GBE_ARG_SAMPLER) { cl_sampler sampler; memcpy(&sampler, value, sz); k->args[index].local_sz = 0; k->args[index].is_set = 1; k->args[index].mem = NULL; k->args[index].sampler = sampler; cl_set_sampler_arg_slot(k, index, sampler); offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index); //assert(arg_sz == 4); assert(offset + 4 <= k->curbe_sz); memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4); return CL_SUCCESS; } if(value != NULL) mem = *(cl_mem*) value; if(value == NULL || mem == NULL) { /* for buffer object GLOBAL_PTR CONSTANT_PTR, it maybe NULL */ int32_t offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index); *((uint32_t *)(k->curbe + offset)) = 0; assert(arg_type == GBE_ARG_GLOBAL_PTR || arg_type == GBE_ARG_CONSTANT_PTR); if (k->args[index].mem) cl_mem_delete(k->args[index].mem); k->args[index].mem = NULL; k->args[index].is_set = 1; k->args[index].local_sz = 0; return CL_SUCCESS; } mem = *(cl_mem*) value; cl_mem_add_ref(mem); if (k->args[index].mem) cl_mem_delete(k->args[index].mem); k->args[index].mem = mem; k->args[index].is_set = 1; k->args[index].local_sz = 0; k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index); return CL_SUCCESS; } LOCAL int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { assert(k != NULL); void *ret_info = interp_kernel_get_arg_info(k->opaque, arg_index, param_name - CL_KERNEL_ARG_ADDRESS_QUALIFIER); uint32_t arg_type = interp_kernel_get_arg_type(k->opaque, arg_index); int str_len = 0; cl_kernel_arg_type_qualifier type_qual = CL_KERNEL_ARG_TYPE_NONE; switch (param_name) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: if (param_value_size_ret) *param_value_size_ret = sizeof(cl_kernel_arg_address_qualifier); if (!param_value) return CL_SUCCESS; if (param_value_size < sizeof(cl_kernel_arg_address_qualifier)) return CL_INVALID_VALUE; if ((cl_ulong)ret_info == 0) { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_PRIVATE; } else if ((cl_ulong)ret_info == 1 || (cl_ulong)ret_info == 4) { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_GLOBAL; } else if ((cl_ulong)ret_info == 2) { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_CONSTANT; } else if ((cl_ulong)ret_info == 3) { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_LOCAL; } else { /* If no address qualifier is specified, the default address qualifier which is CL_KERNEL_ARG_ADDRESS_PRIVATE is returned. 
*/ *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_PRIVATE; } return CL_SUCCESS; case CL_KERNEL_ARG_ACCESS_QUALIFIER: if (param_value_size_ret) *param_value_size_ret = sizeof(cl_kernel_arg_access_qualifier); if (!param_value) return CL_SUCCESS; if (param_value_size < sizeof(cl_kernel_arg_access_qualifier)) return CL_INVALID_VALUE; if (!strcmp((char*)ret_info, "write_only")) { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ACCESS_WRITE_ONLY; } else if (!strcmp((char*)ret_info, "read_only")) { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ACCESS_READ_ONLY; } else if (!strcmp((char*)ret_info, "read_write")) { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ACCESS_READ_WRITE; } else { *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ACCESS_NONE; } return CL_SUCCESS; case CL_KERNEL_ARG_TYPE_NAME: case CL_KERNEL_ARG_NAME: str_len = strlen(ret_info); if (param_value_size_ret) *param_value_size_ret = str_len + 1; if (!param_value) return CL_SUCCESS; if (param_value_size < str_len + 1) return CL_INVALID_VALUE; memcpy(param_value, ret_info, str_len); ((char *)param_value)[str_len] = 0; return CL_SUCCESS; case CL_KERNEL_ARG_TYPE_QUALIFIER: if (param_value_size_ret) *param_value_size_ret = sizeof(cl_kernel_arg_type_qualifier); if (!param_value) return CL_SUCCESS; if (param_value_size < sizeof(cl_kernel_arg_type_qualifier)) return CL_INVALID_VALUE; if (strstr((char*)ret_info, "const") && (arg_type == GBE_ARG_GLOBAL_PTR || arg_type == GBE_ARG_CONSTANT_PTR || arg_type == GBE_ARG_LOCAL_PTR)) type_qual = type_qual | CL_KERNEL_ARG_TYPE_CONST; if (strstr((char*)ret_info, "volatile")) type_qual = type_qual | CL_KERNEL_ARG_TYPE_VOLATILE; if (strstr((char*)ret_info, "restrict")) type_qual = type_qual | CL_KERNEL_ARG_TYPE_RESTRICT; *(cl_kernel_arg_type_qualifier *)param_value = type_qual; return CL_SUCCESS; default: assert(0); } return CL_SUCCESS; } LOCAL uint32_t cl_kernel_get_simd_width(cl_kernel k) { assert(k != NULL); return interp_kernel_get_simd_width(k->opaque); } LOCAL void cl_kernel_setup(cl_kernel k, gbe_kernel opaque) { cl_context ctx = k->program->ctx; cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx); if(k->bo != NULL) cl_buffer_unreference(k->bo); /* Allocate the gen code here */ const uint32_t code_sz = interp_kernel_get_code_size(opaque); const char *code = interp_kernel_get_code(opaque); k->bo = cl_buffer_alloc(bufmgr, "CL kernel", code_sz, 64u); k->arg_n = interp_kernel_get_arg_num(opaque); /* Upload the code */ cl_buffer_subdata(k->bo, 0, code_sz, code); k->opaque = opaque; /* Create the curbe */ k->curbe_sz = interp_kernel_get_curbe_size(k->opaque); /* Get sampler data & size */ k->sampler_sz = interp_kernel_get_sampler_size(k->opaque); assert(k->sampler_sz <= GEN_MAX_SAMPLERS); if (k->sampler_sz > 0) interp_kernel_get_sampler_data(k->opaque, k->samplers); interp_kernel_get_compile_wg_size(k->opaque, k->compile_wg_sz); k->stack_size = interp_kernel_get_stack_size(k->opaque); /* Get image data & size */ k->image_sz = interp_kernel_get_image_size(k->opaque); assert(k->sampler_sz <= GEN_MAX_SURFACES); assert(k->image_sz <= ctx->device->max_read_image_args + ctx->device->max_write_image_args); if (k->image_sz > 0) { TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0]))); interp_kernel_get_image_data(k->opaque, k->images); } else k->images = NULL; return; error: cl_buffer_unreference(k->bo); k->bo = NULL; } LOCAL cl_kernel cl_kernel_dup(cl_kernel from) { cl_kernel to = NULL; if 
(UNLIKELY(from == NULL)) return NULL; TRY_ALLOC_NO_ERR (to, CALLOC(struct _cl_kernel)); SET_ICD(to->dispatch) to->bo = from->bo; to->opaque = from->opaque; to->ref_n = 1; to->magic = CL_MAGIC_KERNEL_HEADER; to->program = from->program; to->arg_n = from->arg_n; to->curbe_sz = from->curbe_sz; to->sampler_sz = from->sampler_sz; to->image_sz = from->image_sz; memcpy(to->compile_wg_sz, from->compile_wg_sz, sizeof(from->compile_wg_sz)); to->stack_size = from->stack_size; if (to->sampler_sz) memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t)); if (to->image_sz) { TRY_ALLOC_NO_ERR(to->images, cl_calloc(to->image_sz, sizeof(to->images[0]))); memcpy(to->images, from->images, to->image_sz * sizeof(to->images[0])); } else to->images = NULL; TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument))); if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz)); /* Retain the bos */ if (from->bo) cl_buffer_reference(from->bo); /* We retain the program destruction since this kernel (user allocated) * depends on the program for some of its pointers */ assert(from->program); cl_program_add_ref(from->program); to->ref_its_program = CL_TRUE; exit: return to; error: cl_kernel_delete(to); to = NULL; goto exit; } LOCAL cl_int cl_kernel_work_group_sz(cl_kernel ker, const size_t *local_wk_sz, uint32_t wk_dim, size_t *wk_grp_sz) { cl_int err = CL_SUCCESS; size_t sz = 0; cl_uint i; for (i = 0; i < wk_dim; ++i) { const uint32_t required_sz = interp_kernel_get_required_work_group_size(ker->opaque, i); if (required_sz != 0 && required_sz != local_wk_sz[i]) { err = CL_INVALID_WORK_ITEM_SIZE; goto error; } } sz = local_wk_sz[0]; for (i = 1; i < wk_dim; ++i) sz *= local_wk_sz[i]; if (sz > cl_get_kernel_max_wg_sz(ker)) { err = CL_INVALID_WORK_ITEM_SIZE; goto error; } error: if (wk_grp_sz) *wk_grp_sz = sz; return err; } Beignet-1.1.1-Source/src/cl_driver_defs.c000664 001750 001750 00000012450 12576733264 017403 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_driver.h" #include "cl_utils.h" #include /* Driver */ LOCAL cl_driver_new_cb *cl_driver_new = NULL; LOCAL cl_driver_delete_cb *cl_driver_delete = NULL; LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL; LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL; LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL; LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL; /* Buffer */ LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL; LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL; LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL; LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL; LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL; LOCAL cl_buffer_reference_cb *cl_buffer_reference = NULL; LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL; LOCAL cl_buffer_map_cb *cl_buffer_map = NULL; LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL; LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL; LOCAL cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL; LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL; LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL; LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL; LOCAL cl_buffer_pin_cb *cl_buffer_pin = NULL; LOCAL cl_buffer_unpin_cb *cl_buffer_unpin = NULL; LOCAL cl_buffer_subdata_cb *cl_buffer_subdata = NULL; LOCAL cl_buffer_get_subdata_cb *cl_buffer_get_subdata = NULL; LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL; LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = NULL; LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL; LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL; LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL; /* cl_khr_gl_sharing */ LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL; LOCAL cl_gl_release_texture_cb *cl_gl_release_texture = NULL; LOCAL cl_gl_acquire_buffer_object_cb *cl_gl_acquire_buffer_object = NULL; LOCAL cl_gl_release_buffer_object_cb *cl_gl_release_buffer_object = NULL; LOCAL cl_gl_acquire_render_buffer_cb *cl_gl_acquire_render_buffer = NULL; LOCAL cl_gl_release_render_buffer_cb *cl_gl_release_render_buffer = NULL; /* GPGPU */ LOCAL cl_gpgpu_new_cb *cl_gpgpu_new = NULL; LOCAL cl_gpgpu_delete_cb *cl_gpgpu_delete = NULL; LOCAL cl_gpgpu_sync_cb *cl_gpgpu_sync = NULL; LOCAL cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf = NULL; LOCAL cl_gpgpu_set_stack_cb *cl_gpgpu_set_stack = NULL; LOCAL cl_gpgpu_set_scratch_cb *cl_gpgpu_set_scratch = NULL; LOCAL cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image = NULL; LOCAL cl_gpgpu_get_cache_ctrl_cb *cl_gpgpu_get_cache_ctrl = NULL; LOCAL cl_gpgpu_state_init_cb *cl_gpgpu_state_init = NULL; LOCAL cl_gpgpu_alloc_constant_buffer_cb * cl_gpgpu_alloc_constant_buffer = NULL; LOCAL cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters = NULL; LOCAL cl_gpgpu_upload_curbes_cb *cl_gpgpu_upload_curbes = NULL; LOCAL cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup = NULL; LOCAL cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers = NULL; LOCAL cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset = NULL; LOCAL cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start = NULL; LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL; LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL; LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL; LOCAL cl_gpgpu_bind_sampler_cb 
*cl_gpgpu_bind_sampler = NULL; LOCAL cl_gpgpu_event_new_cb *cl_gpgpu_event_new = NULL; LOCAL cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status = NULL; LOCAL cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush = NULL; LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL; LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp = NULL; LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL; LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL; LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL; LOCAL cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer = NULL; LOCAL cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer = NULL; LOCAL cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer = NULL; LOCAL cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer = NULL; LOCAL cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info = NULL; LOCAL cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info = NULL; LOCAL cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer = NULL; Beignet-1.1.1-Source/src/cl_mem.c000664 001750 001750 00000221077 12605356050 015657 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "cl_mem.h" #include "cl_image.h" #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_device_id.h" #include "cl_driver.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include "cl_command_queue.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include #include #include #include #define FIELD_SIZE(CASE,TYPE) \ case JOIN(CL_,CASE): \ if(param_value_size_ret) \ *param_value_size_ret = sizeof(TYPE); \ if(!param_value) \ return CL_SUCCESS; \ if(param_value_size < sizeof(TYPE)) \ return CL_INVALID_VALUE; \ break; #define MAX_TILING_SIZE 128 * MB static cl_mem_object_type cl_get_mem_object_type(cl_mem mem) { switch (mem->type) { case CL_MEM_BUFFER_TYPE: case CL_MEM_SUBBUFFER_TYPE: return CL_MEM_OBJECT_BUFFER; case CL_MEM_IMAGE_TYPE: case CL_MEM_GL_IMAGE_TYPE: { struct _cl_mem_image *image = cl_mem_image(mem); return image->image_type; } default: return CL_MEM_OBJECT_BUFFER; } } LOCAL cl_int cl_get_mem_object_info(cl_mem mem, cl_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { switch(param_name) { FIELD_SIZE(MEM_TYPE, cl_mem_object_type); FIELD_SIZE(MEM_FLAGS, cl_mem_flags); FIELD_SIZE(MEM_SIZE, size_t); FIELD_SIZE(MEM_HOST_PTR, void *); FIELD_SIZE(MEM_MAP_COUNT, cl_uint); FIELD_SIZE(MEM_REFERENCE_COUNT, cl_uint); FIELD_SIZE(MEM_CONTEXT, cl_context); FIELD_SIZE(MEM_ASSOCIATED_MEMOBJECT, cl_mem); FIELD_SIZE(MEM_OFFSET, size_t); default: return CL_INVALID_VALUE; } switch(param_name) { case CL_MEM_TYPE: *((cl_mem_object_type *)param_value) = cl_get_mem_object_type(mem); break; case CL_MEM_FLAGS: *((cl_mem_flags *)param_value) = mem->flags; break; case CL_MEM_SIZE: *((size_t *)param_value) = mem->size; break; case CL_MEM_HOST_PTR: if(mem->type == CL_MEM_IMAGE_TYPE) { *((size_t *)param_value) = (size_t)mem->host_ptr; } else { struct _cl_mem_buffer* buf = (struct _cl_mem_buffer*)mem; *((size_t *)param_value) = (size_t)mem->host_ptr + buf->sub_offset; } break; case CL_MEM_MAP_COUNT: *((cl_uint *)param_value) = mem->map_ref; break; case CL_MEM_REFERENCE_COUNT: *((cl_uint *)param_value) = mem->ref_n; break; case CL_MEM_CONTEXT: *((cl_context *)param_value) = mem->ctx; break; case CL_MEM_ASSOCIATED_MEMOBJECT: if(mem->type != CL_MEM_SUBBUFFER_TYPE) { *((cl_mem *)param_value) = NULL; } else { struct _cl_mem_buffer* buf = (struct _cl_mem_buffer*)mem; *((cl_mem *)param_value) = (cl_mem)(buf->parent); } break; case CL_MEM_OFFSET: if(mem->type != CL_MEM_SUBBUFFER_TYPE) { *((size_t *)param_value) = 0; } else { struct _cl_mem_buffer* buf = (struct _cl_mem_buffer*)mem; *((size_t *)param_value) = buf->sub_offset; } break; } return CL_SUCCESS; } #define IS_1D(image) (image->image_type == CL_MEM_OBJECT_IMAGE1D || \ image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || \ image->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) #define IS_2D(image) (image->image_type == CL_MEM_OBJECT_IMAGE2D || \ image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) #define IS_3D(image) (image->image_type == CL_MEM_OBJECT_IMAGE3D) #define IS_ARRAY(image) (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || \ image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) LOCAL cl_int cl_get_image_info(cl_mem mem, cl_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { int err; CHECK_IMAGE(mem, image); switch(param_name) { FIELD_SIZE(IMAGE_FORMAT, cl_image_format); FIELD_SIZE(IMAGE_ELEMENT_SIZE, size_t); FIELD_SIZE(IMAGE_ROW_PITCH, size_t); FIELD_SIZE(IMAGE_SLICE_PITCH, size_t); FIELD_SIZE(IMAGE_WIDTH, 
size_t); FIELD_SIZE(IMAGE_HEIGHT, size_t); FIELD_SIZE(IMAGE_DEPTH, size_t); FIELD_SIZE(IMAGE_ARRAY_SIZE, size_t); FIELD_SIZE(IMAGE_BUFFER, cl_mem); FIELD_SIZE(IMAGE_NUM_MIP_LEVELS, cl_uint); FIELD_SIZE(IMAGE_NUM_SAMPLES, cl_uint); default: return CL_INVALID_VALUE; } switch(param_name) { case CL_IMAGE_FORMAT: *(cl_image_format *)param_value = image->fmt; break; case CL_IMAGE_ELEMENT_SIZE: *(size_t *)param_value = image->bpp; break; case CL_IMAGE_ROW_PITCH: *(size_t *)param_value = image->row_pitch; break; case CL_IMAGE_SLICE_PITCH: *(size_t *)param_value = image->slice_pitch; break; case CL_IMAGE_WIDTH: if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) { struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image*) image; *(size_t *)param_value = buffer1d_image->size; } else *(size_t *)param_value = image->w; break; case CL_IMAGE_HEIGHT: if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) *(size_t *)param_value = 0; else *(size_t *)param_value = IS_1D(image) ? 0 : image->h; break; case CL_IMAGE_DEPTH: *(size_t *)param_value = IS_3D(image) ? image->depth : 0; break; case CL_IMAGE_ARRAY_SIZE: *(size_t *)param_value = IS_ARRAY(image) ? image->depth : 0; break; case CL_IMAGE_BUFFER: *(cl_mem *)param_value = image->buffer_1d; break; case CL_IMAGE_NUM_MIP_LEVELS: case CL_IMAGE_NUM_SAMPLES: *(cl_mem *)param_value = 0; break; } return CL_SUCCESS; error: return err; } #undef FIELD_SIZE LOCAL cl_mem cl_mem_allocate(enum cl_mem_type type, cl_context ctx, cl_mem_flags flags, size_t sz, cl_int is_tiled, void *host_ptr, cl_int *errcode) { cl_buffer_mgr bufmgr = NULL; cl_mem mem = NULL; cl_int err = CL_SUCCESS; size_t alignment = 64; assert(ctx); /* Allocate and initialize the structure itself */ if (type == CL_MEM_IMAGE_TYPE) { struct _cl_mem_image *image = NULL; TRY_ALLOC (image, CALLOC(struct _cl_mem_image)); mem = &image->base; } else if (type == CL_MEM_GL_IMAGE_TYPE ) { struct _cl_mem_gl_image *gl_image = NULL; TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image)); mem = &gl_image->base.base; } else if (type == CL_MEM_BUFFER1D_IMAGE_TYPE) { struct _cl_mem_buffer1d_image *buffer1d_image = NULL; TRY_ALLOC(buffer1d_image, CALLOC(struct _cl_mem_buffer1d_image)); mem = &buffer1d_image->base.base; } else { struct _cl_mem_buffer *buffer = NULL; TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer)); mem = &buffer->base; } mem->type = type; SET_ICD(mem->dispatch) mem->ref_n = 1; mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; mem->is_userptr = 0; mem->offset = 0; if (sz != 0) { /* Pinning will require stricter alignment rules */ if ((flags & CL_MEM_PINNABLE) || is_tiled) alignment = 4096; /* Allocate space in memory */ bufmgr = cl_context_get_bufmgr(ctx); assert(bufmgr); #ifdef HAS_USERPTR if (ctx->device->host_unified_memory) { int page_size = getpagesize(); int cacheline_size = 0; cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cacheline_size), &cacheline_size, NULL); /* currently only CL buffers are supported; CL image support will be added later */ if (type == CL_MEM_BUFFER_TYPE) { if (flags & CL_MEM_USE_HOST_PTR) { assert(host_ptr != NULL); /* userptr does not support tiling */ if (!is_tiled) { if (ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned long)host_ptr) { void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1))); mem->offset = host_ptr - aligned_host_ptr; mem->is_userptr = 1; size_t aligned_sz = ALIGN((mem->offset + sz), page_size); mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0); } } }
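/* Note: the CL_MEM_USE_HOST_PTR userptr path above only fires when the user pointer is already cacheline aligned; mem->offset then records where that pointer sits inside the page-aligned mapping handed to cl_buffer_alloc_userptr. */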
else if (flags & CL_MEM_ALLOC_HOST_PTR) { const size_t alignedSZ = ALIGN(sz, page_size); void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size); mem->host_ptr = internal_host_ptr; mem->is_userptr = 1; mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", internal_host_ptr, alignedSZ, 0); } } } if (!mem->is_userptr) mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment); #else mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment); #endif if (UNLIKELY(mem->bo == NULL)) { err = CL_MEM_OBJECT_ALLOCATION_FAILURE; goto error; } mem->size = sz; } cl_context_add_ref(ctx); mem->ctx = ctx; /* Append the buffer in the context buffer list */ pthread_mutex_lock(&ctx->buffer_lock); mem->next = ctx->buffers; if (ctx->buffers != NULL) ctx->buffers->prev = mem; ctx->buffers = mem; pthread_mutex_unlock(&ctx->buffer_lock); exit: if (errcode) *errcode = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL cl_int is_valid_mem(cl_mem mem, cl_mem buffers) { cl_mem tmp = buffers; while(tmp){ if(mem == tmp){ if (UNLIKELY(mem->magic != CL_MAGIC_MEM_HEADER)) return CL_INVALID_MEM_OBJECT; return CL_SUCCESS; } tmp = tmp->next; } return CL_INVALID_MEM_OBJECT; } LOCAL cl_mem cl_mem_new_buffer(cl_context ctx, cl_mem_flags flags, size_t sz, void *data, cl_int *errcode_ret) { /* Possible mem type combination: CL_MEM_ALLOC_HOST_PTR CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR CL_MEM_USE_HOST_PTR CL_MEM_COPY_HOST_PTR */ cl_int err = CL_SUCCESS; cl_mem mem = NULL; cl_ulong max_mem_size; if (UNLIKELY(sz == 0)) { err = CL_INVALID_BUFFER_SIZE; goto error; } if (UNLIKELY(((flags & CL_MEM_READ_WRITE) && (flags & (CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY))) || ((flags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_WRITE_ONLY))) || ((flags & CL_MEM_ALLOC_HOST_PTR) && (flags & CL_MEM_USE_HOST_PTR)) || ((flags & CL_MEM_COPY_HOST_PTR) && (flags & CL_MEM_USE_HOST_PTR)) || ((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS)) || ((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_WRITE_ONLY)) || ((flags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS)) || ((flags & (~(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) != 0))) { err = CL_INVALID_VALUE; goto error; } /* This flag is valid only if host_ptr is not NULL */ if (UNLIKELY((((flags & CL_MEM_COPY_HOST_PTR) || (flags & CL_MEM_USE_HOST_PTR)) && data == NULL)) || (!(flags & (CL_MEM_COPY_HOST_PTR |CL_MEM_USE_HOST_PTR)) && (data != NULL))) { err = CL_INVALID_HOST_PTR; goto error; } if ((err = cl_get_device_info(ctx->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_size), &max_mem_size, NULL)) != CL_SUCCESS) { goto error; } if (UNLIKELY(sz > max_mem_size)) { err = CL_INVALID_BUFFER_SIZE; goto error; } /* HSW: Byte scattered Read/Write has limitation that the buffer size must be a multiple of 4 bytes. 
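As an illustration, a 10-byte request is rounded up to 12 bytes by the ALIGN below before the buffer is allocated.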
*/ sz = ALIGN(sz, 4); /* Create the buffer in video memory */ mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, data, &err); if (mem == NULL || err != CL_SUCCESS) goto error; /* Copy the data if required */ if (flags & CL_MEM_COPY_HOST_PTR) { if (mem->is_userptr) memcpy(mem->host_ptr, data, sz); else cl_buffer_subdata(mem->bo, 0, sz, data); } if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) cl_buffer_subdata(mem->bo, 0, sz, data); if (flags & CL_MEM_USE_HOST_PTR) mem->host_ptr = data; exit: if (errcode_ret) *errcode_ret = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL cl_mem cl_mem_new_sub_buffer(cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type create_type, const void *create_info, cl_int *errcode_ret) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; struct _cl_mem_buffer *sub_buf = NULL; if (buffer->type != CL_MEM_BUFFER_TYPE) { err = CL_INVALID_MEM_OBJECT; goto error; } if (flags && (((buffer->flags & CL_MEM_WRITE_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY))) || ((buffer->flags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) || (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) || ((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS)) || ((flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_WRITE_ONLY)) || ((flags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_HOST_NO_ACCESS)))) { err = CL_INVALID_VALUE; goto error; } if((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_READ_WRITE)) == 0) { flags |= buffer->flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_READ_WRITE); } flags |= buffer->flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR); if((flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0) { flags |= buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS); } if (create_type != CL_BUFFER_CREATE_TYPE_REGION) { err = CL_INVALID_VALUE; goto error; } if (!create_info) { err = CL_INVALID_VALUE; goto error; } cl_buffer_region *info = (cl_buffer_region *)create_info; if (!info->size) { err = CL_INVALID_BUFFER_SIZE; goto error; } if (info->origin > buffer->size || info->origin + info->size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if (info->origin & (buffer->ctx->device->mem_base_addr_align / 8 - 1)) { err = CL_MISALIGNED_SUB_BUFFER_OFFSET; goto error; } /* Now create the sub buffer and link it to the buffer. 
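The sub-buffer below shares the parent's bo rather than getting its own allocation; only sub_offset distinguishes the two, so both objects alias the same storage.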
*/ TRY_ALLOC (sub_buf, CALLOC(struct _cl_mem_buffer)); mem = &sub_buf->base; mem->type = CL_MEM_SUBBUFFER_TYPE; SET_ICD(mem->dispatch) mem->ref_n = 1; mem->magic = CL_MAGIC_MEM_HEADER; mem->flags = flags; mem->offset = buffer->offset; mem->is_userptr = buffer->is_userptr; sub_buf->parent = (struct _cl_mem_buffer*)buffer; cl_mem_add_ref(buffer); /* Append the buffer in the parent buffer list */ pthread_mutex_lock(&((struct _cl_mem_buffer*)buffer)->sub_lock); sub_buf->sub_next = ((struct _cl_mem_buffer*)buffer)->subs; if (((struct _cl_mem_buffer*)buffer)->subs != NULL) ((struct _cl_mem_buffer*)buffer)->subs->sub_prev = sub_buf; ((struct _cl_mem_buffer*)buffer)->subs = sub_buf; pthread_mutex_unlock(&((struct _cl_mem_buffer*)buffer)->sub_lock); mem->bo = buffer->bo; mem->size = info->size; sub_buf->sub_offset = info->origin; if (buffer->flags & CL_MEM_USE_HOST_PTR || buffer->flags & CL_MEM_COPY_HOST_PTR) { mem->host_ptr = buffer->host_ptr; } cl_context_add_ref(buffer->ctx); mem->ctx = buffer->ctx; /* Append the buffer in the context buffer list */ pthread_mutex_lock(&buffer->ctx->buffer_lock); mem->next = buffer->ctx->buffers; if (buffer->ctx->buffers != NULL) buffer->ctx->buffers->prev = mem; buffer->ctx->buffers = mem; pthread_mutex_unlock(&buffer->ctx->buffer_lock); exit: if (errcode_ret) *errcode_ret = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } void cl_mem_replace_buffer(cl_mem buffer, cl_buffer new_bo) { cl_buffer_unreference(buffer->bo); buffer->bo = new_bo; cl_buffer_reference(new_bo); if (buffer->type != CL_MEM_SUBBUFFER_TYPE) return; struct _cl_mem_buffer *it = ((struct _cl_mem_buffer*)buffer)->sub_next; for( ; it != (struct _cl_mem_buffer*)buffer; it = it->sub_next) { cl_buffer_unreference(it->base.bo); it->base.bo = new_bo; cl_buffer_reference(new_bo); } } void cl_mem_copy_image_region(const size_t *origin, const size_t *region, void *dst, size_t dst_row_pitch, size_t dst_slice_pitch, const void *src, size_t src_row_pitch, size_t src_slice_pitch, const struct _cl_mem_image *image, cl_bool offset_dst, cl_bool offset_src) { if(offset_dst) { size_t dst_offset = image->bpp * origin[0] + dst_row_pitch * origin[1] + dst_slice_pitch * origin[2]; dst = (char*)dst + dst_offset; } if(offset_src) { size_t src_offset = image->bpp * origin[0] + src_row_pitch * origin[1] + src_slice_pitch * origin[2]; src = (char*)src + src_offset; } if (!origin[0] && region[0] == image->w && dst_row_pitch == src_row_pitch && (region[2] == 1 || (!origin[1] && region[1] == image->h && dst_slice_pitch == src_slice_pitch))) { memcpy(dst, src, region[2] == 1 ? 
src_row_pitch*region[1] : src_slice_pitch*region[2]); } else { cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src_ptr = src; char* dst_ptr = dst; for (y = 0; y < region[1]; y++) { memcpy(dst_ptr, src_ptr, image->bpp*region[0]); src_ptr += src_row_pitch; dst_ptr += dst_row_pitch; } src = (char*)src + src_slice_pitch; dst = (char*)dst + dst_slice_pitch; } } } void cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region, const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image) { char* dst= cl_mem_map_auto((cl_mem)dst_image, 1); char* src= cl_mem_map_auto((cl_mem)src_image, 0); size_t dst_offset = dst_image->bpp * dst_origin[0] + dst_image->row_pitch * dst_origin[1] + dst_image->slice_pitch * dst_origin[2]; size_t src_offset = src_image->bpp * src_origin[0] + src_image->row_pitch * src_origin[1] + src_image->slice_pitch * src_origin[2]; dst= (char*)dst+ dst_offset; src= (char*)src+ src_offset; cl_uint y, z; for (z = 0; z < region[2]; z++) { const char* src_ptr = src; char* dst_ptr = dst; for (y = 0; y < region[1]; y++) { memcpy(dst_ptr, src_ptr, src_image->bpp*region[0]); src_ptr += src_image->row_pitch; dst_ptr += dst_image->row_pitch; } src = (char*)src + src_image->slice_pitch; dst = (char*)dst + dst_image->slice_pitch; } cl_mem_unmap_auto((cl_mem)src_image); cl_mem_unmap_auto((cl_mem)dst_image); } static void cl_mem_copy_image(struct _cl_mem_image *image, size_t row_pitch, size_t slice_pitch, void* host_ptr) { char* dst_ptr = cl_mem_map_auto((cl_mem)image, 1); size_t origin[3] = {0, 0, 0}; size_t region[3] = {image->w, image->h, image->depth}; cl_mem_copy_image_region(origin, region, dst_ptr, image->row_pitch, image->slice_pitch, host_ptr, row_pitch, slice_pitch, image, CL_FALSE, CL_FALSE); //offset is 0 cl_mem_unmap_auto((cl_mem)image); } cl_image_tiling_t cl_get_default_tiling(cl_driver drv) { static int initialized = 0; static cl_image_tiling_t tiling = CL_TILE_X; if (!initialized) { // FIXME, need to find out the performance diff's root cause on BDW. // SKL's 3D Image can't use TILE_X, so use TILE_Y as default if(cl_driver_get_ver(drv) == 8 || cl_driver_get_ver(drv) == 9) tiling = CL_TILE_Y; char *tilingStr = getenv("OCL_TILING"); if (tilingStr != NULL) { switch (tilingStr[0]) { case '0': tiling = CL_NO_TILE; break; case '1': tiling = CL_TILE_X; break; case '2': tiling = CL_TILE_Y; break; default: break; } } initialized = 1; } return tiling; } static cl_mem _cl_mem_new_image(cl_context ctx, cl_mem_flags flags, const cl_image_format *fmt, const cl_mem_object_type orig_image_type, size_t w, size_t h, size_t depth, size_t pitch, size_t slice_pitch, void *data, cl_int *errcode_ret) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; cl_mem_object_type image_type = orig_image_type; uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT; size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0; size_t origin_width = w; // for image1d buffer work around. 
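/* Note: origin_width preserves the user-visible width because an oversized IMAGE1D_BUFFER is folded into a 2D surface below (w is clamped, h grows). The tiling default chosen next honors the OCL_TILING environment variable handled above ('0' = CL_NO_TILE, '1' = CL_TILE_X, '2' = CL_TILE_Y). */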
cl_image_tiling_t tiling = CL_NO_TILE; /* Check flags consistency */ if (UNLIKELY((flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) && data == NULL)) { err = CL_INVALID_HOST_PTR; goto error; } /* Get the size of each pixel */ if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS)) goto error; /* Only a subset of the formats is supported */ intel_fmt = cl_image_get_intel_format(fmt); if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) { err = CL_IMAGE_FORMAT_NOT_SUPPORTED; goto error; } /* See if the user parameters match */ #define DO_IMAGE_ERROR \ do { \ err = CL_INVALID_IMAGE_SIZE; \ goto error; \ } while (0); if (UNLIKELY(w == 0)) DO_IMAGE_ERROR; if (UNLIKELY(h == 0 && (image_type != CL_MEM_OBJECT_IMAGE1D && image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER))) DO_IMAGE_ERROR; if (image_type == CL_MEM_OBJECT_IMAGE1D) { size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; h = 1; depth = 1; if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(data && (slice_pitch % pitch != 0))) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; tiling = CL_NO_TILE; } else if (image_type == CL_MEM_OBJECT_IMAGE2D || image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR; /* This is an image1d buffer which exceeds the normal image size restriction. We have to use a 2D image to simulate this 1D image. */ h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width; w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w; tiling = CL_NO_TILE; } else if (cl_driver_get_ver(ctx->drv) != 6) { /* Pick the tiling mode (we only do linear on SNB) */ tiling = cl_get_default_tiling(ctx->drv); } size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; depth = 1; } else if (image_type == CL_MEM_OBJECT_IMAGE3D || image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { h = 1; tiling = CL_NO_TILE; } else if (cl_driver_get_ver(ctx->drv) != 6) tiling = cl_get_default_tiling(ctx->drv); size_t min_pitch = bpp * w; if (data && pitch == 0) pitch = min_pitch; size_t min_slice_pitch = pitch * h; if (data && slice_pitch == 0) slice_pitch = min_slice_pitch; if (UNLIKELY(w > ctx->device->image3d_max_width)) DO_IMAGE_ERROR; if (UNLIKELY(h > ctx->device->image3d_max_height)) DO_IMAGE_ERROR; if (image_type == CL_MEM_OBJECT_IMAGE3D && (UNLIKELY(depth > ctx->device->image3d_max_depth))) DO_IMAGE_ERROR else if (UNLIKELY(depth > ctx->device->image_max_array_size)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR; if (UNLIKELY(data && min_slice_pitch > slice_pitch)) DO_IMAGE_ERROR; if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR; if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR; } else assert(0); #undef DO_IMAGE_ERROR /* Tiling requires both pitch and height to be aligned */ if (tiling == CL_NO_TILE) { aligned_pitch = w * bpp; aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)); } else if (tiling ==
CL_TILE_X) { aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0)); aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1)); } else if (tiling == CL_TILE_Y) { aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 0)); aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 1)); } sz = aligned_pitch * aligned_h * depth; /* If sz is larger than 128MB, mapping the GTT may fail on some systems. Because there is no obvious performance drop, disable tiling. */ if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) { tiling = CL_NO_TILE; aligned_pitch = w * bpp; aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)); sz = aligned_pitch * aligned_h * depth; } if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); else { mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err); if (mem != NULL && err == CL_SUCCESS) { struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image *)mem; buffer1d_image->size = origin_width; } } if (mem == NULL || err != CL_SUCCESS) goto error; cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch); if (image_type == CL_MEM_OBJECT_IMAGE1D || image_type == CL_MEM_OBJECT_IMAGE2D || image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) aligned_slice_pitch = 0; else //SKL needs the tiling's aligned_h to compute slice_pitch, while IVB through BDW need CL_NO_TILE's aligned_h. aligned_slice_pitch = aligned_pitch * ALIGN(h, cl_buffer_get_tiling_align(ctx, tiling, 2)); cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt, intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling, 0, 0, 0); /* Copy the data if required */ if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); if (flags & CL_MEM_USE_HOST_PTR) { mem->host_ptr = data; cl_mem_image(mem)->host_row_pitch = pitch; cl_mem_image(mem)->host_slice_pitch = slice_pitch; } } exit: if (errcode_ret) *errcode_ret = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } static cl_mem _cl_mem_new_image_from_buffer(cl_context ctx, cl_mem_flags flags, const cl_image_format* image_format, const cl_image_desc *image_desc, cl_int *errcode_ret) { cl_mem image = NULL; cl_mem buffer = image_desc->buffer; cl_int err = CL_SUCCESS; *errcode_ret = err; cl_ulong max_size; cl_mem_flags merged_flags; uint32_t bpp; uint32_t intel_fmt = INTEL_UNSUPPORTED_FORMAT; size_t offset = 0; /* Get the size of each pixel */ if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS)) goto error; /* Only a subset of the formats is supported */ intel_fmt = cl_image_get_intel_format(image_format); if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) { err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; goto error; } if (!buffer) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } if (flags & (CL_MEM_USE_HOST_PTR|CL_MEM_ALLOC_HOST_PTR|CL_MEM_COPY_HOST_PTR)) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } /* access check.
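A 1D image wrapping a buffer may not relax the buffer's protection: for example, wrapping a CL_MEM_READ_ONLY buffer with CL_MEM_READ_WRITE or CL_MEM_WRITE_ONLY is rejected below with CL_INVALID_VALUE.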
*/ if ((buffer->flags & CL_MEM_WRITE_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY))) { err = CL_INVALID_VALUE; goto error; } if ((buffer->flags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) { err = CL_INVALID_VALUE; goto error; } if ((buffer->flags & CL_MEM_HOST_WRITE_ONLY) && (flags & CL_MEM_HOST_READ_ONLY)) { err = CL_INVALID_VALUE; goto error; } if ((buffer->flags & CL_MEM_HOST_READ_ONLY) && (flags & CL_MEM_HOST_WRITE_ONLY)) { err = CL_INVALID_VALUE; goto error; } if ((buffer->flags & CL_MEM_HOST_NO_ACCESS) && (flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY))) { err = CL_INVALID_VALUE; goto error; } if ((err = cl_get_device_info(ctx->device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(max_size), &max_size, NULL)) != CL_SUCCESS) { goto error; } if (image_desc->image_width > max_size) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } if (image_desc->image_width*bpp > buffer->size) { err = CL_INVALID_IMAGE_DESCRIPTOR; goto error; } merged_flags = buffer->flags; if (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY|CL_MEM_WRITE_ONLY)) { merged_flags &= ~(CL_MEM_READ_WRITE|CL_MEM_READ_ONLY|CL_MEM_WRITE_ONLY); merged_flags |= flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY|CL_MEM_WRITE_ONLY); } if (flags & (CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS)) { merged_flags &= ~(CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS); merged_flags |= flags & (CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS); } struct _cl_mem_buffer *mem_buffer = (struct _cl_mem_buffer*)buffer; if (buffer->type == CL_MEM_SUBBUFFER_TYPE) { offset = ((struct _cl_mem_buffer *)buffer)->sub_offset; mem_buffer = mem_buffer->parent; } /* Get the size of each pixel */ if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS)) goto error; // Per bspec, an image should have at least a 2-line vertical alignment, // so we can't simply attach a buffer to a 1D image surface of the same size. // We have to create a new image, and copy the buffer data to this new image. // And replace the buffer object's references with this image. image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type, mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret); if (image == NULL) return NULL; void *src = cl_mem_map(buffer, 0); void *dst = cl_mem_map(image, 1); // // FIXME, we could use copy buffer to image to do this on the GPU later. // currently the copy buffer to image function doesn't support 1D images. // // There is a potential risk that this buffer was mapped and the caller // still holds the pointer and wants to access it again. This scenario is // not explicitly forbidden in the spec, although it should not be permitted. memcpy(dst, src, mem_buffer->base.size); cl_mem_unmap(buffer); cl_mem_unmap(image); if (err != 0) goto error; // Now replace the buffer's bo with this new bo; we need to take care of the sub-buffer // case. cl_mem_replace_buffer(buffer, image->bo); /* Now point to the right offset if buffer is a SUB_BUFFER.
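For a sub-buffer, offset was captured from sub_offset earlier, so host_ptr below lands on the region the sub-buffer actually covers.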
*/ if (buffer->flags & CL_MEM_USE_HOST_PTR) image->host_ptr = buffer->host_ptr + offset; cl_mem_image(image)->offset = offset; cl_mem_image(image)->w = image_desc->image_width; cl_mem_add_ref(buffer); cl_mem_image(image)->buffer_1d = buffer; return image; error: if (image) cl_mem_delete(image); image = NULL; *errcode_ret = err; return image; } LOCAL cl_mem cl_mem_new_image(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) { switch (image_desc->image_type) { case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE3D: return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, image_desc->image_width, image_desc->image_height, image_desc->image_depth, image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr, errcode_ret); case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D_ARRAY: return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, image_desc->image_width, image_desc->image_height, image_desc->image_array_size, image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr, errcode_ret); case CL_MEM_OBJECT_IMAGE1D_BUFFER: return _cl_mem_new_image_from_buffer(context, flags, image_format, image_desc, errcode_ret); break; case CL_MEM_OBJECT_BUFFER: default: assert(0); } return NULL; } LOCAL void cl_mem_delete(cl_mem mem) { cl_int i; if (UNLIKELY(mem == NULL)) return; if (atomic_dec(&mem->ref_n) > 1) return; #ifdef HAS_EGL if (UNLIKELY(IS_GL_IMAGE(mem))) { cl_mem_gl_delete(cl_mem_gl_image(mem)); } #endif /* If we are an image, delete the 1D buffer if it has one. */ if (IS_IMAGE(mem)) { if (cl_mem_image(mem)->buffer_1d) { assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER); cl_mem_delete(cl_mem_image(mem)->buffer_1d); cl_mem_image(mem)->buffer_1d = NULL; } } /* Remove it from the list */ if (mem->ctx) { pthread_mutex_lock(&mem->ctx->buffer_lock); if (mem->prev) mem->prev->next = mem->next; if (mem->next) mem->next->prev = mem->prev; if (mem->ctx->buffers == mem) mem->ctx->buffers = mem->next; pthread_mutex_unlock(&mem->ctx->buffer_lock); cl_context_delete(mem->ctx); } else { assert((mem->prev == 0) && (mem->next == 0)); } /* Someone still mapped, unmap */ if(mem->map_ref > 0) { assert(mem->mapped_ptr); for(i=0; i<mem->mapped_ptr_sz; i++) { if(mem->mapped_ptr[i].ptr != NULL) { mem->map_ref--; cl_mem_unmap_auto(mem); } } assert(mem->map_ref == 0); } if (mem->mapped_ptr) free(mem->mapped_ptr); if (mem->dstr_cb) { cl_mem_dstr_cb *cb = mem->dstr_cb; while (mem->dstr_cb) { cb = mem->dstr_cb; cb->pfn_notify(mem, cb->user_data); mem->dstr_cb = cb->next; free(cb); } } /* If we are a sub-buffer, do nothing for the bo release.
*/ if (mem->type == CL_MEM_SUBBUFFER_TYPE) { struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; /* Remove it from the parent's list */ assert(buffer->parent); pthread_mutex_lock(&buffer->parent->sub_lock); if (buffer->sub_prev) buffer->sub_prev->sub_next = buffer->sub_next; if (buffer->sub_next) buffer->sub_next->sub_prev = buffer->sub_prev; if (buffer->parent->subs == buffer) buffer->parent->subs = buffer->sub_next; pthread_mutex_unlock(&buffer->parent->sub_lock); cl_mem_delete((cl_mem )(buffer->parent)); } else if (LIKELY(mem->bo != NULL)) { cl_buffer_unreference(mem->bo); } if (mem->is_userptr && (mem->flags & CL_MEM_ALLOC_HOST_PTR) && (mem->type != CL_MEM_SUBBUFFER_TYPE)) cl_free(mem->host_ptr); cl_free(mem); } LOCAL void cl_mem_add_ref(cl_mem mem) { assert(mem); atomic_inc(&mem->ref_n); } #define LOCAL_SZ_0 16 #define LOCAL_SZ_1 4 #define LOCAL_SZ_2 4 LOCAL cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, size_t src_offset, size_t dst_offset, size_t cb) { cl_int ret = CL_SUCCESS; cl_kernel ker = NULL; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {1,1,1}; const unsigned int masks[4] = {0xffffffff, 0x0ff, 0x0ffff, 0x0ffffff}; int aligned = 0; int dw_src_offset = src_offset/4; int dw_dst_offset = dst_offset/4; if (!cb) return ret; /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_buf->ctx == dst_buf->ctx); /* All 16 bytes aligned, fast and easy one. */ if((cb % 16 == 0) && (src_offset % 16 == 0) && (dst_offset % 16 == 0)) { extern char cl_internal_copy_buf_align16_str[]; extern size_t cl_internal_copy_buf_align16_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16, cl_internal_copy_buf_align16_str, (size_t)cl_internal_copy_buf_align16_str_size, NULL); cb = cb/16; aligned = 1; } else if ((cb % 4 == 0) && (src_offset % 4 == 0) && (dst_offset % 4 == 0)) { /* all Dword aligned.*/ extern char cl_internal_copy_buf_align4_str[]; extern size_t cl_internal_copy_buf_align4_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4, cl_internal_copy_buf_align4_str, (size_t)cl_internal_copy_buf_align4_str_size, NULL); cb = cb/4; aligned = 1; } if (aligned) { if (!ker) return CL_OUT_OF_RESOURCES; if (cb < LOCAL_SZ_0) { local_sz[0] = 1; } else { local_sz[0] = LOCAL_SZ_0; } global_sz[0] = ((cb + LOCAL_SZ_0 - 1)/LOCAL_SZ_0)*LOCAL_SZ_0; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf); cl_kernel_set_arg(ker, 1, sizeof(int), &dw_src_offset); cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf); cl_kernel_set_arg(ker, 3, sizeof(int), &dw_dst_offset); cl_kernel_set_arg(ker, 4, sizeof(int), &cb); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); cl_kernel_delete(ker); return ret; } /* Now handle the unaligned cases. */ int dw_num = ((dst_offset % 4 + cb) + 3) / 4; unsigned int first_mask = dst_offset % 4 == 0 ? 0x0 : masks[dst_offset % 4]; unsigned int last_mask = masks[(dst_offset + cb) % 4]; /* handle the very small range copy. */ if (cb < 4 && dw_num == 1) { first_mask = first_mask | ~last_mask; } if (cb < LOCAL_SZ_0) { local_sz[0] = 1; } else { local_sz[0] = LOCAL_SZ_0; } global_sz[0] = ((dw_num + LOCAL_SZ_0 - 1)/LOCAL_SZ_0)*LOCAL_SZ_0; if (src_offset % 4 == dst_offset % 4) { /* Src and dst has the same unaligned offset, just handle the header and tail. 
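Worked example (illustrative): for dst_offset = 2 and cb = 8, dw_num = ((2 + 8) + 3) / 4 = 3 and first_mask = last_mask = masks[2] = 0x0ffff, so only the leading and trailing dwords are partially merged while the middle dword is written whole.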
*/ extern char cl_internal_copy_buf_unalign_same_offset_str[]; extern size_t cl_internal_copy_buf_unalign_same_offset_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET, cl_internal_copy_buf_unalign_same_offset_str, (size_t)cl_internal_copy_buf_unalign_same_offset_str_size, NULL); if (!ker) return CL_OUT_OF_RESOURCES; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf); cl_kernel_set_arg(ker, 1, sizeof(int), &dw_src_offset); cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf); cl_kernel_set_arg(ker, 3, sizeof(int), &dw_dst_offset); cl_kernel_set_arg(ker, 4, sizeof(int), &dw_num); cl_kernel_set_arg(ker, 5, sizeof(int), &first_mask); cl_kernel_set_arg(ker, 6, sizeof(int), &last_mask); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); cl_kernel_delete(ker); return ret; } /* Dst's offset < Src's offset, so one dst dword needs two sequential src dwords to fill it. */ if (dst_offset % 4 < src_offset % 4) { extern char cl_internal_copy_buf_unalign_dst_offset_str[]; extern size_t cl_internal_copy_buf_unalign_dst_offset_str_size; int align_diff = src_offset % 4 - dst_offset % 4; unsigned int dw_mask = masks[align_diff]; int shift = align_diff * 8; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, cl_internal_copy_buf_unalign_dst_offset_str, (size_t)cl_internal_copy_buf_unalign_dst_offset_str_size, NULL); if (!ker) return CL_OUT_OF_RESOURCES; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf); cl_kernel_set_arg(ker, 1, sizeof(int), &dw_src_offset); cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf); cl_kernel_set_arg(ker, 3, sizeof(int), &dw_dst_offset); cl_kernel_set_arg(ker, 4, sizeof(int), &dw_num); cl_kernel_set_arg(ker, 5, sizeof(int), &first_mask); cl_kernel_set_arg(ker, 6, sizeof(int), &last_mask); cl_kernel_set_arg(ker, 7, sizeof(int), &shift); cl_kernel_set_arg(ker, 8, sizeof(int), &dw_mask); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); cl_kernel_delete(ker); return ret; } /* Dst's offset > Src's offset, so one dst dword needs two sequential src dwords to fill it. */ if (dst_offset % 4 > src_offset % 4) { extern char cl_internal_copy_buf_unalign_src_offset_str[]; extern size_t cl_internal_copy_buf_unalign_src_offset_str_size; int align_diff = dst_offset % 4 - src_offset % 4; unsigned int dw_mask = masks[4 - align_diff]; int shift = align_diff * 8; int src_less = !(src_offset % 4) && !((src_offset + cb) % 4); ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, cl_internal_copy_buf_unalign_src_offset_str, (size_t)cl_internal_copy_buf_unalign_src_offset_str_size, NULL); if (!ker) return CL_OUT_OF_RESOURCES; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf); cl_kernel_set_arg(ker, 1, sizeof(int), &dw_src_offset); cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf); cl_kernel_set_arg(ker, 3, sizeof(int), &dw_dst_offset); cl_kernel_set_arg(ker, 4, sizeof(int), &dw_num); cl_kernel_set_arg(ker, 5, sizeof(int), &first_mask); cl_kernel_set_arg(ker, 6, sizeof(int), &last_mask); cl_kernel_set_arg(ker, 7, sizeof(int), &shift); cl_kernel_set_arg(ker, 8, sizeof(int), &dw_mask); cl_kernel_set_arg(ker, 9, sizeof(int), &src_less); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); cl_kernel_delete(ker); return ret; } /* no case can handle this?
*/ assert(0); return ret; } LOCAL cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image* src_image, const size_t * origin, const size_t * region) { cl_int ret = CL_SUCCESS; cl_kernel ker = NULL; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) { extern char cl_internal_fill_image_1d_str[]; extern size_t cl_internal_fill_image_1d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D, cl_internal_fill_image_1d_str, (size_t)cl_internal_fill_image_1d_str_size, NULL); }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { extern char cl_internal_fill_image_1d_array_str[]; extern size_t cl_internal_fill_image_1d_array_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, cl_internal_fill_image_1d_array_str, (size_t)cl_internal_fill_image_1d_array_str_size, NULL); }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { extern char cl_internal_fill_image_2d_str[]; extern size_t cl_internal_fill_image_2d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D, cl_internal_fill_image_2d_str, (size_t)cl_internal_fill_image_2d_str_size, NULL); }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { extern char cl_internal_fill_image_2d_array_str[]; extern size_t cl_internal_fill_image_2d_array_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, cl_internal_fill_image_2d_array_str, (size_t)cl_internal_fill_image_2d_array_str_size, NULL); }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_fill_image_3d_str[]; extern size_t cl_internal_fill_image_3d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_3D, cl_internal_fill_image_3d_str, (size_t)cl_internal_fill_image_3d_str_size, NULL); }else{ return CL_IMAGE_FORMAT_NOT_SUPPORTED; } if (!ker) return CL_OUT_OF_RESOURCES; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image); cl_kernel_set_arg(ker, 1, sizeof(float)*4, pattern); cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion[0]); cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin[0]); cl_kernel_set_arg(ker, 6, sizeof(cl_int), &origin[1]); cl_kernel_set_arg(ker, 7, sizeof(cl_int), &origin[2]); ret = cl_command_queue_ND_range(queue, ker, 3, global_off, global_sz, local_sz); cl_kernel_delete(ker); return ret; } LOCAL cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, cl_mem buffer, size_t offset, size_t size) { cl_int ret = CL_SUCCESS; cl_kernel ker = NULL; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {1,1,1}; char pattern_comb[4]; int is_128 = 0; const void * pattern1 = NULL; assert(offset % pattern_size == 0); assert(size % pattern_size == 0); if (!size) return ret; if (pattern_size == 128) { /* 128 is according to pattern of double16, but double works not very well on some platform. We use two float16 to handle this. 
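Concretely, the 128-byte pattern is split in two below: pattern_size drops to 64, pattern1 points at the second half, size is halved, and the ALIGN128 kernel receives both halves as separate arguments.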
*/ extern char cl_internal_fill_buf_align128_str[]; extern size_t cl_internal_fill_buf_align128_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN128, cl_internal_fill_buf_align128_str, (size_t)cl_internal_fill_buf_align128_str_size, NULL); is_128 = 1; pattern_size = pattern_size / 2; pattern1 = pattern + pattern_size; size = size / 2; } else if (pattern_size % 8 == 0) { /* Handle the 8 16 32 64 cases here. */ extern char cl_internal_fill_buf_align8_str[]; extern size_t cl_internal_fill_buf_align8_str_size; int order = ffs(pattern_size / 8) - 1; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 + order, cl_internal_fill_buf_align8_str, (size_t)cl_internal_fill_buf_align8_str_size, NULL); } else if (pattern_size == 4) { extern char cl_internal_fill_buf_align4_str[]; extern size_t cl_internal_fill_buf_align4_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4, cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL); } else if (size >= 4 && size % 4 == 0 && offset % 4 == 0) { /* The unaligned case. But if copy size and offset are aligned to 4, we can fake the pattern with the pattern duplication fill in. */ assert(pattern_size == 1 || pattern_size == 2); extern char cl_internal_fill_buf_align4_str[]; extern size_t cl_internal_fill_buf_align4_str_size; if (pattern_size == 2) { memcpy(pattern_comb, pattern, sizeof(char)*2); memcpy(pattern_comb + 2, pattern, sizeof(char)*2); } else { pattern_comb[0] = pattern_comb[1] = pattern_comb[2] = pattern_comb[3] = *(char *)pattern; } ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4, cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL); pattern_size = 4; pattern = pattern_comb; } //TODO: Unaligned cases, we may need to optimize it as cl_mem_copy, using mask in kernel //functions. This depend on the usage but now we just use aligned 1 and 2. 
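/* Illustration of the duplication trick above: filling with the single byte 0xAB at a 4-byte aligned offset and size becomes a dword fill with pattern_comb = {0xAB, 0xAB, 0xAB, 0xAB} and pattern_size forced to 4. */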
else if (pattern_size == 2) { extern char cl_internal_fill_buf_align2_str[]; extern size_t cl_internal_fill_buf_align2_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN2, cl_internal_fill_buf_align2_str, (size_t)cl_internal_fill_buf_align2_str_size, NULL); } else if (pattern_size == 1) { extern char cl_internal_fill_buf_unalign_str[]; extern size_t cl_internal_fill_buf_unalign_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_UNALIGN, cl_internal_fill_buf_unalign_str, (size_t)cl_internal_fill_buf_unalign_str_size, NULL); } else assert(0); if (!ker) return CL_OUT_OF_RESOURCES; size = size / pattern_size; offset = offset / pattern_size; if (size < LOCAL_SZ_0) { local_sz[0] = 1; } else { local_sz[0] = LOCAL_SZ_0; } global_sz[0] = ((size + LOCAL_SZ_0 - 1) / LOCAL_SZ_0) * LOCAL_SZ_0; cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &buffer); cl_kernel_set_arg(ker, 1, pattern_size, pattern); cl_kernel_set_arg(ker, 2, sizeof(cl_uint), &offset); cl_kernel_set_arg(ker, 3, sizeof(cl_uint), &size); if (is_128) cl_kernel_set_arg(ker, 4, pattern_size, pattern1); ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); cl_kernel_delete(ker); return ret; } LOCAL cl_int cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, const size_t *src_origin, const size_t *dst_origin, const size_t *region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch) { cl_int ret; cl_kernel ker; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_1}; // the src and dst mem rect is continuous, the copy is degraded to buf copy if((region[0] == dst_row_pitch) && (region[0] == src_row_pitch) && (region[1] * src_row_pitch == src_slice_pitch) && (region[1] * dst_row_pitch == dst_slice_pitch)){ cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; cl_int size = region[0]*region[1]*region[2]; ret = cl_mem_copy(queue, src_buf, dst_buf,src_offset, dst_offset, size); return ret; } if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_buf->ctx == dst_buf->ctx); /* setup the kernel and run. 
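When the offsets, pitches and region[0] are all multiples of 4, everything is divided by 4 below and the ALIGN4 rect kernel copies dwords instead of single bytes.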
  size_t region0 = region[0];
  if( (src_offset % 4 == 0) && (dst_offset % 4 == 0) &&
      (src_row_pitch % 4 == 0) && (dst_row_pitch % 4 == 0) &&
      (src_slice_pitch % 4 == 0) && (dst_slice_pitch % 4 == 0) &&
      (region0 % 4 == 0) ){
    extern char cl_internal_copy_buf_rect_align4_str[];
    extern size_t cl_internal_copy_buf_rect_align4_str_size;
    region0 /= 4;
    src_offset /= 4;
    dst_offset /= 4;
    src_row_pitch /= 4;
    dst_row_pitch /= 4;
    src_slice_pitch /= 4;
    dst_slice_pitch /= 4;
    ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4,
              cl_internal_copy_buf_rect_align4_str, (size_t)cl_internal_copy_buf_rect_align4_str_size, NULL);
  }else{
    extern char cl_internal_copy_buf_rect_str[];
    extern size_t cl_internal_copy_buf_rect_str_size;
    ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT,
              cl_internal_copy_buf_rect_str, (size_t)cl_internal_copy_buf_rect_str_size, NULL);
  }

  if (!ker)
    return CL_OUT_OF_RESOURCES;

  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf);
  cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &dst_buf);
  cl_kernel_set_arg(ker, 2, sizeof(cl_int), &region0);
  cl_kernel_set_arg(ker, 3, sizeof(cl_int), &region[1]);
  cl_kernel_set_arg(ker, 4, sizeof(cl_int), &region[2]);
  cl_kernel_set_arg(ker, 5, sizeof(cl_int), &src_offset);
  cl_kernel_set_arg(ker, 6, sizeof(cl_int), &dst_offset);
  cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_row_pitch);
  cl_kernel_set_arg(ker, 8, sizeof(cl_int), &src_slice_pitch);
  cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_row_pitch);
  cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_slice_pitch);

  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
  cl_kernel_delete(ker);
  return ret;
}

LOCAL cl_int
cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image,
                         struct _cl_mem_image* dst_image, const size_t *src_origin,
                         const size_t *dst_origin, const size_t *region) {
  cl_int ret;
  cl_kernel ker = NULL;
  size_t global_off[] = {0,0,0};
  size_t global_sz[] = {1,1,1};
  size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
  uint32_t fixupDataType;
  uint32_t savedIntelFmt;

  if(region[1] == 1) local_sz[1] = 1;
  if(region[2] == 1) local_sz[2] = 1;
  global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
  global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
  global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];

  switch (src_image->fmt.image_channel_data_type) {
    case CL_SNORM_INT8:
    case CL_UNORM_INT8:  fixupDataType = CL_UNSIGNED_INT8; break;
    case CL_HALF_FLOAT:
    case CL_SNORM_INT16:
    case CL_UNORM_INT16: fixupDataType = CL_UNSIGNED_INT16; break;
    case CL_FLOAT:       fixupDataType = CL_UNSIGNED_INT32; break;
    default:             fixupDataType = 0;
  }

  if (fixupDataType) {
    cl_image_format fmt;
    if (src_image->fmt.image_channel_order != CL_BGRA)
      fmt.image_channel_order = src_image->fmt.image_channel_order;
    else
      fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = fixupDataType;
    savedIntelFmt = src_image->intel_fmt;
    src_image->intel_fmt = cl_image_get_intel_format(&fmt);
    dst_image->intel_fmt = src_image->intel_fmt;
  }

  /* We use one kernel to copy the data. The kernel is lazily created. */
  assert(src_image->base.ctx == dst_image->base.ctx);

  /* Setup the kernel and run.
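     As a host-level illustration of how these branches are reached (plain
     OpenCL API usage, not driver internals; queue, img2d and img3d are
     assumed to be valid objects):

       size_t src_org[3] = {0, 0, 0}, dst_org[3] = {0, 0, 4};
       size_t region[3]  = {64, 64, 1};
       clEnqueueCopyImage(queue, img2d, img3d, src_org, dst_org, region,
                          0, NULL, NULL);

     ends up selecting the 2D-to-3D kernel below; any (src, dst) type pair
     without a branch leaves ker NULL and the copy fails with
     CL_OUT_OF_RESOURCES.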
*/ if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D) { extern char cl_internal_copy_image_1d_to_1d_str[]; extern size_t cl_internal_copy_image_1d_to_1d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, cl_internal_copy_image_1d_to_1d_str, (size_t)cl_internal_copy_image_1d_to_1d_str_size, NULL); } } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { extern char cl_internal_copy_image_2d_to_2d_str[]; extern size_t cl_internal_copy_image_2d_to_2d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_image_2d_to_3d_str[]; extern size_t cl_internal_copy_image_2d_to_3d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { extern char cl_internal_copy_image_2d_to_2d_array_str[]; extern size_t cl_internal_copy_image_2d_to_2d_array_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, cl_internal_copy_image_2d_to_2d_array_str, (size_t)cl_internal_copy_image_2d_to_2d_array_str_size, NULL); } } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { extern char cl_internal_copy_image_1d_array_to_1d_array_str[]; extern size_t cl_internal_copy_image_1d_array_to_1d_array_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, cl_internal_copy_image_1d_array_to_1d_array_str, (size_t)cl_internal_copy_image_1d_array_to_1d_array_str_size, NULL); } } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { extern char cl_internal_copy_image_2d_array_to_2d_array_str[]; extern size_t cl_internal_copy_image_2d_array_to_2d_array_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, cl_internal_copy_image_2d_array_to_2d_array_str, (size_t)cl_internal_copy_image_2d_array_to_2d_array_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { extern char cl_internal_copy_image_2d_array_to_2d_str[]; extern size_t cl_internal_copy_image_2d_array_to_2d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, cl_internal_copy_image_2d_array_to_2d_str, (size_t)cl_internal_copy_image_2d_array_to_2d_str_size, NULL); } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_image_2d_array_to_3d_str[]; extern size_t cl_internal_copy_image_2d_array_to_3d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, cl_internal_copy_image_2d_array_to_3d_str, (size_t)cl_internal_copy_image_2d_array_to_3d_str_size, NULL); } } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { extern char cl_internal_copy_image_3d_to_2d_str[]; extern size_t cl_internal_copy_image_3d_to_2d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, 
                CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, cl_internal_copy_image_3d_to_2d_str,
                (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL);
    } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
      extern char cl_internal_copy_image_3d_to_3d_str[];
      extern size_t cl_internal_copy_image_3d_to_3d_str_size;
      ker = cl_context_get_static_kernel_from_bin(queue->ctx,
                CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, cl_internal_copy_image_3d_to_3d_str,
                (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
    } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
      extern char cl_internal_copy_image_3d_to_2d_array_str[];
      extern size_t cl_internal_copy_image_3d_to_2d_array_str_size;
      ker = cl_context_get_static_kernel_from_bin(queue->ctx,
                CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, cl_internal_copy_image_3d_to_2d_array_str,
                (size_t)cl_internal_copy_image_3d_to_2d_array_str_size, NULL);
    }
  }

  if (!ker) {
    ret = CL_OUT_OF_RESOURCES;
    goto fail;
  }

  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image);
  cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &dst_image);
  cl_kernel_set_arg(ker, 2, sizeof(cl_int), &region[0]);
  cl_kernel_set_arg(ker, 3, sizeof(cl_int), &region[1]);
  cl_kernel_set_arg(ker, 4, sizeof(cl_int), &region[2]);
  cl_kernel_set_arg(ker, 5, sizeof(cl_int), &src_origin[0]);
  cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]);
  cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]);
  cl_kernel_set_arg(ker, 8, sizeof(cl_int), &dst_origin[0]);
  cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_origin[1]);
  cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]);

  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);

fail:
  cl_kernel_delete(ker);
  if (fixupDataType) {
    src_image->intel_fmt = savedIntelFmt;
    dst_image->intel_fmt = savedIntelFmt;
  }
  return ret;
}

LOCAL cl_int
cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, cl_mem buffer,
                            const size_t *src_origin, const size_t dst_offset, const size_t *region) {
  cl_int ret;
  cl_kernel ker = NULL;
  size_t global_off[] = {0,0,0};
  size_t global_sz[] = {1,1,1};
  size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
  uint32_t intel_fmt, bpp;
  cl_image_format fmt;
  size_t origin0, region0;
  size_t kn_dst_offset;
  int align16 = 0;
  size_t align_size = 1;
  size_t w_saved;

  if(region[1] == 1) local_sz[1] = 1;
  if(region[2] == 1) local_sz[2] = 1;
  global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
  global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
  global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];

  /* We use one kernel to copy the data. The kernel is lazily created. */
  assert(image->base.ctx == buffer->ctx);

  intel_fmt = image->intel_fmt;
  bpp = image->bpp;
  w_saved = image->w;
  region0 = region[0] * bpp;
  kn_dst_offset = dst_offset;
  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) &&
     ((src_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (dst_offset % 16 == 0)){
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_UNSIGNED_INT32;
    align16 = 1;
    align_size = 16;
  }
  else{
    fmt.image_channel_order = CL_R;
    fmt.image_channel_data_type = CL_UNSIGNED_INT8;
    align_size = 1;
  }
  image->intel_fmt = cl_image_get_intel_format(&fmt);
  image->w = (image->w * image->bpp) / align_size;
  image->bpp = align_size;
  region0 = (region[0] * bpp) / align_size;
  origin0 = (src_origin[0] * bpp) / align_size;
  kn_dst_offset /= align_size;
  global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0];

  /* Setup the kernel and run. */
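  // Arithmetic sketch of the 16-byte repack above (illustrative numbers): for
  // a 512x4 CL_RGBA / CL_UNORM_INT8 image (bpp = 4), the image is temporarily
  // viewed as CL_RGBA / CL_UNSIGNED_INT32 (16 bytes per "pixel"), so w becomes
  // 512*4/16 = 128 and a region width of 512 pixels becomes region0 =
  // 512*4/16 = 128; each work item then moves 16 bytes at once.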
  if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
    if(align16){
      extern char cl_internal_copy_image_2d_to_buffer_align16_str[];
      extern size_t cl_internal_copy_image_2d_to_buffer_align16_str_size;
      ker = cl_context_get_static_kernel_from_bin(queue->ctx,
                CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
                cl_internal_copy_image_2d_to_buffer_align16_str,
                (size_t)cl_internal_copy_image_2d_to_buffer_align16_str_size, NULL);
    }
    else{
      extern char cl_internal_copy_image_2d_to_buffer_str[];
      extern size_t cl_internal_copy_image_2d_to_buffer_str_size;
      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,
                cl_internal_copy_image_2d_to_buffer_str,
                (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);
    }
  }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
    extern char cl_internal_copy_image_3d_to_buffer_str[];
    extern size_t cl_internal_copy_image_3d_to_buffer_str_size;
    ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
              cl_internal_copy_image_3d_to_buffer_str,
              (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
  }

  if (!ker) {
    ret = CL_OUT_OF_RESOURCES;
    goto fail;
  }

  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &image);
  cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &buffer);
  cl_kernel_set_arg(ker, 2, sizeof(cl_int), &region0);
  cl_kernel_set_arg(ker, 3, sizeof(cl_int), &region[1]);
  cl_kernel_set_arg(ker, 4, sizeof(cl_int), &region[2]);
  cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin0);
  cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]);
  cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]);
  cl_kernel_set_arg(ker, 8, sizeof(cl_int), &kn_dst_offset);

  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);

fail:
  cl_kernel_delete(ker);
  image->intel_fmt = intel_fmt;
  image->bpp = bpp;
  image->w = w_saved;
  return ret;
}

LOCAL cl_int
cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_mem_image* image,
                            const size_t src_offset, const size_t *dst_origin, const size_t *region) {
  cl_int ret;
  cl_kernel ker = NULL;
  size_t global_off[] = {0,0,0};
  size_t global_sz[] = {1,1,1};
  size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
  uint32_t intel_fmt, bpp;
  cl_image_format fmt;
  size_t origin0, region0;
  size_t kn_src_offset;
  int align16 = 0;
  size_t align_size = 1;
  size_t w_saved = 0;

  if(region[1] == 1) local_sz[1] = 1;
  if(region[2] == 1) local_sz[2] = 1;
  global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
  global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
  global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];

  /* We use one kernel to copy the data. The kernel is lazily created. */
  assert(image->base.ctx == buffer->ctx);

  intel_fmt = image->intel_fmt;
  bpp = image->bpp;
  w_saved = image->w;
  region0 = region[0] * bpp;
  kn_src_offset = src_offset;
  if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) &&
     ((dst_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (src_offset % 16 == 0)){
    fmt.image_channel_order = CL_RGBA;
    fmt.image_channel_data_type = CL_UNSIGNED_INT32;
    align16 = 1;
    align_size = 16;
  }
  else{
    fmt.image_channel_order = CL_R;
    fmt.image_channel_data_type = CL_UNSIGNED_INT8;
    align_size = 1;
  }
  image->intel_fmt = cl_image_get_intel_format(&fmt);
  image->w = (image->w * image->bpp) / align_size;
  image->bpp = align_size;
  region0 = (region[0] * bpp) / align_size;
  origin0 = (dst_origin[0] * bpp) / align_size;
  kn_src_offset /= align_size;
  global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0];

  /* Setup the kernel and run. */
  if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
    if(align16){
      extern char cl_internal_copy_buffer_to_image_2d_align16_str[];
      extern size_t cl_internal_copy_buffer_to_image_2d_align16_str_size;
      ker = cl_context_get_static_kernel_from_bin(queue->ctx,
                CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
                cl_internal_copy_buffer_to_image_2d_align16_str,
                (size_t)cl_internal_copy_buffer_to_image_2d_align16_str_size, NULL);
    }
    else{
      extern char cl_internal_copy_buffer_to_image_2d_str[];
      extern size_t cl_internal_copy_buffer_to_image_2d_str_size;
      ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,
                cl_internal_copy_buffer_to_image_2d_str,
                (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);
    }
  }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
    extern char cl_internal_copy_buffer_to_image_3d_str[];
    extern size_t cl_internal_copy_buffer_to_image_3d_str_size;
    ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
              cl_internal_copy_buffer_to_image_3d_str,
              (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);
  }
  if (!ker)
    return CL_OUT_OF_RESOURCES;

  cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &image);
  cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &buffer);
  cl_kernel_set_arg(ker, 2, sizeof(cl_int), &region0);
  cl_kernel_set_arg(ker, 3, sizeof(cl_int), &region[1]);
  cl_kernel_set_arg(ker, 4, sizeof(cl_int), &region[2]);
  cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin0);
  cl_kernel_set_arg(ker, 6, sizeof(cl_int), &dst_origin[1]);
  cl_kernel_set_arg(ker, 7, sizeof(cl_int), &dst_origin[2]);
  cl_kernel_set_arg(ker, 8, sizeof(cl_int), &kn_src_offset);

  ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz);
  cl_kernel_delete(ker);

  image->intel_fmt = intel_fmt;
  image->bpp = bpp;
  image->w = w_saved;
  return ret;
}

LOCAL void*
cl_mem_map(cl_mem mem, int write) {
  cl_buffer_map(mem->bo, write);
  assert(cl_buffer_get_virtual(mem->bo));
  return cl_buffer_get_virtual(mem->bo);
}

LOCAL cl_int
cl_mem_unmap(cl_mem mem) {
  cl_buffer_unmap(mem->bo);
  return CL_SUCCESS;
}

LOCAL void*
cl_mem_map_gtt(cl_mem mem) {
  cl_buffer_map_gtt(mem->bo);
  assert(cl_buffer_get_virtual(mem->bo));
  mem->mapped_gtt = 1;
  return cl_buffer_get_virtual(mem->bo);
}

LOCAL void *
cl_mem_map_gtt_unsync(cl_mem mem) {
  cl_buffer_map_gtt_unsync(mem->bo);
  assert(cl_buffer_get_virtual(mem->bo));
  return cl_buffer_get_virtual(mem->bo);
}

LOCAL cl_int
cl_mem_unmap_gtt(cl_mem mem) {
  cl_buffer_unmap_gtt(mem->bo);
  return CL_SUCCESS;
}

LOCAL void*
cl_mem_map_auto(cl_mem mem, int write) {
  // If mem was not created from a userptr, the offset should always be zero.
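  // Typical pairing of cl_mem_map_auto/cl_mem_unmap_auto (illustrative only;
  // assumes a valid cl_mem and a data pointer of at least mem->size bytes):
  //   void *p = cl_mem_map_auto(mem, 1); // GTT map for tiled images, CPU map or host_ptr otherwise
  //   memcpy(p, data, mem->size);
  //   cl_mem_unmap_auto(mem);            // undoes whichever mapping was used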
if (!mem->is_userptr) assert(mem->offset == 0); if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE) return cl_mem_map_gtt(mem); else { if (mem->is_userptr) { cl_buffer_wait_rendering(mem->bo); return mem->host_ptr; }else return cl_mem_map(mem, write); } } LOCAL cl_int cl_mem_unmap_auto(cl_mem mem) { if (mem->mapped_gtt == 1) { cl_buffer_unmap_gtt(mem->bo); mem->mapped_gtt = 0; } else if (!mem->is_userptr) cl_buffer_unmap(mem->bo); return CL_SUCCESS; } LOCAL cl_int cl_mem_pin(cl_mem mem) { assert(mem); if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0)) return CL_INVALID_MEM_OBJECT; cl_buffer_pin(mem->bo, 4096); return CL_SUCCESS; } LOCAL cl_int cl_mem_unpin(cl_mem mem) { assert(mem); if (UNLIKELY((mem->flags & CL_MEM_PINNABLE) == 0)) return CL_INVALID_MEM_OBJECT; cl_buffer_unpin(mem->bo); return CL_SUCCESS; } LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx, unsigned int bo_name, cl_int* errcode) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, NULL, &err); if (mem == NULL || err != CL_SUCCESS) goto error; size_t sz = 0; mem->bo = cl_buffer_get_buffer_from_libva(ctx, bo_name, &sz); if (mem->bo == NULL) { err = CL_MEM_OBJECT_ALLOCATION_FAILURE; goto error; } mem->size = sz; exit: if (errcode) *errcode = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx, unsigned int bo_name, size_t offset, size_t width, size_t height, cl_image_format fmt, size_t row_pitch, cl_int *errcode) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; struct _cl_mem_image *image = NULL; uint32_t intel_fmt, bpp; /* Get the size of each pixel */ if (UNLIKELY((err = cl_image_byte_per_pixel(&fmt, &bpp)) != CL_SUCCESS)) goto error; intel_fmt = cl_image_get_intel_format(&fmt); if (intel_fmt == INTEL_UNSUPPORTED_FORMAT) { err = CL_IMAGE_FORMAT_NOT_SUPPORTED; goto error; } mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, NULL, &err); if (mem == NULL || err != CL_SUCCESS) { err = CL_OUT_OF_HOST_MEMORY; goto error; } image = cl_mem_image(mem); mem->bo = cl_buffer_get_image_from_libva(ctx, bo_name, image); image->w = width; image->h = height; image->image_type = CL_MEM_OBJECT_IMAGE2D; image->depth = 2; image->fmt = fmt; image->intel_fmt = intel_fmt; image->bpp = bpp; image->row_pitch = row_pitch; image->slice_pitch = 0; // NOTE: tiling of image is set in cl_buffer_get_image_from_libva(). image->tile_x = 0; image->tile_y = 0; image->offset = offset; exit: if (errcode) *errcode = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL cl_int cl_mem_get_fd(cl_mem mem, int* fd) { cl_int err = CL_SUCCESS; if(cl_buffer_get_fd(mem->bo, fd)) err = CL_INVALID_OPERATION; return err; } Beignet-1.1.1-Source/src/cl_platform_id.h000664 001750 001750 00000005603 12576733264 017416 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #ifndef __CL_PLATFORM_ID_H__ #define __CL_PLATFORM_ID_H__ #include "CL/cl.h" #include "cl_internals.h" #include "cl_extensions.h" #include "cl_khr_icd.h" #include "src/OCLConfig.h" #include "src/git_sha1.h" struct _cl_platform_id { DEFINE_ICD(dispatch) const char *profile; const char *version; const char *name; const char *vendor; char *extensions; const char *icd_suffix_khr; size_t profile_sz; size_t version_sz; size_t name_sz; size_t vendor_sz; size_t extensions_sz; size_t icd_suffix_khr_sz; struct cl_extensions *internal_extensions; }; /* Return the default platform */ extern cl_platform_id cl_get_platform_default(void); /* Return the valid platform */ extern cl_int cl_get_platform_ids(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms); /* Return information for the current platform */ extern cl_int cl_get_platform_info(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); #define _STR(x) #x #define _JOINT(x, y) _STR(x) "." _STR(y) #define _JOINT3(x, y, z) _STR(x) "." _STR(y) "." _STR(z) #ifdef BEIGNET_GIT_SHA1 #define BEIGNET_GIT_SHA1_STRING " (" BEIGNET_GIT_SHA1 ")" #else #define BEIGNET_GIT_SHA1_STRING #endif #ifdef LIBCL_DRIVER_VERSION_PATCH #define LIBCL_DRIVER_VERSION_STRING _JOINT3(LIBCL_DRIVER_VERSION_MAJOR, LIBCL_DRIVER_VERSION_MINOR, LIBCL_DRIVER_VERSION_PATCH) #else #define LIBCL_DRIVER_VERSION_STRING _JOINT(LIBCL_DRIVER_VERSION_MAJOR, LIBCL_DRIVER_VERSION_MINOR) #endif #define LIBCL_VERSION_STRING "OpenCL " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING #define LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING #endif /* __CL_PLATFORM_ID_H__ */ Beignet-1.1.1-Source/src/cl_gbe_loader.h000664 001750 001750 00000010033 12576744576 017202 0ustar00yryr000000 000000 /* * Copyright © 2014 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* */ #ifndef __CL_GBE_LOADER_H__ #define __CL_GBE_LOADER_H__ #include "program.h" #ifdef __cplusplus extern "C" { #endif extern gbe_program_new_from_source_cb *compiler_program_new_from_source; extern gbe_program_compile_from_source_cb *compiler_program_compile_from_source; extern gbe_program_new_gen_program_cb *compiler_program_new_gen_program; extern gbe_program_link_program_cb *compiler_program_link_program; extern gbe_program_check_opt_cb *compiler_program_check_opt; extern gbe_program_build_from_llvm_cb *compiler_program_build_from_llvm; extern gbe_program_new_from_llvm_binary_cb *compiler_program_new_from_llvm_binary; extern gbe_program_serialize_to_binary_cb *compiler_program_serialize_to_binary; extern gbe_program_new_from_llvm_cb *compiler_program_new_from_llvm; extern gbe_program_clean_llvm_resource_cb *compiler_program_clean_llvm_resource; extern gbe_program_new_from_binary_cb *interp_program_new_from_binary; extern gbe_program_get_global_constant_size_cb *interp_program_get_global_constant_size; extern gbe_program_get_global_constant_data_cb *interp_program_get_global_constant_data; extern gbe_program_delete_cb *interp_program_delete; extern gbe_program_get_kernel_num_cb *interp_program_get_kernel_num; extern gbe_program_get_kernel_by_name_cb *interp_program_get_kernel_by_name; extern gbe_program_get_kernel_cb *interp_program_get_kernel; extern gbe_kernel_get_name_cb *interp_kernel_get_name; extern gbe_kernel_get_attributes_cb *interp_kernel_get_attributes; extern gbe_kernel_get_code_cb *interp_kernel_get_code; extern gbe_kernel_get_code_size_cb *interp_kernel_get_code_size; extern gbe_kernel_get_arg_num_cb *interp_kernel_get_arg_num; extern gbe_kernel_get_arg_size_cb *interp_kernel_get_arg_size; extern gbe_kernel_get_arg_bti_cb *interp_kernel_get_arg_bti; extern gbe_kernel_get_arg_type_cb *interp_kernel_get_arg_type; extern gbe_kernel_get_arg_align_cb *interp_kernel_get_arg_align; extern gbe_kernel_get_simd_width_cb *interp_kernel_get_simd_width; extern gbe_kernel_get_curbe_offset_cb *interp_kernel_get_curbe_offset; extern gbe_kernel_get_curbe_size_cb *interp_kernel_get_curbe_size; extern gbe_kernel_get_stack_size_cb *interp_kernel_get_stack_size; extern gbe_kernel_get_scratch_size_cb *interp_kernel_get_scratch_size; extern gbe_kernel_get_required_work_group_size_cb *interp_kernel_get_required_work_group_size; extern gbe_kernel_use_slm_cb *interp_kernel_use_slm; extern gbe_kernel_get_slm_size_cb *interp_kernel_get_slm_size; extern gbe_kernel_get_sampler_size_cb *interp_kernel_get_sampler_size; extern gbe_kernel_get_sampler_data_cb *interp_kernel_get_sampler_data; extern gbe_kernel_get_compile_wg_size_cb *interp_kernel_get_compile_wg_size; extern gbe_kernel_get_image_size_cb *interp_kernel_get_image_size; extern gbe_kernel_get_image_data_cb *interp_kernel_get_image_data; extern gbe_get_printf_num_cb* interp_get_printf_num; extern gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti; extern gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti; extern gbe_dup_printfset_cb* interp_dup_printfset; extern gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size; extern gbe_release_printf_info_cb* interp_release_printf_info; extern gbe_output_printf_cb* interp_output_printf; extern gbe_kernel_get_arg_info_cb *interp_kernel_get_arg_info; int CompilerSupported(); #ifdef __cplusplus } #endif #endif /* __CL_GBE_LOADER_H__ */ Beignet-1.1.1-Source/src/cl_context.c000664 001750 001750 00000022666 12576733264 016605 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel 
Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_command_queue.h" #include "cl_mem.h" #include "cl_alloc.h" #include "cl_utils.h" #include "cl_driver.h" #include "cl_khr_icd.h" #include "cl_kernel.h" #include "cl_program.h" #include "CL/cl.h" #include "CL/cl_gl.h" #include #include #include #include #include #define CHECK(var) \ if (var) \ return CL_INVALID_PROPERTY; \ else \ var = 1; static cl_int cl_context_properties_process(const cl_context_properties *prop, struct _cl_context_prop *cl_props, cl_uint * prop_len) { int set_cl_context_platform = 0, set_cl_gl_context_khr = 0, set_cl_egl_display_khr = 0, set_cl_glx_display_khr = 0, set_cl_wgl_hdc_khr = 0, set_cl_cgl_sharegroup_khr = 0; cl_int err = CL_SUCCESS; cl_props->gl_type = CL_GL_NOSHARE; cl_props->platform_id = 0; if (prop == NULL) goto exit; while(*prop) { switch (*prop) { case CL_CONTEXT_PLATFORM: CHECK (set_cl_context_platform); cl_props->platform_id = *(prop + 1); if (UNLIKELY((cl_platform_id) cl_props->platform_id != cl_get_platform_default())) { err = CL_INVALID_PLATFORM; goto error; } break; case CL_GL_CONTEXT_KHR: CHECK (set_cl_gl_context_khr); cl_props->gl_context = *(prop + 1); break; case CL_EGL_DISPLAY_KHR: CHECK (set_cl_egl_display_khr); cl_props->gl_type = CL_GL_EGL_DISPLAY; cl_props->egl_display = *(prop + 1); break; case CL_GLX_DISPLAY_KHR: CHECK (set_cl_glx_display_khr); cl_props->gl_type = CL_GL_GLX_DISPLAY; cl_props->glx_display = *(prop + 1); break; case CL_WGL_HDC_KHR: CHECK (set_cl_wgl_hdc_khr); cl_props->gl_type = CL_GL_WGL_HDC; cl_props->wgl_hdc = *(prop + 1); break; case CL_CGL_SHAREGROUP_KHR: CHECK (set_cl_cgl_sharegroup_khr); cl_props->gl_type = CL_GL_CGL_SHAREGROUP; cl_props->cgl_sharegroup = *(prop + 1); break; default: err = CL_INVALID_PROPERTY; goto error; } prop += 2; *prop_len += 2; } (*prop_len)++; exit: error: return err; } LOCAL cl_context cl_create_context(const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * devices, void (CL_CALLBACK * pfn_notify) (const char*, const void*, size_t, void*), void * user_data, cl_int * errcode_ret) { /* cl_platform_id platform = NULL; */ struct _cl_context_prop props; cl_context ctx = NULL; cl_int err = CL_SUCCESS; cl_uint prop_len = 0; /* XXX */ FATAL_IF (num_devices != 1, "Only one device is supported"); /* Check that we are getting the right platform */ if (UNLIKELY(((err = cl_context_properties_process(properties, &props, &prop_len)) != CL_SUCCESS))) goto error; /* We are good */ if (UNLIKELY((ctx = cl_context_new(&props)) == NULL)) { err = CL_OUT_OF_HOST_MEMORY; goto error; } if(properties != NULL && prop_len > 0) { TRY_ALLOC (ctx->prop_user, CALLOC_ARRAY(cl_context_properties, prop_len)); memcpy(ctx->prop_user, properties, sizeof(cl_context_properties)*prop_len); } ctx->prop_len = prop_len; /* Attach the device to the context */ 
ctx->device = *devices;

  /* Save the user callback and user data */
  ctx->pfn_notify = pfn_notify;
  ctx->user_data = user_data;
  cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result);

exit:
  if (errcode_ret != NULL)
    *errcode_ret = err;
  return ctx;
error:
  cl_context_delete(ctx);
  ctx = NULL;
  goto exit;
}

LOCAL cl_context
cl_context_new(struct _cl_context_prop *props) {
  cl_context ctx = NULL;

  TRY_ALLOC_NO_ERR (ctx, CALLOC(struct _cl_context));
  TRY_ALLOC_NO_ERR (ctx->drv, cl_driver_new(props));
  SET_ICD(ctx->dispatch)
  ctx->props = *props;
  ctx->magic = CL_MAGIC_CONTEXT_HEADER;
  ctx->ref_n = 1;
  ctx->ver = cl_driver_get_ver(ctx->drv);
  pthread_mutex_init(&ctx->program_lock, NULL);
  pthread_mutex_init(&ctx->queue_lock, NULL);
  pthread_mutex_init(&ctx->buffer_lock, NULL);
  pthread_mutex_init(&ctx->sampler_lock, NULL);

exit:
  return ctx;
error:
  cl_context_delete(ctx);
  ctx = NULL;
  goto exit;
}

LOCAL void
cl_context_delete(cl_context ctx) {
  int i = 0;
  if (UNLIKELY(ctx == NULL))
    return;

  /* We are not done yet */
  if (atomic_dec(&ctx->ref_n) > 1)
    return;

  /* Delete the internal programs. */
  for (i = CL_INTERNAL_KERNEL_MIN; i < CL_INTERNAL_KERNEL_MAX; i++) {
    if (ctx->internal_kernels[i]) {
      cl_kernel_delete(ctx->internal_kernels[i]);
      ctx->internal_kernels[i] = NULL;
      assert(ctx->internal_prgs[i]);
      cl_program_delete(ctx->internal_prgs[i]);
      ctx->internal_prgs[i] = NULL;
    }
    /* The built-in kernels are tracked in their own slots. */
    if (ctx->built_in_kernels[i]) {
      cl_kernel_delete(ctx->built_in_kernels[i]);
      ctx->built_in_kernels[i] = NULL;
    }
  }

  cl_program_delete(ctx->built_in_prgs);
  ctx->built_in_prgs = NULL;

  /* All object lists should have been freed. Otherwise, the reference counter
   * of the context cannot be 0. */
  assert(ctx->queues == NULL);
  assert(ctx->programs == NULL);
  assert(ctx->buffers == NULL);
  assert(ctx->drv);
  cl_free(ctx->prop_user);
  cl_driver_delete(ctx->drv);
  ctx->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
  cl_free(ctx);
}

LOCAL void
cl_context_add_ref(cl_context ctx) {
  assert(ctx);
  atomic_inc(&ctx->ref_n);
}

LOCAL cl_command_queue
cl_context_create_queue(cl_context ctx,
                        cl_device_id device,
                        cl_command_queue_properties properties, /* XXX */
                        cl_int *errcode_ret) {
  cl_command_queue queue = NULL;
  cl_int err = CL_SUCCESS;

  /* We create the command queue and store it in the context list of queues */
  TRY_ALLOC (queue, cl_command_queue_new(ctx));
  queue->props = properties;

exit:
  if (errcode_ret)
    *errcode_ret = err;
  return queue;
error:
  cl_command_queue_delete(queue);
  queue = NULL;
  goto exit;
}

cl_buffer_mgr
cl_context_get_bufmgr(cl_context ctx) {
  return cl_driver_get_bufmgr(ctx->drv);
}

cl_kernel
cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
                  const char * str_kernel, size_t size, const char * str_option) {
  cl_int ret;
  cl_int binary_status = CL_SUCCESS;
  cl_kernel ker;
  pthread_mutex_lock(&ctx->program_lock);
  if (ctx->internal_prgs[index] == NULL) {
    ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device,
          &size, (const unsigned char **)&str_kernel, &binary_status, &ret);

    if (!ctx->internal_prgs[index]) {
      ker = NULL;
      goto unlock;
    }
    ret = cl_program_build(ctx->internal_prgs[index], str_option);
    if (ret != CL_SUCCESS) {
      ker = NULL;
      goto unlock;
    }
    ctx->internal_prgs[index]->is_built = 1;

    /* All CL_ENQUEUE_FILL_BUFFER_ALIGN8_xxx indices use the same program, just a different kernel.
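       The index-to-kernel mapping used just below is:
         CL_ENQUEUE_FILL_BUFFER_ALIGN8_8  -> __cl_fill_region_align8_2
         CL_ENQUEUE_FILL_BUFFER_ALIGN8_16 -> __cl_fill_region_align8_4
         CL_ENQUEUE_FILL_BUFFER_ALIGN8_32 -> __cl_fill_region_align8_8
         CL_ENQUEUE_FILL_BUFFER_ALIGN8_64 -> __cl_fill_region_align8_16
       The program is built once; the sibling indices then alias it with an
       extra program reference.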
*/ if (index >= CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 && index <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) { int i = CL_ENQUEUE_FILL_BUFFER_ALIGN8_8; for (; i <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64; i++) { if (index != i) { assert(ctx->internal_prgs[i] == NULL); assert(ctx->internal_kernels[i] == NULL); cl_program_add_ref(ctx->internal_prgs[index]); ctx->internal_prgs[i] = ctx->internal_prgs[index]; } if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_8) { ctx->internal_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], "__cl_fill_region_align8_2", NULL); } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_16) { ctx->internal_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], "__cl_fill_region_align8_4", NULL); } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_32) { ctx->internal_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], "__cl_fill_region_align8_8", NULL); } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) { ctx->internal_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], "__cl_fill_region_align8_16", NULL); } else assert(0); } } else { ctx->internal_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); } } ker = ctx->internal_kernels[index]; unlock: pthread_mutex_unlock(&ctx->program_lock); return cl_kernel_dup(ker); } Beignet-1.1.1-Source/src/cl_internals.h000664 001750 001750 00000002636 12576733264 017120 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_INTERNALS_H__ #define __CL_INTERNALS_H__ /* We put a header to identify each object. 
This will make the programmer life * easy if objects are wrongly used in the API */ #define CL_MAGIC_KERNEL_HEADER 0x1234567890abcdefLL #define CL_MAGIC_CONTEXT_HEADER 0x0ab123456789cdefLL #define CL_MAGIC_PROGRAM_HEADER 0x34560ab12789cdefLL #define CL_MAGIC_QUEUE_HEADER 0x83650a12b79ce4dfLL #define CL_MAGIC_SAMPLER_HEADER 0x686a0ecba79ce33fLL #define CL_MAGIC_EVENT_HEADER 0x8324a9c810ebf90fLL #define CL_MAGIC_MEM_HEADER 0x381a27b9ce6504dfLL #define CL_MAGIC_DEAD_HEADER 0xdeaddeaddeaddeadLL #endif /* __CL_INTERNALS_H__ */ Beignet-1.1.1-Source/src/cl_mem_gl.h000664 001750 001750 00000001047 12576733264 016354 0ustar00yryr000000 000000 #ifndef __CL_MEM_GL_H__ #define __CL_MEM_GL_H__ #include "cl_mem.h" cl_mem cl_mem_new_gl_buffer(cl_context ctx, cl_mem_flags flags, GLuint buf_obj, cl_int *errcode_ret); cl_mem cl_mem_new_gl_texture(cl_context ctx, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int *errcode_ret); #endif Beignet-1.1.1-Source/src/cl_command_queue.c000664 001750 001750 00000031070 12605356050 017713 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "program.h" // for BTI_MAX_IMAGE_NUM #include "cl_command_queue.h" #include "cl_context.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_device_id.h" #include "cl_mem.h" #include "cl_utils.h" #include "cl_thread.h" #include "cl_alloc.h" #include "cl_driver.h" #include "cl_khr_icd.h" #include "cl_event.h" #include "performance.h" #include #include #include LOCAL cl_command_queue cl_command_queue_new(cl_context ctx) { cl_command_queue queue = NULL; assert(ctx); TRY_ALLOC_NO_ERR (queue, CALLOC(struct _cl_command_queue)); SET_ICD(queue->dispatch) queue->magic = CL_MAGIC_QUEUE_HEADER; queue->ref_n = 1; queue->ctx = ctx; if ((queue->thread_data = cl_thread_data_create()) == NULL) { goto error; } /* Append the command queue in the list */ pthread_mutex_lock(&ctx->queue_lock); queue->next = ctx->queues; if (ctx->queues != NULL) ctx->queues->prev = queue; ctx->queues = queue; pthread_mutex_unlock(&ctx->queue_lock); /* The queue also belongs to its context */ cl_context_add_ref(ctx); exit: return queue; error: cl_command_queue_delete(queue); queue = NULL; goto exit; } LOCAL void cl_command_queue_delete(cl_command_queue queue) { assert(queue); if (atomic_dec(&queue->ref_n) != 1) return; // If there is a valid last event, we need to give it a chance to // call the call-back function. 
cl_event last_event = get_last_event(queue); if (last_event && last_event->user_cb) cl_event_update_status(last_event, 1); /* Remove it from the list */ assert(queue->ctx); pthread_mutex_lock(&queue->ctx->queue_lock); if (queue->prev) queue->prev->next = queue->next; if (queue->next) queue->next->prev = queue->prev; if (queue->ctx->queues == queue) queue->ctx->queues = queue->next; pthread_mutex_unlock(&queue->ctx->queue_lock); cl_thread_data_destroy(queue); queue->thread_data = NULL; cl_mem_delete(queue->perf); cl_context_delete(queue->ctx); cl_free(queue->wait_events); queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(queue); } LOCAL void cl_command_queue_add_ref(cl_command_queue queue) { atomic_inc(&queue->ref_n); } static void set_image_info(char *curbe, struct ImageInfo * image_info, struct _cl_mem_image *image) { if (image_info->wSlot >= 0) *(uint32_t*)(curbe + image_info->wSlot) = image->w; if (image_info->hSlot >= 0) *(uint32_t*)(curbe + image_info->hSlot) = image->h; if (image_info->depthSlot >= 0) *(uint32_t*)(curbe + image_info->depthSlot) = image->depth; if (image_info->channelOrderSlot >= 0) *(uint32_t*)(curbe + image_info->channelOrderSlot) = image->fmt.image_channel_order; if (image_info->dataTypeSlot >= 0) *(uint32_t*)(curbe + image_info->dataTypeSlot) = image->fmt.image_channel_data_type; } LOCAL cl_int cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) { uint32_t i; GET_QUEUE_THREAD_GPGPU(queue); for (i = 0; i < k->image_sz; i++) { int id = k->images[i].arg_idx; struct _cl_mem_image *image; assert(interp_kernel_get_arg_type(k->opaque, id) == GBE_ARG_IMAGE); //currently, user ptr is not supported for cl image, so offset should be always zero assert(k->args[id].mem->offset == 0); image = cl_mem_image(k->args[id].mem); set_image_info(k->curbe, &k->images[i], image); cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset, image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); // TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer // on demand. 
    if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
      cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET,
                          image->base.bo, image->offset,
                          image->intel_fmt, image->image_type, image->bpp,
                          image->w, image->h, image->depth,
                          image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling);
  }
  return CL_SUCCESS;
}

LOCAL cl_int
cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) {
  GET_QUEUE_THREAD_GPGPU(queue);

  /* Bind all user buffers (given by clSetKernelArg) */
  uint32_t i;
  enum gbe_arg_type arg_type; /* kind of argument */
  for (i = 0; i < k->arg_n; ++i) {
    uint32_t offset; // location of the address in the curbe
    arg_type = interp_kernel_get_arg_type(k->opaque, i);
    if (arg_type != GBE_ARG_GLOBAL_PTR || !k->args[i].mem)
      continue;
    offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
    if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
      struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset,
                        k->args[i].mem->offset + buffer->sub_offset,
                        k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
    } else {
      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset,
                        k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
    }
  }

  return CL_SUCCESS;
}

extern cl_int cl_command_queue_ND_range_gen7(cl_command_queue, cl_kernel, uint32_t,
                                             const size_t *, const size_t *, const size_t *);

static cl_int
cl_kernel_check_args(cl_kernel k) {
  uint32_t i;
  for (i = 0; i < k->arg_n; ++i)
    if (k->args[i].is_set == CL_FALSE)
      return CL_INVALID_KERNEL_ARGS;
  return CL_SUCCESS;
}

LOCAL cl_int
cl_command_queue_ND_range(cl_command_queue queue, cl_kernel k, const uint32_t work_dim,
                          const size_t *global_wk_off, const size_t *global_wk_sz,
                          const size_t *local_wk_sz) {
  if(b_output_kernel_perf)
    time_start(queue->ctx, cl_kernel_get_name(k), queue);
  const int32_t ver = cl_driver_get_ver(queue->ctx->drv);
  cl_int err = CL_SUCCESS;

  /* Check that the user did not forget any argument */
  TRY (cl_kernel_check_args, k);

  if (ver == 7 || ver == 75 || ver == 8 || ver == 9)
    TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim,
         global_wk_off, global_wk_sz, local_wk_sz);
  else
    FATAL ("Unknown Gen Device");

error:
  return err;
}

LOCAL int
cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu) {
  size_t global_wk_sz[3];
  size_t outbuf_sz = 0;
  void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz, &outbuf_sz);

  if (cl_gpgpu_flush(gpgpu) < 0)
    return CL_OUT_OF_RESOURCES;

  if (printf_info && interp_get_printf_num(printf_info)) {
    void *index_addr = cl_gpgpu_map_printf_buffer(gpgpu, 0);
    void *buf_addr = NULL;
    if (interp_get_printf_sizeof_size(printf_info))
      buf_addr = cl_gpgpu_map_printf_buffer(gpgpu, 1);

    interp_output_printf(printf_info, index_addr, buf_addr,
                         global_wk_sz[0], global_wk_sz[1], global_wk_sz[2], outbuf_sz);

    cl_gpgpu_unmap_printf_buffer(gpgpu, 0);
    if (interp_get_printf_sizeof_size(printf_info))
      cl_gpgpu_unmap_printf_buffer(gpgpu, 1);
  }

  if (printf_info) {
    interp_release_printf_info(printf_info);
    global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
    cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
  }
  return CL_SUCCESS;
}

LOCAL cl_int
cl_command_queue_flush(cl_command_queue queue) {
  int err;
  GET_QUEUE_THREAD_GPGPU(queue);
  err = cl_command_queue_flush_gpgpu(queue, gpgpu);
  // As we don't have a dedicated timer thread to take care of events that
  // have a callback function registered and that may be released inside that
  // callback, no other code will access such an event afterwards. If we don't
  // update its status here, we leak the event and all its buffers, which is
  // really bad.
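  // For reference (host-side, illustrative only): such a callback is
  // registered with
  //   clSetEventCallback(ev, CL_COMPLETE, my_done_cb, user_data);
  // and my_done_cb may release the event and the buffers it owns, which is
  // why the status must be forced to complete before teardown.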
  cl_event last_event = get_last_event(queue);
  if (last_event && last_event->user_cb)
    cl_event_update_status(last_event, 1);

  cl_event current_event = get_current_event(queue);
  if (current_event && err == CL_SUCCESS) {
    err = cl_event_flush(current_event);
    set_current_event(queue, NULL);
  }
  cl_invalid_thread_gpgpu(queue);
  return err;
}

LOCAL cl_int
cl_command_queue_finish(cl_command_queue queue) {
  cl_gpgpu_sync(cl_get_thread_batch_buf(queue));
  return CL_SUCCESS;
}

#define DEFAULT_WAIT_EVENTS_SIZE 16
LOCAL void
cl_command_queue_insert_event(cl_command_queue queue, cl_event event) {
  cl_int i = 0;
  cl_event *new_list;

  assert(queue != NULL);
  if (queue->wait_events == NULL) {
    queue->wait_events_size = DEFAULT_WAIT_EVENTS_SIZE;
    TRY_ALLOC_NO_ERR (queue->wait_events, CALLOC_ARRAY(cl_event, queue->wait_events_size));
  }

  for (i = 0; i < queue->wait_events_num; i++) {
    if (queue->wait_events[i] == event)
      return;   // already in wait_events, no need to insert it again
  }

  if (queue->wait_events_num < queue->wait_events_size) {
    queue->wait_events[queue->wait_events_num++] = event;
    return;
  }

  // wait_events_num == wait_events_size, the array is full
  queue->wait_events_size *= 2;
  TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->wait_events_size));
  memcpy(new_list, queue->wait_events, sizeof(cl_event)*queue->wait_events_num);
  cl_free(queue->wait_events);
  queue->wait_events = new_list;
  queue->wait_events[queue->wait_events_num++] = event;
  return;

exit:
  return;
error:
  if (queue->wait_events)
    cl_free(queue->wait_events);
  queue->wait_events = NULL;
  queue->wait_events_size = 0;
  queue->wait_events_num = 0;
  goto exit;
}

LOCAL void
cl_command_queue_remove_event(cl_command_queue queue, cl_event event) {
  cl_int i = 0;

  assert(queue->wait_events);
  for (i = 0; i < queue->wait_events_num; i++) {
    if (queue->wait_events[i] == event)
      break;
  }

  if (i == queue->wait_events_num)
    return;

  if (i == queue->wait_events_num - 1) {
    queue->wait_events[i] = NULL;
  } else {
    for (; i < queue->wait_events_num - 1; i++) {
      queue->wait_events[i] = queue->wait_events[i+1];
    }
  }
  queue->wait_events_num -= 1;
}

#define DEFAULT_WAIT_EVENTS_SIZE 16
LOCAL void
cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event) {
  cl_int i = 0;
  cl_event *new_list;

  assert(queue != NULL);
  if (queue->barrier_events == NULL) {
    queue->barrier_events_size = DEFAULT_WAIT_EVENTS_SIZE;
    TRY_ALLOC_NO_ERR (queue->barrier_events, CALLOC_ARRAY(cl_event, queue->barrier_events_size));
  }

  for (i = 0; i < queue->barrier_events_num; i++) {
    if (queue->barrier_events[i] == event)
      return;   // already in barrier_events, no need to insert it again
  }

  if (queue->barrier_events_num < queue->barrier_events_size) {
    queue->barrier_events[queue->barrier_events_num++] = event;
    return;
  }

  // barrier_events_num == barrier_events_size, the array is full
  queue->barrier_events_size *= 2;
  TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->barrier_events_size));
  memcpy(new_list, queue->barrier_events, sizeof(cl_event)*queue->barrier_events_num);
  cl_free(queue->barrier_events);
  queue->barrier_events = new_list;
  queue->barrier_events[queue->barrier_events_num++] = event;
  return;

exit:
  return;
error:
  if (queue->barrier_events)
    cl_free(queue->barrier_events);
  queue->barrier_events = NULL;
  queue->barrier_events_size = 0;
  queue->barrier_events_num = 0;
  goto exit;
}

LOCAL void
cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event) {
  cl_int i = 0;

  if (queue->barrier_events_num == 0)
    return;

  for (i = 0; i < queue->barrier_events_num; i++) {
    if (queue->barrier_events[i] == event)
      break;
  }

  if (i == queue->barrier_events_num)
    return;

  if (i == queue->barrier_events_num - 1) {
    queue->barrier_events[i] = NULL;
  } else {
    for (; i < queue->barrier_events_num - 1; i++) {
      queue->barrier_events[i] = queue->barrier_events[i+1];
    }
  }
  queue->barrier_events_num -= 1;
}
Beignet-1.1.1-Source/src/cl_gt_device.h000664 001750 001750 00000012533 12605356050 017032 0ustar00yryr000000 000000 /*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

/* Common fields for all GT devices (IVB / SNB) */
.device_type = CL_DEVICE_TYPE_GPU,
.device_id = 0, /* == device_id (set when requested) */
.vendor_id = INTEL_VENDOR_ID,
.max_work_item_dimensions = 3,
.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
.max_2d_global_work_sizes = {8192, 8192, 1},
.max_3d_global_work_sizes = {8192, 8192, 2048},
.preferred_vector_width_char = 16,
.preferred_vector_width_short = 8,
.preferred_vector_width_int = 4,
.preferred_vector_width_long = 2,
.preferred_vector_width_float = 4,
.preferred_vector_width_double = 0,
.preferred_vector_width_half = 0,
.native_vector_width_char = 8,
.native_vector_width_short = 8,
.native_vector_width_int = 4,
.native_vector_width_long = 2,
.native_vector_width_float = 4,
.native_vector_width_double = 2,
.native_vector_width_half = 8,
.preferred_wg_sz_mul = 16,
.address_bits = 32,
.max_mem_alloc_size = 512 * 1024 * 1024,
.image_support = CL_TRUE,
.max_read_image_args = BTI_MAX_READ_IMAGE_ARGS,
.max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
.image_max_array_size = 2048,
.image2d_max_width = 8192,
.image2d_max_height = 8192,
.image3d_max_width = 8192,
.image3d_max_height = 8192,
.image3d_max_depth = 2048,
.image_mem_size = 65536,
.max_samplers = 16,
.mem_base_addr_align = sizeof(cl_long) * 16 * 8,
.min_data_type_align_size = sizeof(cl_long) * 16,
.double_fp_config = 0,
.global_mem_cache_type = CL_READ_WRITE_CACHE,
.global_mem_size = 1024 * 1024 * 1024,
.max_constant_buffer_size = 128 * 1024 * 1024,
.max_constant_args = 8,
.error_correction_support = CL_FALSE,
#ifdef HAS_USERPTR
.host_unified_memory = CL_TRUE,
#else
.host_unified_memory = CL_FALSE,
#endif
.profiling_timer_resolution = 80, /* ns */
.endian_little = CL_TRUE,
.available = CL_TRUE,
.compiler_available = CL_TRUE,
.linker_available = CL_TRUE,
.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
.queue_properties = CL_QUEUE_PROFILING_ENABLE,
.platform = NULL, /* == intel_platform (set when requested) */

/* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
.single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST,
/* IEEE 754.
*/ .half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , .printf_buffer_size = 1 * 1024 * 1024, .interop_user_sync = CL_TRUE, #define DECL_INFO_STRING(FIELD, STRING) \ .FIELD = STRING, \ .JOIN(FIELD,_sz) = sizeof(STRING), DECL_INFO_STRING(name, "Intel HD Graphics Family") DECL_INFO_STRING(vendor, "Intel") DECL_INFO_STRING(version, LIBCL_VERSION_STRING) DECL_INFO_STRING(profile, "FULL_PROFILE") DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING) DECL_INFO_STRING(extensions, "") DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;" "__cl_copy_region_align16;" "__cl_cpy_region_unalign_same_offset;" "__cl_copy_region_unalign_dst_offset;" "__cl_copy_region_unalign_src_offset;" "__cl_copy_buffer_rect;" "__cl_copy_image_1d_to_1d;" "__cl_copy_image_2d_to_2d;" "__cl_copy_image_3d_to_2d;" "__cl_copy_image_2d_to_3d;" "__cl_copy_image_3d_to_3d;" "__cl_copy_image_2d_to_buffer;" "__cl_copy_image_3d_to_buffer;" "__cl_copy_buffer_to_image_2d;" "__cl_copy_buffer_to_image_3d;" "__cl_fill_region_unalign;" "__cl_fill_region_align2;" "__cl_fill_region_align4;" "__cl_fill_region_align8_2;" "__cl_fill_region_align8_4;" "__cl_fill_region_align8_8;" "__cl_fill_region_align8_16;" "__cl_fill_region_align128;" "__cl_fill_image_1d;" "__cl_fill_image_1d_array;" "__cl_fill_image_2d;" "__cl_fill_image_2d_array;" "__cl_fill_image_3d;") DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING) #undef DECL_INFO_STRING .parent_device = NULL, .partition_max_sub_device = 1, .partition_property = {0}, .affinity_domain = 0, .partition_type = {0}, .device_reference_count = 1, Beignet-1.1.1-Source/src/cl_sampler.h000664 001750 001750 00000004043 12576733264 016556 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_SAMPLER_H__ #define __CL_SAMPLER_H__ #include "CL/cl.h" #include "../backend/src/ocl_common_defines.h" #include /* How to access images */ struct _cl_sampler { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a sampler object */ volatile int ref_n; /* This object is reference counted */ cl_sampler prev, next; /* We chain the samplers in the allocator */ cl_context ctx; /* Context it belongs to */ cl_bool normalized_coords; /* Are coordinates normalized? */ cl_addressing_mode address;/* CLAMP / REPEAT and so on... 
*/ cl_filter_mode filter; /* LINEAR / NEAREST mostly */ uint32_t clkSamplerValue; }; /* Create a new sampler object */ extern cl_sampler cl_sampler_new(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int *err); /* Unref the object and delete it if no more reference on it */ extern void cl_sampler_delete(cl_sampler); /* Add one more reference to this object */ extern void cl_sampler_add_ref(cl_sampler); /* set a sampler kernel argument */ int cl_set_sampler_arg_slot(cl_kernel k, int index, cl_sampler sampler); #endif /* __CL_SAMPLER_H__ */ Beignet-1.1.1-Source/src/cl_image.h000664 001750 001750 00000003244 12576733264 016177 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_IMAGE_H__ #define __CL_IMAGE_H__ #include "cl_internals.h" #include "CL/cl.h" #include /* Returned when the OCL format is not supported */ #define INTEL_UNSUPPORTED_FORMAT ((uint32_t) ~0x0u) /* Compute the number of bytes per pixel if the format is supported */ extern cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp); /* Return the intel format for the given OCL format */ extern uint32_t cl_image_get_intel_format(const cl_image_format *fmt); /* Return the list of formats supported by the API */ extern cl_int cl_image_get_supported_fmt(cl_context context, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format *image_formats, cl_uint *num_image_formats); #endif /* __CL_IMAGE_H__ */ Beignet-1.1.1-Source/src/cl_extensions.c000664 001750 001750 00000007665 12605356050 017305 0ustar00yryr000000 000000 #include "llvm/Config/llvm-config.h" #ifdef HAS_EGL #include "EGL/egl.h" #include "EGL/eglext.h" #endif #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_internals.h" #include "CL/cl.h" #include "cl_utils.h" #include #include #include /* This extension should be common for all the intel GPU platform. Every device may have its own additional externsions. */ static struct cl_extensions intel_platform_extensions = { { #define DECL_EXT(name) \ {(struct cl_extension_base){.ext_id = cl_##name##_ext_id, .ext_name = "cl_" #name, .ext_enabled = 0}}, DECL_ALL_EXTENSIONS }, #undef DECL_EXT {""} }; void check_basic_extension(cl_extensions_t *extensions) { int id; for(id = BASE_EXT_START_ID; id <= BASE_EXT_END_ID; id++) if (id != EXT_ID(khr_fp64)) extensions->extensions[id].base.ext_enabled = 1; } void check_opt1_extension(cl_extensions_t *extensions) { int id; for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++) { if (id == EXT_ID(khr_icd)) extensions->extensions[id].base.ext_enabled = 1; #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 if (id == EXT_ID(khr_spir)) extensions->extensions[id].base.ext_enabled = 1; #endif } } void check_gl_extension(cl_extensions_t *extensions) { #if defined(HAS_EGL) int id; /* For now, we only support cl_khr_gl_sharing. 
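     Once enabled here, the extension name is folded into the extension
     string by process_extension_str(), so a host-side query such as
     (illustrative; dev assumed to be a valid cl_device_id)

       char buf[1024];
       clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, sizeof(buf), buf, NULL);

     reports "cl_khr_gl_sharing" alongside the base extensions.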
*/ for(id = GL_EXT_START_ID; id <= GL_EXT_END_ID; id++) if (id == EXT_ID(khr_gl_sharing)) extensions->extensions[id].base.ext_enabled = 1; #endif } void check_intel_extension(cl_extensions_t *extensions) { /* Should put those map/unmap extensions here. */ } void process_extension_str(cl_extensions_t *extensions) { int str_max = sizeof(extensions->ext_str); int str_offset = 0; int id; memset(extensions->ext_str, 0, sizeof(extensions->ext_str)); for(id = 0; id < cl_khr_extension_id_max; id++) { if (extensions->extensions[id].base.ext_enabled) { int copy_len; char *ext_name = extensions->extensions[id].base.ext_name; if (str_offset + 1 >= str_max) return; if (str_offset != 0) extensions->ext_str[str_offset - 1] = ' '; copy_len = (strlen(ext_name) + 1 + str_offset) < str_max ? (strlen(ext_name) + 1) : (str_max - str_offset - 1); strncpy(&extensions->ext_str[str_offset], extensions->extensions[id].base.ext_name, copy_len); str_offset += copy_len; } } } LOCAL void cl_intel_platform_get_default_extension(cl_device_id device) { cl_platform_id pf = device->platform; memcpy((char*)device->extensions, pf->internal_extensions->ext_str, sizeof(device->extensions)); device->extensions_sz = strlen(pf->internal_extensions->ext_str) + 1; } LOCAL void cl_intel_platform_enable_fp16_extension(cl_device_id device) { cl_extensions_t new_ext; cl_platform_id pf = device->platform; int id; assert(pf); memcpy(&new_ext, pf->internal_extensions, sizeof(new_ext)); for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++) { if (id == EXT_ID(khr_fp16)) new_ext.extensions[id].base.ext_enabled = 1; } process_extension_str(&new_ext); memcpy((char*)device->extensions, new_ext.ext_str, sizeof(device->extensions)); device->extensions_sz = strlen(new_ext.ext_str) + 1; } LOCAL void cl_intel_platform_extension_init(cl_platform_id intel_platform) { static int ext_initialized = 0; /* The EXT should be only inited once. */ assert(!ext_initialized); check_basic_extension(&intel_platform_extensions); check_opt1_extension(&intel_platform_extensions); check_gl_extension(&intel_platform_extensions); check_intel_extension(&intel_platform_extensions); process_extension_str(&intel_platform_extensions); ext_initialized = 1; intel_platform->internal_extensions = &intel_platform_extensions; intel_platform->extensions = intel_platform_extensions.ext_str; intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1; return; } Beignet-1.1.1-Source/src/intel/intel_batchbuffer.c000664 001750 001750 00000012611 12576733264 021211 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ #include "intel/intel_batchbuffer.h" #include "intel/intel_driver.h" #include "cl_alloc.h" #include "cl_utils.h" #include #include #include #include LOCAL int intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz) { if (batch->buffer != NULL) { dri_bo_unreference(batch->buffer); batch->buffer = NULL; batch->last_bo = NULL; } batch->buffer = dri_bo_alloc(batch->intel->bufmgr, "batch buffer", sz, 64); if (!batch->buffer || (dri_bo_map(batch->buffer, 1) != 0)) { if (batch->buffer) dri_bo_unreference(batch->buffer); batch->buffer = NULL; return -1; } batch->map = (uint8_t*) batch->buffer->virtual; batch->size = sz; batch->ptr = batch->map; batch->atomic = 0; batch->last_bo = batch->buffer; batch->enable_slm = 0; return 0; } LOCAL void intel_batchbuffer_init(intel_batchbuffer_t *batch, intel_driver_t *intel) { assert(intel); batch->intel = intel; } LOCAL void intel_batchbuffer_terminate(intel_batchbuffer_t *batch) { assert(batch->buffer); if (batch->map) { dri_bo_unmap(batch->buffer); batch->map = NULL; } dri_bo_unreference(batch->buffer); batch->buffer = NULL; } LOCAL int intel_batchbuffer_flush(intel_batchbuffer_t *batch) { uint32_t used = batch->ptr - batch->map; int is_locked = batch->intel->locked; int err = 0; if (used == 0) return 0; if ((used & 4) == 0) { *(uint32_t*) batch->ptr = 0; batch->ptr += 4; } *(uint32_t*)batch->ptr = MI_BATCH_BUFFER_END; batch->ptr += 4; used = batch->ptr - batch->map; dri_bo_unmap(batch->buffer); batch->ptr = batch->map = NULL; if (!is_locked) intel_driver_lock_hardware(batch->intel); int flag = I915_EXEC_RENDER; if(batch->enable_slm) { /* use the hard-coded bit here temporarily; switch to * I915_EXEC_ENABLE_SLM once drm accepts the patch */ flag |= (1<<13); } if (drm_intel_gem_bo_context_exec(batch->buffer, batch->intel->ctx, used, flag) < 0) { fprintf(stderr, "drm_intel_gem_bo_context_exec() failed: %s\n", strerror(errno)); err = -1; } if (!is_locked) intel_driver_unlock_hardware(batch->intel); return err; } LOCAL void intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch, dri_bo *bo, uint32_t read_domains, uint32_t write_domains, uint32_t delta) { assert(batch->ptr - batch->map < batch->size); dri_bo_emit_reloc(batch->buffer, read_domains, write_domains, delta, batch->ptr - batch->map, bo); intel_batchbuffer_emit_dword(batch, bo->offset + delta); } LOCAL intel_batchbuffer_t* 
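/* Typical driver-internal lifecycle of a batch buffer, as a sketch built
 * from the functions above (the size and the emitted payload are the
 * caller's choice; error checking elided):
 *
 *   intel_batchbuffer_t *batch = intel_batchbuffer_new(intel);
 *   intel_batchbuffer_reset(batch, 16 * 1024);  // alloc + map the bo
 *   ... fill batch->ptr with dwords / relocs ...
 *   intel_batchbuffer_flush(batch);   // pad to a qword, append
 *                                     // MI_BATCH_BUFFER_END, then exec
 *   intel_batchbuffer_delete(batch);
 */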
intel_batchbuffer_new(intel_driver_t *intel) { intel_batchbuffer_t *batch = NULL; assert(intel); TRY_ALLOC_NO_ERR (batch, CALLOC(intel_batchbuffer_t)); intel_batchbuffer_init(batch, intel); exit: return batch; error: intel_batchbuffer_delete(batch); batch = NULL; goto exit; } LOCAL void intel_batchbuffer_delete(intel_batchbuffer_t *batch) { if (batch == NULL) return; if(batch->buffer) intel_batchbuffer_terminate(batch); cl_free(batch); } Beignet-1.1.1-Source/src/intel/intel_dri_resource_sharing.c000664 001750 001750 00000015743 12576733264 023147 0ustar00yryr000000 000000 /************************************************************************** * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ #define HAVE_PTHREAD 1 #include #include #include "main/context.h" #include "main/renderbuffer.h" #include "main/texobj.h" #include #include #include #include #include #include #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_context.h" #include "intel_dri_resource_sharing.h" #include "intel_dri_resource_sharing_int.h" #include /** * Sets up a DRIImage structure to point to our shared image in a region */ static bool intel_setup_cl_region_from_mipmap_tree(void *driver, struct intel_context *intel, struct intel_mipmap_tree *mt, GLuint level, GLuint zoffset, struct _intel_dri_share_image_region *region) { unsigned int draw_x, draw_y; uint32_t mask_x, mask_y; struct intel_region *null_region = (struct intel_region *)NULL; intel_miptree_check_level_layer(mt, level, zoffset); _intel_region_get_tile_masks(mt->region, &mask_x, &mask_y, false); _intel_miptree_get_image_offset(mt, level, zoffset, &draw_x, &draw_y); region->w = mt->level[level].width; region->h = mt->level[level].height; region->tile_x = draw_x & mask_x; region->tile_y = draw_y & mask_y; region->tiling = mt->region->tiling; /* XXX hard code to 1 right now. 
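   A worked example of the tile math above, for an X-tiled region with
   cpp = 4: _intel_region_get_tile_masks() gives mask_x = 512/4 - 1 = 127
   and mask_y = 7, so draw_x = 130, draw_y = 10 yield tile_x = 2 and
   tile_y = 2, and the aligned offset below is computed from
   (draw_x & ~mask_x, draw_y & ~mask_y) = (128, 8).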
*/ region->depth = 1; region->row_pitch = mt->region->pitch; region->offset = _intel_region_get_aligned_offset(mt->region, draw_x & ~mask_x, draw_y & ~mask_y, false); if (!_intel_region_flink(mt->region, &region->name)) return false; _intel_region_reference(&null_region, mt->region); return true; } typedef void _mesa_test_texobj_completeness_t( const struct gl_context *ctx, struct gl_texture_object *t ); _mesa_test_texobj_completeness_t *__mesa_test_texobj_completeness; typedef struct gl_texture_object * _mesa_lookup_texture_t( const struct gl_context *ctx, GLuint id); _mesa_lookup_texture_t *__mesa_lookup_texture; static struct gl_texture_object * intel_get_gl_obj_from_texture(void *driver, struct intel_context *intel, GLenum target, GLint level, GLuint texture, GLuint face) { struct gl_texture_object *obj; __mesa_lookup_texture = dlsym(driver, "_mesa_lookup_texture"); obj = __mesa_lookup_texture(&intel->ctx, texture); if (!obj || obj->Target != target) { return NULL; } __mesa_test_texobj_completeness = dlsym(driver, "_mesa_test_texobj_completeness"); __mesa_test_texobj_completeness(&intel->ctx, obj); if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) { return NULL; } if (level < obj->BaseLevel || level > obj->_MaxLevel) { return NULL; } return obj; } static GLenum get_cl_gl_format(mesa_format format) { switch (format) { case MESA_FORMAT_R8G8B8A8_UNORM: return GL_RGBA; case MESA_FORMAT_A8R8G8B8_UNORM: return GL_BGRA; default: return GL_BGRA; } } static bool intelAcquireTexture(void *driver, __DRIcontext *context, GLenum target, GLint level, GLuint texture, void *user_data) { struct _intel_dri_share_image_region *region = intel_dri_share_image_region(user_data); struct intel_context *intel = context->driverPrivate; struct gl_texture_object *obj; struct intel_texture_object *iobj; /* XXX Always be face 0? */ GLuint face = 0; obj = intel_get_gl_obj_from_texture(driver, intel, target, level, texture, face); if (obj == NULL) return false; iobj = intel_texture_object(obj); region->gl_format = get_cl_gl_format(obj->Image[face][level]->TexFormat); return intel_setup_cl_region_from_mipmap_tree(driver, intel, iobj->mt, level, 0, region); } static bool intelReleaseTexture(void *driver, __DRIcontext *context, GLenum target, GLint level, GLuint texture) { struct intel_context *intel = context->driverPrivate; struct gl_texture_object *obj; struct intel_texture_object *iobj; /* XXX Always be face 0? 
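   For non-cubemap targets Mesa only populates Image[0][level], so face 0
   is the only valid choice here; a cube map would need faces 0..5, one
   per GL_TEXTURE_CUBE_MAP face, which this path does not handle.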
*/ GLuint face = 0; obj = intel_get_gl_obj_from_texture(driver, intel, target, level, texture, face); if (obj == NULL) return false; iobj = intel_texture_object(obj); _intel_region_release(&iobj->mt->region); return true; } static bool intelAcquireBufferObj(void *driver, __DRIcontext *driContextPriv, GLuint bufobj, void *user_data) { return false; } static bool intelReleaseBufferObj(void *driver, __DRIcontext *driContextPriv, GLuint bufobj) { return false; } static bool intelAcquireRenderBuffer(void *driver, __DRIcontext *driContextPriv, GLuint bufobj, void *user_data) { return false; } static bool intelReleaseRenderBuffer(void *driver, __DRIcontext *driContextPriv, GLuint bufobj) { return false; } #include "cl_driver.h" void intel_set_cl_gl_callbacks(void) { cl_gl_acquire_texture = (cl_gl_acquire_texture_cb*)intelAcquireTexture; cl_gl_release_texture = (cl_gl_release_texture_cb*)intelReleaseTexture; cl_gl_acquire_buffer_object = (cl_gl_acquire_buffer_object_cb*)intelAcquireBufferObj; cl_gl_release_buffer_object = (cl_gl_release_buffer_object_cb*)intelReleaseBufferObj; cl_gl_acquire_render_buffer = (cl_gl_acquire_render_buffer_cb*)intelAcquireRenderBuffer; cl_gl_release_render_buffer = (cl_gl_release_render_buffer_cb*)intelReleaseRenderBuffer; } Beignet-1.1.1-Source/src/intel/intel_gpgpu.h000664 001750 001750 00000005656 12576733264 020100 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia * Alexei Soupikov */ #ifndef __INTEL_GPGPU_H__ #define __INTEL_GPGPU_H__ #include "cl_utils.h" #include "cl_driver.h" #include "intel/intel_batchbuffer.h" #include "intel/intel_driver.h" #include #include /* We can bind only a limited number of buffers */ enum { max_buf_n = 128 }; enum { max_img_n = 128}; enum {max_sampler_n = 16 }; struct intel_driver; struct intel_batchbuffer; /* Handle GPGPU state */ struct intel_gpgpu { void* ker_opaque; size_t global_wk_sz[3]; void* printf_info; struct intel_driver *drv; struct intel_batchbuffer *batch; cl_gpgpu_kernel *ker; drm_intel_bo *binded_buf[max_buf_n]; /* all buffers binded for the call */ uint32_t target_buf_offset[max_buf_n];/* internal offset for buffers binded for the call */ uint32_t binded_offset[max_buf_n]; /* their offsets in the curbe buffer */ uint32_t binded_n; /* number of buffers binded */ unsigned long img_bitmap; /* image usage bitmap. */ unsigned int img_index_base; /* base index for image surface.*/ unsigned long sampler_bitmap; /* sampler usage bitmap. 
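   Each bitmap has one bit per fixed slot above; a sketch of claiming the
   lowest free sampler slot with such a bitmap (hypothetical helper, not
   part of this header; gpgpu is a struct intel_gpgpu pointer):

     int slot = __builtin_ffsl(~gpgpu->sampler_bitmap) - 1;
     if (slot >= 0 && slot < max_sampler_n)
       gpgpu->sampler_bitmap |= 1UL << slot;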
*/ struct { drm_intel_bo *bo; } stack_b; struct { drm_intel_bo *bo; } perf_b; struct { drm_intel_bo *bo; } scratch_b; struct { drm_intel_bo *bo; } constant_b; struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */ struct { drm_intel_bo *bo; drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/ struct { drm_intel_bo *bo; } aux_buf; struct { uint32_t surface_heap_offset; uint32_t curbe_offset; uint32_t idrt_offset; uint32_t sampler_state_offset; uint32_t sampler_border_color_state_offset; } aux_offset; uint32_t per_thread_scratch; struct { uint32_t num_cs_entries; uint32_t size_cs_entry; /* size of one entry in 512bit elements */ } curb; uint32_t max_threads; /* max threads requested by the user */ }; struct intel_gpgpu_node { struct intel_gpgpu *gpgpu; struct intel_gpgpu_node *next; }; /* Set the gpgpu related call backs */ extern void intel_set_gpgpu_callbacks(int device_id); #endif /* __INTEL_GPGPU_H__ */ Beignet-1.1.1-Source/src/intel/intel_dri_resource_sharing.h000664 001750 001750 00000001451 12576733264 023143 0ustar00yryr000000 000000 #ifndef __INTEL_DRI_RESOURCE_SHARING_H__ #define __INTEL_DRI_RESOURCE_SHARING_H__ struct _intel_dri_share_image_region { unsigned int name; size_t w; size_t h; size_t depth; size_t pitch; int tiling; size_t offset; size_t tile_x; size_t tile_y; unsigned int gl_format; size_t row_pitch, slice_pitch; }; struct _intel_dri_share_buffer_object { unsigned int name; size_t sz; size_t offset; }; inline static struct _intel_dri_share_image_region * intel_dri_share_image_region(void *user_data) { return (struct _intel_dri_share_image_region *)user_data; } inline static struct _intel_dri_share_buffer_object * intel_dri_share_buffer_object(void *user_data) { return (struct _intel_dri_share_buffer_object *)user_data; } extern void intel_set_cl_gl_callbacks(void); #endif Beignet-1.1.1-Source/src/intel/intel_driver.c000664 001750 001750 00000060226 12605356050 020221 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2009 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Xiang Haihao * Zou Nan hai * */ #if defined(HAS_EGL) #include "GL/gl.h" #include "EGL/egl.h" #include "x11/mesa_egl_extension.h" #endif #ifdef HAS_X11 #include #include "x11/dricommon.h" #endif #include "intel_driver.h" #include "intel_gpgpu.h" #include "intel_batchbuffer.h" #include "intel_bufmgr.h" #include "cl_mem.h" #include #include #include #include #include #include #include #include #include "cl_utils.h" #include "cl_alloc.h" #include "cl_context.h" #include "cl_driver.h" #include "cl_device_id.h" #include "cl_platform_id.h" static void intel_driver_delete(intel_driver_t *driver) { if (driver == NULL) return; if (driver->bufmgr) drm_intel_bufmgr_destroy(driver->bufmgr); cl_free(driver); } static intel_driver_t* intel_driver_new(void) { intel_driver_t *driver = NULL; TRY_ALLOC_NO_ERR (driver, CALLOC(intel_driver_t)); driver->fd = -1; exit: return driver; error: intel_driver_delete(driver); driver = NULL; goto exit; } /* just used for maximum relocation number in drm_intel */ #define BATCH_SIZE 0x4000 /* set OCL_DUMP_AUB=1 to get aub file */ static void intel_driver_aub_dump(intel_driver_t *driver) { char *val; val = getenv("OCL_DUMP_AUB"); if (!val) return; if (atoi(val) != 0) { drm_intel_bufmgr_gem_set_aub_filename(driver->bufmgr, "beignet.aub"); drm_intel_bufmgr_gem_set_aub_dump(driver->bufmgr, 1); } } static int intel_driver_memman_init(intel_driver_t *driver) { driver->bufmgr = drm_intel_bufmgr_gem_init(driver->fd, BATCH_SIZE); if (!driver->bufmgr) return 0; drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr); driver->device_id = drm_intel_bufmgr_gem_get_devid(driver->bufmgr); intel_driver_aub_dump(driver); return 1; } static void intel_driver_context_init(intel_driver_t *driver) { driver->ctx = drm_intel_gem_context_create(driver->bufmgr); assert(driver->ctx); } static void intel_driver_context_destroy(intel_driver_t *driver) { if(driver->ctx) drm_intel_gem_context_destroy(driver->ctx); driver->ctx = NULL; } static int intel_driver_init(intel_driver_t *driver, int dev_fd) { driver->fd = dev_fd; driver->locked = 0; pthread_mutex_init(&driver->ctxmutex, NULL); if (!intel_driver_memman_init(driver)) return 0; intel_driver_context_init(driver); #if EMULATE_GEN driver->gen_ver = EMULATE_GEN; if (EMULATE_GEN == 75) driver->device_id = PCI_CHIP_HASWELL_L; /* we pick L for HSW */ else if (EMULATE_GEN == 7) driver->device_id = PCI_CHIP_IVYBRIDGE_GT2; /* we pick GT2 for IVB */ else if (EMULATE_GEN == 6) driver->device_id = PCI_CHIP_SANDYBRIDGE_GT2; /* we pick GT2 for SNB */ else FATAL ("Unsupported Gen for emulation"); #else if (IS_GEN9(driver->device_id)) driver->gen_ver = 9; else if (IS_GEN8(driver->device_id)) driver->gen_ver = 8; else if (IS_GEN75(driver->device_id)) driver->gen_ver = 75; else if (IS_GEN7(driver->device_id)) driver->gen_ver = 7; else if (IS_GEN6(driver->device_id)) driver->gen_ver = 6; else if(IS_IGDNG(driver->device_id)) driver->gen_ver = 5; else driver->gen_ver = 4; #endif /* EMULATE_GEN */ return 1; } static cl_int intel_driver_open(intel_driver_t *intel, cl_context_prop props) { int cardi; #ifdef HAS_X11 char *driver_name; #endif if (props != NULL && props->gl_type != CL_GL_NOSHARE && props->gl_type != CL_GL_GLX_DISPLAY && props->gl_type != CL_GL_EGL_DISPLAY) { fprintf(stderr, 
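/* Only CL_GL_NOSHARE, CL_GL_GLX_DISPLAY and CL_GL_EGL_DISPLAY are
   recognized sharing types; anything else is rejected here. */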
"Unsupported gl share type %d.\n", props->gl_type); return CL_INVALID_OPERATION; } #ifdef HAS_X11 intel->x11_display = XOpenDisplay(NULL); if(intel->x11_display) { if((intel->dri_ctx = getDRI2State(intel->x11_display, DefaultScreen(intel->x11_display), &driver_name))) { intel_driver_init_shared(intel, intel->dri_ctx); Xfree(driver_name); } else fprintf(stderr, "X server found. dri2 connection failed! \n"); } #endif if(!intel_driver_is_active(intel)) { char card_name[20]; for(cardi = 0; cardi < 16; cardi++) { sprintf(card_name, "/dev/dri/renderD%d", 128+cardi); if (access(card_name, R_OK) != 0) continue; if(intel_driver_init_render(intel, card_name)) break; } } if(!intel_driver_is_active(intel)) { char card_name[20]; for(cardi = 0; cardi < 16; cardi++) { sprintf(card_name, "/dev/dri/card%d", cardi); if (access(card_name, R_OK) != 0) continue; if(intel_driver_init_master(intel, card_name)) break; } } if(!intel_driver_is_active(intel)) { fprintf(stderr, "Device open failed, aborting...\n"); return CL_DEVICE_NOT_FOUND; } #ifdef HAS_EGL if (props && props->gl_type == CL_GL_EGL_DISPLAY) { assert(props->egl_display); } #endif return CL_SUCCESS; } static void intel_driver_close(intel_driver_t *intel) { #ifdef HAS_X11 if(intel->dri_ctx) dri_state_release(intel->dri_ctx); if(intel->x11_display) XCloseDisplay(intel->x11_display); #endif if(intel->need_close) { close(intel->fd); intel->need_close = 0; } intel->dri_ctx = NULL; intel->x11_display = NULL; intel->fd = -1; } LOCAL int intel_driver_is_active(intel_driver_t *driver) { return driver->fd >= 0; } #ifdef HAS_X11 LOCAL int intel_driver_init_shared(intel_driver_t *driver, dri_state_t *state) { int ret; assert(state); if(state->driConnectedFlag != DRI2) return 0; ret = intel_driver_init(driver, state->fd); driver->need_close = 0; return ret; } #endif LOCAL int intel_driver_init_master(intel_driver_t *driver, const char* dev_name) { int dev_fd, ret; drm_client_t client; // usually dev_name = "/dev/dri/card%d" dev_fd = open(dev_name, O_RDWR); if (dev_fd == -1) { fprintf(stderr, "open(\"%s\", O_RDWR) failed: %s\n", dev_name, strerror(errno)); return 0; } // Check that we're authenticated memset(&client, 0, sizeof(drm_client_t)); ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client); if (ret == -1) { fprintf(stderr, "ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client) failed: %s\n", strerror(errno)); close(dev_fd); return 0; } if (!client.auth) { fprintf(stderr, "%s not authenticated\n", dev_name); close(dev_fd); return 0; } ret = intel_driver_init(driver, dev_fd); driver->need_close = 1; return ret; } LOCAL int intel_driver_init_render(intel_driver_t *driver, const char* dev_name) { int dev_fd, ret; dev_fd = open(dev_name, O_RDWR); if (dev_fd == -1) return 0; ret = intel_driver_init(driver, dev_fd); driver->need_close = 1; return ret; } LOCAL int intel_driver_terminate(intel_driver_t *driver) { pthread_mutex_destroy(&driver->ctxmutex); if(driver->need_close) { close(driver->fd); driver->need_close = 0; } driver->fd = -1; return 1; } LOCAL void intel_driver_lock_hardware(intel_driver_t *driver) { PPTHREAD_MUTEX_LOCK(driver); assert(!driver->locked); driver->locked = 1; } LOCAL void intel_driver_unlock_hardware(intel_driver_t *driver) { driver->locked = 0; PPTHREAD_MUTEX_UNLOCK(driver); } LOCAL dri_bo* intel_driver_share_buffer(intel_driver_t *driver, const char *sname, uint32_t name) { dri_bo *bo = intel_bo_gem_create_from_name(driver->bufmgr, sname, name); if (bo == NULL) { fprintf(stderr, "intel_bo_gem_create_from_name create \"%s\" bo from name %d failed: 
%s\n", sname, name, strerror(errno)); return NULL; } return bo; } LOCAL uint32_t intel_driver_shared_name(intel_driver_t *driver, dri_bo *bo) { uint32_t name; assert(bo); dri_bo_flink(bo, &name); return name; } /* XXX a null props is ok? */ static int intel_get_device_id(void) { intel_driver_t *driver = NULL; int intel_device_id; driver = intel_driver_new(); assert(driver != NULL); if(UNLIKELY(intel_driver_open(driver, NULL) != CL_SUCCESS)) return INVALID_CHIP_ID; intel_device_id = driver->device_id; intel_driver_context_destroy(driver); intel_driver_close(driver); intel_driver_terminate(driver); intel_driver_delete(driver); return intel_device_id; } extern void intel_gpgpu_delete_all(intel_driver_t *driver); static void cl_intel_driver_delete(intel_driver_t *driver) { if (driver == NULL) return; intel_gpgpu_delete_all(driver); intel_driver_context_destroy(driver); intel_driver_close(driver); intel_driver_terminate(driver); intel_driver_delete(driver); } #include "cl_gbe_loader.h" static intel_driver_t* cl_intel_driver_new(cl_context_prop props) { intel_driver_t *driver = NULL; TRY_ALLOC_NO_ERR (driver, intel_driver_new()); if(UNLIKELY(intel_driver_open(driver, props) != CL_SUCCESS)) goto error; exit: return driver; error: cl_intel_driver_delete(driver); driver = NULL; goto exit; } static drm_intel_bufmgr* intel_driver_get_bufmgr(intel_driver_t *drv) { return drv->bufmgr; } static uint32_t intel_driver_get_ver(struct intel_driver *drv) { return drv->gen_ver; } static void intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) { drv->atomic_test_result = atomic_flag; } static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; } static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo->virtual; } static int get_cl_tiling(uint32_t drm_tiling) { switch(drm_tiling) { case I915_TILING_X: return CL_TILE_X; case I915_TILING_Y: return CL_TILE_Y; case I915_TILING_NONE: return CL_NO_TILE; default: assert(0); } return CL_NO_TILE; } static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim) { uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver; uint32_t ret = 0; switch (tiling_mode) { case CL_TILE_X: if (dim == 0) { //tileX width in bytes ret = 512; } else if (dim == 1) { //tileX height in number of rows ret = 8; } else if (dim == 2) { //height to calculate slice pitch if (gen_ver == 9) //SKL same as tileY height ret = 8; else if (gen_ver == 8) //IVB, HSW, BDW same as CL_NO_TILE vertical alignment ret = 4; else ret = 2; } else assert(0); break; case CL_TILE_Y: if (dim == 0) { //tileY width in bytes ret = 128; } else if (dim == 1) { //tileY height in number of rows ret = 32; } else if (dim == 2) { //height to calculate slice pitch if (gen_ver == 9) //SKL same as tileY height ret = 32; else if (gen_ver == 8) //IVB, HSW, BDW same as CL_NO_TILE vertical alignment ret = 4; else ret = 2; } else assert(0); break; case CL_NO_TILE: if (dim == 1 || dim == 2) { //vertical alignment if (gen_ver == 8 || gen_ver == 9) //SKL 1D array need 4 alignment qpitch ret = 4; else ret = 2; } else assert(0); break; } return ret; } #if defined(HAS_EGL) #include "intel_dri_resource_sharing.h" #include "cl_image.h" static int cl_get_clformat_from_texture(GLint tex_format, cl_image_format * cl_format) { cl_int ret = CL_SUCCESS; switch (tex_format) { case GL_RGBA8: case GL_RGBA: case GL_RGBA16: case GL_RGBA8I: case GL_RGBA16I: case GL_RGBA32I: case GL_RGBA8UI: case GL_RGBA16UI: case GL_RGBA32UI: case GL_RGBA16F: case GL_RGBA32F: 
cl_format->image_channel_order = CL_RGBA; break; case GL_BGRA: cl_format->image_channel_order = CL_BGRA; break; default: ret = -1; goto error; } switch (tex_format) { case GL_RGBA8: case GL_RGBA: case GL_BGRA: cl_format->image_channel_data_type = CL_UNORM_INT8; break; case GL_RGBA16: cl_format->image_channel_data_type = CL_UNORM_INT16; break; case GL_RGBA8I: cl_format->image_channel_data_type = CL_SIGNED_INT8; break; case GL_RGBA16I: cl_format->image_channel_data_type = CL_SIGNED_INT16; break; case GL_RGBA32I: cl_format->image_channel_data_type = CL_SIGNED_INT32; break; case GL_RGBA8UI: cl_format->image_channel_data_type = CL_UNSIGNED_INT8; break; case GL_RGBA16UI: cl_format->image_channel_data_type = CL_UNSIGNED_INT16; break; case GL_RGBA32UI: cl_format->image_channel_data_type = CL_UNSIGNED_INT32; break; case GL_RGBA16F: cl_format->image_channel_data_type = CL_HALF_FLOAT; break; case GL_RGBA32F: cl_format->image_channel_data_type = CL_FLOAT; break; default: ret = -1; goto error; } error: return ret; } static int get_mem_type_from_target(GLenum texture_target, cl_mem_object_type *type) { switch(texture_target) { case GL_TEXTURE_1D: *type = CL_MEM_OBJECT_IMAGE1D; break; case GL_TEXTURE_2D: *type = CL_MEM_OBJECT_IMAGE2D; break; case GL_TEXTURE_3D: *type = CL_MEM_OBJECT_IMAGE3D; break; case GL_TEXTURE_1D_ARRAY: *type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break; case GL_TEXTURE_2D_ARRAY: *type = CL_MEM_OBJECT_IMAGE2D_ARRAY; break; default: return -1; } return CL_SUCCESS; } static cl_buffer intel_alloc_buffer_from_texture_egl(cl_context ctx, unsigned int target, int miplevel, unsigned int texture, struct _cl_mem_image *image) { cl_buffer bo = (cl_buffer) NULL; struct _intel_dri_share_image_region region; unsigned int bpp, intel_fmt; cl_image_format cl_format; EGLBoolean ret; EGLint attrib_list[] = { EGL_GL_TEXTURE_ID_MESA, texture, EGL_GL_TEXTURE_LEVEL_MESA, miplevel, EGL_GL_TEXTURE_TARGET_MESA, target, EGL_NONE}; ret = eglAcquireResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0], &region); if (!ret) goto out; bo = (cl_buffer)intel_driver_share_buffer((intel_driver_t *)ctx->drv, "rendering buffer", region.name); if (bo == NULL) { eglReleaseResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0]); goto out; } region.tiling = get_cl_tiling(region.tiling); if (cl_get_clformat_from_texture(region.gl_format, &cl_format) != 0) goto error; if (cl_image_byte_per_pixel(&cl_format, &bpp) != CL_SUCCESS) goto error; intel_fmt = cl_image_get_intel_format(&cl_format); if (intel_fmt == INTEL_UNSUPPORTED_FORMAT) goto error; cl_mem_object_type image_type; if (get_mem_type_from_target(target, &image_type) != 0) goto error; cl_mem_image_init(image, region.w, region.h, image_type, region.depth, cl_format, intel_fmt, bpp, region.row_pitch, region.slice_pitch, region.tiling, region.tile_x, region.tile_y, region.offset); out: return bo; error: cl_buffer_unreference(bo); eglReleaseResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0]); return NULL; } static cl_buffer intel_alloc_buffer_from_texture(cl_context ctx, unsigned int target, int miplevel, unsigned int texture, struct _cl_mem_image *image) { if (IS_EGL_CONTEXT(ctx)) return intel_alloc_buffer_from_texture_egl(ctx, target, miplevel, texture, image); return NULL; } static int intel_release_buffer_from_texture(cl_context ctx, unsigned int target, int miplevel, unsigned int texture) { if (IS_EGL_CONTEXT(ctx)) { EGLint attrib_list[] = { EGL_GL_TEXTURE_ID_MESA, texture, EGL_GL_TEXTURE_LEVEL_MESA, 
miplevel, EGL_GL_TEXTURE_TARGET_MESA, target, EGL_NONE}; eglReleaseResourceMESA(EGL_DISP(ctx), EGL_CTX(ctx), EGL_GL_TEXTURE_MESA, &attrib_list[0]); return CL_SUCCESS; } return -1; } #endif cl_buffer intel_share_buffer_from_libva(cl_context ctx, unsigned int bo_name, size_t *sz) { drm_intel_bo *intel_bo; intel_bo = intel_driver_share_buffer((intel_driver_t *)ctx->drv, "shared from libva", bo_name); if (intel_bo == NULL) return NULL; if (sz) *sz = intel_bo->size; return (cl_buffer)intel_bo; } cl_buffer intel_share_image_from_libva(cl_context ctx, unsigned int bo_name, struct _cl_mem_image *image) { drm_intel_bo *intel_bo; uint32_t intel_tiling, intel_swizzle_mode; intel_bo = intel_driver_share_buffer((intel_driver_t *)ctx->drv, "shared from libva", bo_name); drm_intel_bo_get_tiling(intel_bo, &intel_tiling, &intel_swizzle_mode); image->tiling = get_cl_tiling(intel_tiling); return (cl_buffer)intel_bo; } static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags) { #ifdef HAS_USERPTR drm_intel_bo *bo; bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags); /* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled. */ if (bo == NULL) bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED); return (cl_buffer)bo; #else return NULL; #endif } static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling) { switch (tiling) { case CL_NO_TILE: *intel_tiling = I915_TILING_NONE; break; case CL_TILE_X: *intel_tiling = I915_TILING_X; break; case CL_TILE_Y: *intel_tiling = I915_TILING_Y; break; default: assert(0); return -1; } return 0; } static int intel_buffer_set_tiling(cl_buffer bo, cl_image_tiling_t tiling, size_t stride) { uint32_t intel_tiling; int ret; if (UNLIKELY((get_intel_tiling(tiling, &intel_tiling)) < 0)) return -1; #ifndef NDEBUG uint32_t required_tiling; required_tiling = intel_tiling; #endif ret = drm_intel_bo_set_tiling((drm_intel_bo*)bo, &intel_tiling, stride); assert(intel_tiling == required_tiling); return ret; } #define CHV_CONFIG_WARNING \ "Warning: can't get GPU's configurations, will use the minimal one. 
Please update your drm to 2.4.59+ and linux kernel to 4.0.0+.\n" static void intel_update_device_info(cl_device_id device) { intel_driver_t *driver; driver = intel_driver_new(); assert(driver != NULL); if (intel_driver_open(driver, NULL) != CL_SUCCESS) { intel_driver_delete(driver); return; } #ifdef HAS_USERPTR const size_t sz = 4096; void *host_ptr; host_ptr = cl_aligned_malloc(sz, 4096); if (host_ptr != NULL) { cl_buffer bo = intel_buffer_alloc_userptr((cl_buffer_mgr)driver->bufmgr, "CL memory object", host_ptr, sz, 0); if (bo == NULL) device->host_unified_memory = CL_FALSE; else drm_intel_bo_unreference((drm_intel_bo*)bo); cl_free(host_ptr); } else device->host_unified_memory = CL_FALSE; #endif #ifdef HAS_EU_TOTAL unsigned int eu_total; /* Prefer driver-queried max compute units if supported */ if (!drm_intel_get_eu_total(driver->fd, &eu_total)) device->max_compute_unit = eu_total; else if (IS_CHERRYVIEW(device->device_id)) printf(CHV_CONFIG_WARNING); #else if (IS_CHERRYVIEW(device->device_id)) printf(CHV_CONFIG_WARNING); #endif #ifdef HAS_SUBSLICE_TOTAL unsigned int subslice_total; /* Prefer driver-queried subslice count if supported */ if (!drm_intel_get_subslice_total(driver->fd, &subslice_total)) device->sub_slice_count = subslice_total; else if (IS_CHERRYVIEW(device->device_id)) printf(CHV_CONFIG_WARNING); #else if (IS_CHERRYVIEW(device->device_id)) printf(CHV_CONFIG_WARNING); #endif intel_driver_context_destroy(driver); intel_driver_close(driver); intel_driver_terminate(driver); intel_driver_delete(driver); } LOCAL void intel_setup_callbacks(void) { cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new; cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete; cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver; cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag; cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr; cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id; cl_driver_update_device_info = (cl_driver_update_device_info_cb *) intel_update_device_info; cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc; cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr; cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling; #if defined(HAS_EGL) cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture; cl_buffer_release_from_texture = (cl_buffer_release_from_texture_cb *) intel_release_buffer_from_texture; intel_set_cl_gl_callbacks(); #endif cl_buffer_get_buffer_from_libva = (cl_buffer_get_buffer_from_libva_cb *) intel_share_buffer_from_libva; cl_buffer_get_image_from_libva = (cl_buffer_get_image_from_libva_cb *) intel_share_image_from_libva; cl_buffer_reference = (cl_buffer_reference_cb *) drm_intel_bo_reference; cl_buffer_unreference = (cl_buffer_unreference_cb *) drm_intel_bo_unreference; cl_buffer_map = (cl_buffer_map_cb *) drm_intel_bo_map; cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap; cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt; cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt; cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized; cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual; cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size; cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin; cl_buffer_unpin = 
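/* (as with every hook in this table, the cast below only adapts the
   typedef'd callback signature to the matching libdrm entry point) */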
(cl_buffer_unpin_cb *) drm_intel_bo_unpin; cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata; cl_buffer_get_subdata = (cl_buffer_get_subdata_cb *) drm_intel_bo_get_subdata; cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering; cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime; cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align; intel_set_gpgpu_callbacks(intel_get_device_id()); } Beignet-1.1.1-Source/src/intel/intel_defines.h000664 001750 001750 00000042637 12576733264 020373 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* Copyright (C) Intel Corp. 2006. All Rights Reserved. Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to develop this 3D driver. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
**********************************************************************/ /* * Authors: * Keith Whitwell */ #ifndef __GENX_DEFINES_H__ #define __GENX_DEFINES_H__ #define CMD(PIPELINE,OP,SUB_OP) ((3 << 29) | \ ((PIPELINE) << 27) | \ ((OP) << 24) | \ ((SUB_OP) << 16)) #define CMD_URB_FENCE CMD(0, 0, 0) #define CMD_CS_URB_STATE CMD(0, 0, 1) #define CMD_CONSTANT_BUFFER CMD(0, 0, 2) #define CMD_STATE_PREFETCH CMD(0, 0, 3) #define CMD_MEDIA_GATEWAY_STATE CMD(2, 0, 3) #define CMD_MEDIA_STATE_FLUSH CMD(2, 0, 4) #define CMD_GPGPU_WALKER CMD(2, 1, 5) #define CMD_PIPE_CONTROL CMD(3, 2, 0) #define CMD_LOAD_REGISTER_IMM (0x22 << 23) #define CMD_STATE_BASE_ADDRESS CMD(0, 1, 1) #define CMD_STATE_SIP CMD(0, 1, 2) #define CMD_PIPELINE_SELECT CMD(1, 1, 4) #define CMD_SAMPLER_PALETTE_LOAD CMD(3, 1, 2) #define CMD_MEDIA_STATE_POINTERS CMD(2, 0, 0) #define CMD_MEDIA CMD(2, 1, 0) #define CMD_MEDIA_EX CMD(2, 1, 1) #define CMD_PIPELINED_POINTERS CMD(3, 0, 0) #define CMD_BINDING_TABLE_POINTERS CMD(3, 0, 1) #define CMD_VERTEX_BUFFERS CMD(3, 0, 8) #define CMD_VERTEX_ELEMENTS CMD(3, 0, 9) #define CMD_DRAWING_RECTANGLE CMD(3, 1, 0) #define CMD_CONSTANT_COLOR CMD(3, 1, 1) #define CMD_3DPRIMITIVE CMD(3, 3, 0) #define BASE_ADDRESS_MODIFY (1 << 0) #define PIPELINE_SELECT_3D 0 #define PIPELINE_SELECT_MEDIA 1 #define PIPELINE_SELECT_GPGPU 2 #define PIPELINE_SELECT_MASK (3 << 8) #define UF0_CS_REALLOC (1 << 13) #define UF0_VFE_REALLOC (1 << 12) #define UF0_SF_REALLOC (1 << 11) #define UF0_CLIP_REALLOC (1 << 10) #define UF0_GS_REALLOC (1 << 9) #define UF0_VS_REALLOC (1 << 8) #define UF1_CLIP_FENCE_SHIFT 20 #define UF1_GS_FENCE_SHIFT 10 #define UF1_VS_FENCE_SHIFT 0 #define UF2_CS_FENCE_SHIFT 20 #define UF2_VFE_FENCE_SHIFT 10 #define UF2_SF_FENCE_SHIFT 0 #define FLOATING_POINT_IEEE_754 0 #define FLOATING_POINT_NON_IEEE_754 1 #define I965_SURFACE_1D 0 #define I965_SURFACE_2D 1 #define I965_SURFACE_3D 2 #define I965_SURFACE_CUBE 3 #define I965_SURFACE_BUFFER 4 #define I965_SURFACE_NULL 7 #define I965_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 #define I965_SURFACEFORMAT_R32G32B32A32_SINT 0x001 #define I965_SURFACEFORMAT_R32G32B32A32_UINT 0x002 #define I965_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 #define I965_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 #define I965_SURFACEFORMAT_R64G64_FLOAT 0x005 #define I965_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 #define I965_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 #define I965_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 #define I965_SURFACEFORMAT_R32G32B32_FLOAT 0x040 #define I965_SURFACEFORMAT_R32G32B32_SINT 0x041 #define I965_SURFACEFORMAT_R32G32B32_UINT 0x042 #define I965_SURFACEFORMAT_R32G32B32_UNORM 0x043 #define I965_SURFACEFORMAT_R32G32B32_SNORM 0x044 #define I965_SURFACEFORMAT_R32G32B32_SSCALED 0x045 #define I965_SURFACEFORMAT_R32G32B32_USCALED 0x046 #define I965_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 #define I965_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 #define I965_SURFACEFORMAT_R16G16B16A16_SINT 0x082 #define I965_SURFACEFORMAT_R16G16B16A16_UINT 0x083 #define I965_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 #define I965_SURFACEFORMAT_R32G32_FLOAT 0x085 #define I965_SURFACEFORMAT_R32G32_SINT 0x086 #define I965_SURFACEFORMAT_R32G32_UINT 0x087 #define I965_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 #define I965_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 #define I965_SURFACEFORMAT_L32A32_FLOAT 0x08A #define I965_SURFACEFORMAT_R32G32_UNORM 0x08B #define I965_SURFACEFORMAT_R32G32_SNORM 0x08C #define I965_SURFACEFORMAT_R64_FLOAT 0x08D #define I965_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E #define 
I965_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F #define I965_SURFACEFORMAT_A32X32_FLOAT 0x090 #define I965_SURFACEFORMAT_L32X32_FLOAT 0x091 #define I965_SURFACEFORMAT_I32X32_FLOAT 0x092 #define I965_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 #define I965_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 #define I965_SURFACEFORMAT_R32G32_SSCALED 0x095 #define I965_SURFACEFORMAT_R32G32_USCALED 0x096 #define I965_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 #define I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 #define I965_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 #define I965_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 #define I965_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 #define I965_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 #define I965_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 #define I965_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 #define I965_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 #define I965_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA #define I965_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB #define I965_SURFACEFORMAT_R16G16_UNORM 0x0CC #define I965_SURFACEFORMAT_R16G16_SNORM 0x0CD #define I965_SURFACEFORMAT_R16G16_SINT 0x0CE #define I965_SURFACEFORMAT_R16G16_UINT 0x0CF #define I965_SURFACEFORMAT_R16G16_FLOAT 0x0D0 #define I965_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 #define I965_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 #define I965_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 #define I965_SURFACEFORMAT_R32_SINT 0x0D6 #define I965_SURFACEFORMAT_R32_UINT 0x0D7 #define I965_SURFACEFORMAT_R32_FLOAT 0x0D8 #define I965_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 #define I965_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA #define I965_SURFACEFORMAT_L16A16_UNORM 0x0DF #define I965_SURFACEFORMAT_I24X8_UNORM 0x0E0 #define I965_SURFACEFORMAT_L24X8_UNORM 0x0E1 #define I965_SURFACEFORMAT_A24X8_UNORM 0x0E2 #define I965_SURFACEFORMAT_I32_FLOAT 0x0E3 #define I965_SURFACEFORMAT_L32_FLOAT 0x0E4 #define I965_SURFACEFORMAT_A32_FLOAT 0x0E5 #define I965_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 #define I965_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA #define I965_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB #define I965_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC #define I965_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED #define I965_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE #define I965_SURFACEFORMAT_L16A16_FLOAT 0x0F0 #define I965_SURFACEFORMAT_R32_UNORM 0x0F1 #define I965_SURFACEFORMAT_R32_SNORM 0x0F2 #define I965_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 #define I965_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 #define I965_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 #define I965_SURFACEFORMAT_R16G16_SSCALED 0x0F6 #define I965_SURFACEFORMAT_R16G16_USCALED 0x0F7 #define I965_SURFACEFORMAT_R32_SSCALED 0x0F8 #define I965_SURFACEFORMAT_R32_USCALED 0x0F9 #define I965_SURFACEFORMAT_B5G6R5_UNORM 0x100 #define I965_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 #define I965_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 #define I965_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 #define I965_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 #define I965_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 #define I965_SURFACEFORMAT_R8G8_UNORM 0x106 #define I965_SURFACEFORMAT_R8G8_SNORM 0x107 #define I965_SURFACEFORMAT_R8G8_SINT 0x108 #define I965_SURFACEFORMAT_R8G8_UINT 0x109 #define I965_SURFACEFORMAT_R16_UNORM 0x10A #define I965_SURFACEFORMAT_R16_SNORM 0x10B #define I965_SURFACEFORMAT_R16_SINT 0x10C #define I965_SURFACEFORMAT_R16_UINT 0x10D #define I965_SURFACEFORMAT_R16_FLOAT 0x10E #define I965_SURFACEFORMAT_I16_UNORM 0x111 #define I965_SURFACEFORMAT_L16_UNORM 0x112 #define I965_SURFACEFORMAT_A16_UNORM 0x113 #define I965_SURFACEFORMAT_L8A8_UNORM 0x114 #define I965_SURFACEFORMAT_I16_FLOAT 0x115 
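/* The token ranges group formats by element size, as the values above
 * show: 0x000+ 128-bit, 0x040+ 96-bit, 0x080+ 64-bit, 0x0C0+ 32-bit,
 * 0x100+ 16-bit, 0x140+ 8-bit, 0x180+ packed/compressed (YCRCB, BC*). */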
#define I965_SURFACEFORMAT_L16_FLOAT 0x116 #define I965_SURFACEFORMAT_A16_FLOAT 0x117 #define I965_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 #define I965_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A #define I965_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B #define I965_SURFACEFORMAT_R8G8_SSCALED 0x11C #define I965_SURFACEFORMAT_R8G8_USCALED 0x11D #define I965_SURFACEFORMAT_R16_SSCALED 0x11E #define I965_SURFACEFORMAT_R16_USCALED 0x11F #define I965_SURFACEFORMAT_R8_UNORM 0x140 #define I965_SURFACEFORMAT_R8_SNORM 0x141 #define I965_SURFACEFORMAT_R8_SINT 0x142 #define I965_SURFACEFORMAT_R8_UINT 0x143 #define I965_SURFACEFORMAT_A8_UNORM 0x144 #define I965_SURFACEFORMAT_I8_UNORM 0x145 #define I965_SURFACEFORMAT_L8_UNORM 0x146 #define I965_SURFACEFORMAT_P4A4_UNORM 0x147 #define I965_SURFACEFORMAT_A4P4_UNORM 0x148 #define I965_SURFACEFORMAT_R8_SSCALED 0x149 #define I965_SURFACEFORMAT_R8_USCALED 0x14A #define I965_SURFACEFORMAT_R1_UINT 0x181 #define I965_SURFACEFORMAT_YCRCB_NORMAL 0x182 #define I965_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 #define I965_SURFACEFORMAT_BC1_UNORM 0x186 #define I965_SURFACEFORMAT_BC2_UNORM 0x187 #define I965_SURFACEFORMAT_BC3_UNORM 0x188 #define I965_SURFACEFORMAT_BC4_UNORM 0x189 #define I965_SURFACEFORMAT_BC5_UNORM 0x18A #define I965_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B #define I965_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C #define I965_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D #define I965_SURFACEFORMAT_MONO8 0x18E #define I965_SURFACEFORMAT_YCRCB_SWAPUV 0x18F #define I965_SURFACEFORMAT_YCRCB_SWAPY 0x190 #define I965_SURFACEFORMAT_DXT1_RGB 0x191 #define I965_SURFACEFORMAT_FXT1 0x192 #define I965_SURFACEFORMAT_R8G8B8_UNORM 0x193 #define I965_SURFACEFORMAT_R8G8B8_SNORM 0x194 #define I965_SURFACEFORMAT_R8G8B8_SSCALED 0x195 #define I965_SURFACEFORMAT_R8G8B8_USCALED 0x196 #define I965_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 #define I965_SURFACEFORMAT_R64G64B64_FLOAT 0x198 #define I965_SURFACEFORMAT_BC4_SNORM 0x199 #define I965_SURFACEFORMAT_BC5_SNORM 0x19A #define I965_SURFACEFORMAT_R16G16B16_UNORM 0x19C #define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D #define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E #define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F #define I965_SURFACEFORMAT_RAW 0x1FF #define I965_MAPFILTER_NEAREST 0x0 #define I965_MAPFILTER_LINEAR 0x1 #define I965_MAPFILTER_ANISOTROPIC 0x2 #define I965_MIPFILTER_NONE 0 #define I965_MIPFILTER_NEAREST 1 #define I965_MIPFILTER_LINEAR 3 #define I965_TEXCOORDMODE_WRAP 0 #define I965_TEXCOORDMODE_MIRROR 1 #define I965_TEXCOORDMODE_CLAMP 2 #define I965_TEXCOORDMODE_CUBE 3 #define I965_TEXCOORDMODE_CLAMP_BORDER 4 #define I965_TEXCOORDMODE_MIRROR_ONCE 5 #define I965_SURFACERETURNFORMAT_FLOAT32 0 #define I965_SURFACERETURNFORMAT_S1 1 #define I965_TILEWALK_XMAJOR 0 #define I965_TILEWALK_YMAJOR 1 #define GEN8_TILEMODE_LINEAR 0 #define GEN8_TILEMODE_WMAJOR 1 #define GEN8_TILEMODE_XMAJOR 2 #define GEN8_TILEMODE_YMAJOR 3 #define I965_SURCHAN_SELECT_ZERO 0 #define I965_SURCHAN_SELECT_ONE 1 #define I965_SURCHAN_SELECT_RED 4 #define I965_SURCHAN_SELECT_GREEN 5 #define I965_SURCHAN_SELECT_BLUE 6 #define I965_SURCHAN_SELECT_ALPHA 7 #define URB_SIZE(intel) (IS_IGDNG(intel->device_id) ? 1024 : \ IS_G4X(intel->device_id) ? 
384 : 256) // HSW #define HSW_SCRATCH1_OFFSET (0xB038) #define HSW_ROW_CHICKEN3_HDC_OFFSET (0xE49C) // L3 cache stuff #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET (0XB010) #define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET (0xB020) #define GEN7_L3_CNTL_REG3_ADDRESS_OFFSET (0xB024) #define GEN8_L3_CNTL_REG_ADDRESS_OFFSET (0x7034) // To issue pipe controls (reset L3 / SLM or stall) #define GEN7_PIPE_CONTROL_MEDIA 0x2 #define GEN7_PIPE_CONTROL_3D 0x3 #define GEN7_PIPE_CONTROL_INSTRUCTION_GFX 0x3 #define GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL 0x2 #define GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL 0x0 #define GEN7_PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) #define GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) #define GEN_MAPFILTER_NEAREST 0x0 #define GEN_MAPFILTER_LINEAR 0x1 #define GEN_MAPFILTER_ANISOTROPIC 0x2 #define GEN_MIPFILTER_NONE 0 #define GEN_MIPFILTER_NEAREST 1 #define GEN_MIPFILTER_LINEAR 3 #define GEN_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 #define GEN_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 #define GEN_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 #define GEN_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 #define GEN_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 #define GEN_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 #define GEN_TEXCOORDMODE_WRAP 0 #define GEN_TEXCOORDMODE_MIRROR 1 #define GEN_TEXCOORDMODE_CLAMP 2 #define GEN_TEXCOORDMODE_CUBE 3 #define GEN_TEXCOORDMODE_CLAMP_BORDER 4 #define GEN_TEXCOORDMODE_MIRROR_ONCE 5 #endif /* __GENX_DEFINES_H__ */ Beignet-1.1.1-Source/src/intel/intel_driver.h000664 001750 001750 00000012634 12576733264 020243 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2009 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* */ #ifndef _INTEL_DRIVER_H_ #define _INTEL_DRIVER_H_ #include "cl_device_data.h" #include #include #include #include #include #include #include #include #define CMD_MI (0x0 << 29) #define CMD_2D (0x2 << 29) #define MI_NOOP (CMD_MI | 0) #define MI_BATCH_BUFFER_END (CMD_MI | (0xA << 23)) #define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 0x04) #define XY_COLOR_BLT_WRITE_ALPHA (1 << 21) #define XY_COLOR_BLT_WRITE_RGB (1 << 20) #define XY_COLOR_BLT_DST_TILED (1 << 11) /* BR13 */ #define BR13_565 (0x1 << 24) #define BR13_8888 (0x3 << 24) struct dri_state; struct intel_gpgpu_node; typedef struct _XDisplay Display; typedef struct intel_driver { dri_bufmgr *bufmgr; drm_intel_context *ctx; int fd; int device_id; int gen_ver; sigset_t sa_mask; pthread_mutex_t ctxmutex; int locked; int need_close; Display *x11_display; struct dri_state *dri_ctx; struct intel_gpgpu_node *gpgpu_list; int atomic_test_result; } intel_driver_t; #define SET_BLOCKED_SIGSET(DRIVER) do { \ sigset_t bl_mask; \ sigfillset(&bl_mask); \ sigdelset(&bl_mask, SIGFPE); \ sigdelset(&bl_mask, SIGILL); \ sigdelset(&bl_mask, SIGSEGV); \ sigdelset(&bl_mask, SIGBUS); \ sigdelset(&bl_mask, SIGKILL); \ pthread_sigmask(SIG_SETMASK, &bl_mask, &(DRIVER)->sa_mask); \ } while (0) #define RESTORE_BLOCKED_SIGSET(DRIVER) do { \ pthread_sigmask(SIG_SETMASK, &(DRIVER)->sa_mask, NULL); \ } while (0) #define PPTHREAD_MUTEX_LOCK(DRIVER) do { \ SET_BLOCKED_SIGSET(DRIVER); \ pthread_mutex_lock(&(DRIVER)->ctxmutex); \ } while (0) #define PPTHREAD_MUTEX_UNLOCK(DRIVER) do { \ pthread_mutex_unlock(&(DRIVER)->ctxmutex); \ RESTORE_BLOCKED_SIGSET(DRIVER); \ } while (0) /* device control */ extern void intel_driver_lock_hardware(intel_driver_t*); extern void intel_driver_unlock_hardware(intel_driver_t*); /* methods working in shared mode */ extern dri_bo* intel_driver_share_buffer(intel_driver_t*, const char *sname, uint32_t name); extern uint32_t intel_driver_shared_name(intel_driver_t*, dri_bo*); /* init driver shared with X using dri state, acquired from X Display */ extern int intel_driver_init_shared(intel_driver_t*, struct dri_state*); /* init driver in master mode (when X is not using the card) * usually dev_name = "/dev/dri/card0" */ extern int intel_driver_init_master(intel_driver_t*, const char* dev_name); /* init driver for render node */ extern int intel_driver_init_render(intel_driver_t*, const char* dev_name); /* terminate driver and all underlying structures */ extern int intel_driver_terminate(intel_driver_t*); /* simple check if driver was initialized (checking fd should suffice) */ extern int intel_driver_is_active(intel_driver_t*); /* init the callbacks used by the ocl driver */ extern void intel_setup_callbacks(void); #endif /* _INTEL_DRIVER_H_ */ Beignet-1.1.1-Source/src/intel/intel_dri_resource_sharing_int.h000664 001750 001750 00000007515 12576733264 024024 0ustar00yryr000000 000000 /***************************************************************** * The following functions are copied from i965 driver, commit * id 292368570a13501dfa95b1b0dd70966caf6ffc6b. They need to stay consistent * with the dri driver installed on the current system. *****************************************************************/ static bool _intel_region_flink(struct intel_region *region, uint32_t *name) { if (region->name == 0) { if (drm_intel_bo_flink(region->bo, &region->name)) return false; } *name = region->name; return true; } #define _DBG(...) 
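/* _DBG intentionally expands to nothing here; the i965 originals log
 * through it. _intel_region_flink() above publishes the bo under a
 * global GEM name, which the CL side reopens via
 * intel_driver_share_buffer() in intel_driver.c. */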
static void _intel_region_release(struct intel_region **region_handle) { struct intel_region *region = *region_handle; if (region == NULL) { _DBG("%s NULL\n", __FUNCTION__); return; } _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); ASSERT(region->refcount > 0); region->refcount--; if (region->refcount == 0) { drm_intel_bo_unreference(region->bo); free(region); } *region_handle = NULL; } static void _intel_region_reference(struct intel_region **dst, struct intel_region *src) { _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__, *dst, *dst ? (*dst)->refcount : 0, src, src ? src->refcount : 0); if (src != *dst) { if (*dst) _intel_region_release(dst); if (src) src->refcount++; *dst = src; } } /** * This function computes masks that may be used to select the bits of the X * and Y coordinates that indicate the offset within a tile. If the region is * untiled, the masks are set to 0. */ static void _intel_region_get_tile_masks(struct intel_region *region, uint32_t *mask_x, uint32_t *mask_y, bool map_stencil_as_y_tiled) { int cpp = region->cpp; uint32_t tiling = region->tiling; if (map_stencil_as_y_tiled) tiling = I915_TILING_Y; switch (tiling) { default: assert(false); case I915_TILING_NONE: *mask_x = *mask_y = 0; break; case I915_TILING_X: *mask_x = 512 / cpp - 1; *mask_y = 7; break; case I915_TILING_Y: *mask_x = 128 / cpp - 1; *mask_y = 31; break; } } /** * Compute the offset (in bytes) from the start of the region to the given x * and y coordinate. For tiled regions, caller must ensure that x and y are * multiples of the tile size. */ static uint32_t _intel_region_get_aligned_offset(struct intel_region *region, uint32_t x, uint32_t y, bool map_stencil_as_y_tiled) { int cpp = region->cpp; uint32_t pitch = region->pitch; uint32_t tiling = region->tiling; if (map_stencil_as_y_tiled) { tiling = I915_TILING_Y; /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile * gets transformed into a 32-high Y-tile. Accordingly, the pitch of * the resulting region is twice the pitch of the original region, since * each row in the Y-tiled view corresponds to two rows in the actual * W-tiled surface. So we need to correct the pitch before computing * the offsets. */ pitch *= 2; } switch (tiling) { default: assert(false); case I915_TILING_NONE: return y * pitch + x * cpp; case I915_TILING_X: assert((x % (512 / cpp)) == 0); assert((y % 8) == 0); return y * pitch + x / (512 / cpp) * 4096; case I915_TILING_Y: assert((x % (128 / cpp)) == 0); assert((y % 32) == 0); return y * pitch + x / (128 / cpp) * 4096; } } static void _intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, GLuint level, GLuint slice, GLuint *x, GLuint *y) { assert(slice < mt->level[level].depth); *x = mt->level[level].slice[slice].x_offset; *y = mt->level[level].slice[slice].y_offset; } Beignet-1.1.1-Source/src/intel/intel_structs.h000664 001750 001750 00000044610 12576733264 020456 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2009 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #ifndef __INTEL_STRUCTS_H__ #define __INTEL_STRUCTS_H__ #include typedef struct gen6_interface_descriptor { struct { uint32_t pad6:6; uint32_t kernel_start_pointer:26; } desc0; struct { uint32_t pad:7; uint32_t software_exception:1; uint32_t pad2:3; uint32_t maskstack_exception:1; uint32_t pad3:1; uint32_t illegal_opcode_exception:1; uint32_t pad4:2; uint32_t floating_point_mode:1; uint32_t thread_priority:1; uint32_t single_program_flow:1; uint32_t pad5:1; uint32_t pad6:6; uint32_t pad7:6; } desc1; struct { uint32_t pad:2; uint32_t sampler_count:3; uint32_t sampler_state_pointer:27; } desc2; struct { uint32_t binding_table_entry_count:5; /* prefetch entries only */ uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */ } desc3; struct { uint32_t curbe_read_offset:16; /* in GRFs */ uint32_t curbe_read_len:16; /* in GRFs */ } desc4; struct { uint32_t group_threads_num:8; /* 0..64, 0 - no barrier use */ uint32_t barrier_return_byte:8; uint32_t slm_sz:5; /* 0..16 - 0K..64K */ uint32_t barrier_enable:1; uint32_t rounding_mode:2; uint32_t barrier_return_grf_offset:8; } desc5; uint32_t desc6; /* unused */ uint32_t desc7; /* unused */ } gen6_interface_descriptor_t; typedef struct gen8_interface_descriptor { struct { uint32_t pad6:6; uint32_t kernel_start_pointer:26; } desc0; struct { uint32_t kernel_start_pointer_high:16; uint32_t pad6:16; } desc1; struct { uint32_t pad:7; uint32_t software_exception:1; uint32_t pad2:3; uint32_t maskstack_exception:1; uint32_t pad3:1; uint32_t illegal_opcode_exception:1; uint32_t pad4:2; uint32_t floating_point_mode:1; uint32_t thread_priority:1; uint32_t single_program_flow:1; uint32_t denorm_mode:1; uint32_t thread_preemption_disable:1; uint32_t pad5:11; } desc2; struct { uint32_t pad:2; uint32_t sampler_count:3; uint32_t sampler_state_pointer:27; } desc3; struct { uint32_t binding_table_entry_count:5; /* prefetch entries only */ uint32_t binding_table_pointer:27; /* 11 bit only on IVB+ */ } desc4; struct { uint32_t curbe_read_offset:16; /* in GRFs */ uint32_t curbe_read_len:16; /* in GRFs */ } desc5; struct { uint32_t group_threads_num:10; /* 0..64, 0 - no barrier use */ uint32_t pad:5; uint32_t global_barrier_enable:1; uint32_t slm_sz:5; /* 0..16 - 0K..64K */ uint32_t barrier_enable:1; 
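  /* Editor's note: unlike gen6, the gen8 kernel entry address no longer
   * fits in a single dword, so it is split across desc0/desc1 above:
   * desc0 keeps address bits 31:6 (kernels are 64-byte aligned, hence the
   * 6 pad bits) and desc1 carries bits 47:32. A sketch of the packing,
   * assuming a 48-bit GPU virtual address ka:
   *
   *   desc.desc0.kernel_start_pointer      = (ka >> 6) & 0x3ffffff;
   *   desc.desc1.kernel_start_pointer_high = (ka >> 32) & 0xffff;
   */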
uint32_t rounding_mode:2; uint32_t barrier_return_grf_offset:8; } desc6; uint32_t desc7; /* unused */ } gen8_interface_descriptor_t; typedef struct gen7_surface_state { struct { uint32_t cube_pos_z:1; uint32_t cube_neg_z:1; uint32_t cube_pos_y:1; uint32_t cube_neg_y:1; uint32_t cube_pos_x:1; uint32_t cube_neg_x:1; uint32_t media_boundary_pixel_mode:2; uint32_t render_cache_rw_mode:1; uint32_t pad1:1; uint32_t surface_array_spacing:1; uint32_t vertical_line_stride_offset:1; uint32_t vertical_line_stride:1; uint32_t tile_walk:1; uint32_t tiled_surface:1; uint32_t horizontal_alignment:1; uint32_t vertical_alignment:2; uint32_t surface_format:9; uint32_t pad0:1; uint32_t surface_array:1; uint32_t surface_type:3; } ss0; struct { uint32_t base_addr; } ss1; struct { uint32_t width:14; uint32_t pad1:2; uint32_t height:14; uint32_t pad0:2; } ss2; struct { uint32_t pitch:18; uint32_t pad0:3; uint32_t depth:11; } ss3; union { struct { uint32_t mulsample_pal_idx:3; uint32_t numer_mulsample:3; uint32_t mss_fmt:1; uint32_t rt_view_extent:11; uint32_t min_array_element:11; uint32_t rt_rotate:2; uint32_t pad0:1; } not_str_buf; } ss4; struct { uint32_t mip_count:4; uint32_t surface_min_load:4; uint32_t pad2:6; uint32_t coherence_type:1; uint32_t stateless_force_write_thru:1; uint32_t cache_control:4; uint32_t y_offset:4; uint32_t pad0:1; uint32_t x_offset:7; } ss5; uint32_t ss6; /* unused */ struct { uint32_t min_lod:12; uint32_t pad0:4; uint32_t shader_a:3; uint32_t shader_b:3; uint32_t shader_g:3; uint32_t shader_r:3; uint32_t pad1:4; } ss7; } gen7_surface_state_t; typedef struct gen8_surface_state { struct { uint32_t cube_pos_z:1; uint32_t cube_neg_z:1; uint32_t cube_pos_y:1; uint32_t cube_neg_y:1; uint32_t cube_pos_x:1; uint32_t cube_neg_x:1; uint32_t media_boundary_pixel_mode:2; uint32_t render_cache_rw_mode:1; uint32_t sampler_L2_bypass_mode:1; uint32_t vertical_line_stride_offset:1; uint32_t vertical_line_stride:1; uint32_t tile_mode:2; uint32_t horizontal_alignment:2; uint32_t vertical_alignment:2; uint32_t surface_format:9; uint32_t pad0:1; uint32_t surface_array:1; uint32_t surface_type:3; } ss0; struct { uint32_t surface_qpitch:15; uint32_t pad0:3; uint32_t pad1:1; uint32_t base_mip_level:5; uint32_t mem_obj_ctrl_state:7; uint32_t pad2:1; } ss1; struct { uint32_t width:14; uint32_t pad1:2; uint32_t height:14; uint32_t pad0:2; } ss2; struct { uint32_t surface_pitch:18; uint32_t pad1:2; uint32_t pad0:1; uint32_t depth:11; } ss3; struct { union { struct { uint32_t multisample_pos_palette_idx:3; uint32_t multisample_num:3; uint32_t multisample_format:1; uint32_t render_target_view_ext:11; uint32_t min_array_elt:11; uint32_t render_target_and_sample_rotation:2; uint32_t pad1:1; }; uint32_t pad0; }; } ss4; struct { uint32_t mip_count:4; uint32_t surface_min_lod:4; uint32_t pad5:4; uint32_t pad4:2; uint32_t conherency_type:1; uint32_t pad3:3; uint32_t pad2:2; uint32_t cube_ewa:1; uint32_t y_offset:3; uint32_t pad0:1; uint32_t x_offset:7; } ss5; struct { union { union { struct { uint32_t aux_surface_mode:3; uint32_t aux_surface_pitch:9; uint32_t pad3:4; }; struct { uint32_t uv_plane_y_offset:14; uint32_t pad2:2; }; }; struct { uint32_t uv_plane_x_offset:14; uint32_t pad1:1; uint32_t seperate_uv_plane_enable:1; }; struct { uint32_t aux_sruface_qpitch:15; uint32_t pad0:1; }; }; } ss6; struct { uint32_t resource_min_lod:12; uint32_t pad0:4; uint32_t shader_channel_select_alpha:3; uint32_t shader_channel_select_blue:3; uint32_t shader_channel_select_green:3; uint32_t shader_channel_select_red:3; uint32_t 
alpha_clear_color:1; uint32_t blue_clear_color:1; uint32_t green_clear_color:1; uint32_t red_clear_color:1; } ss7; struct { uint32_t surface_base_addr_lo; } ss8; struct { uint32_t surface_base_addr_hi; } ss9; struct { uint32_t pad0:12; uint32_t aux_base_addr_lo:20; } ss10; struct { uint32_t aux_base_addr_hi:32; } ss11; struct { uint32_t pad0; } ss12; /* 13~15 have meaning only when aux surface mode == AUX_HIZ */ struct { uint32_t pad0; } ss13; struct { uint32_t pad0; } ss14; struct { uint32_t pad0; } ss15; } gen8_surface_state_t; typedef union gen_surface_state { gen7_surface_state_t gen7_surface_state; gen8_surface_state_t gen8_surface_state; } gen_surface_state_t; static const size_t surface_state_sz = sizeof(gen_surface_state_t); typedef struct gen6_vfe_state_inline { struct { uint32_t per_thread_scratch_space:4; uint32_t pad3:3; uint32_t extend_vfe_state_present:1; uint32_t pad2:2; uint32_t scratch_base:22; } vfe0; struct { uint32_t debug_counter_control:2; uint32_t gpgpu_mode:1; /* 0 for SNB!!! */ uint32_t gateway_mmio_access:2; uint32_t fast_preempt:1; uint32_t bypass_gateway_ctl:1; /* 0 - legacy, 1 - no open/close */ uint32_t reset_gateway_timer:1; uint32_t urb_entries:8; uint32_t max_threads:16; } vfe1; struct { uint32_t pad8:8; uint32_t debug_object_id:24; } vfe2; struct { uint32_t curbe_size:16; /* in GRFs */ uint32_t urb_size:16; /* in GRFs */ } vfe3; struct { uint32_t scoreboard_mask:32; /* 1 - enable the corresponding dependency */ } vfe4; struct { uint32_t scoreboard0_dx:4; uint32_t scoreboard0_dy:4; uint32_t scoreboard1_dx:4; uint32_t scoreboard1_dy:4; uint32_t scoreboard2_dx:4; uint32_t scoreboard2_dy:4; uint32_t scoreboard3_dx:4; uint32_t scoreboard3_dy:4; } vfe5; struct { uint32_t scoreboard4_dx:4; uint32_t scoreboard4_dy:4; uint32_t scoreboard5_dx:4; uint32_t scoreboard5_dy:4; uint32_t scoreboard6_dx:4; uint32_t scoreboard6_dy:4; uint32_t scoreboard7_dx:4; uint32_t scoreboard7_dy:4; } vfe6; } gen6_vfe_state_inline_t; typedef struct gen6_pipe_control { struct { uint32_t length : BITFIELD_RANGE(0, 7); uint32_t reserved : BITFIELD_RANGE(8, 15); uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23); uint32_t instruction_opcode : BITFIELD_RANGE(24, 26); uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28); uint32_t instruction_type : BITFIELD_RANGE(29, 31); } dw0; struct { uint32_t depth_cache_flush_enable : BITFIELD_BIT(0); uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1); uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2); uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3); uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4); uint32_t dc_flush_enable : BITFIELD_BIT(5); uint32_t protected_memory_app_id : BITFIELD_BIT(6); uint32_t pipe_control_flush_enable : BITFIELD_BIT(7); uint32_t notify_enable : BITFIELD_BIT(8); uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9); uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10); uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11); uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12); uint32_t depth_stall_enable : BITFIELD_BIT(13); uint32_t post_sync_operation : BITFIELD_RANGE(14, 15); uint32_t generic_media_state_clear : BITFIELD_BIT(16); uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17); uint32_t tlb_invalidate : BITFIELD_BIT(18); uint32_t global_snapshot_count_reset : BITFIELD_BIT(19); uint32_t cs_stall : BITFIELD_BIT(20); uint32_t store_data_index : BITFIELD_BIT(21); uint32_t protected_memory_enable : BITFIELD_BIT(22); uint32_t reserved : BITFIELD_RANGE(23, 31); 
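  /* Editor's note: BITFIELD_BIT()/BITFIELD_RANGE() are width helpers
   * (#undef'ed at the end of this header); presumably
   *
   *   #define BITFIELD_BIT(X)      1
   *   #define BITFIELD_RANGE(X, Y) ((Y) - (X) + 1)
   *
   * so the arguments document the bit positions while still declaring the
   * correct field width. For example, post_sync_operation occupies bits
   * 14..15, which is exactly what GEN7_PIPE_CONTROL_WRITE_TIMESTAMP
   * (3 << 14) in genx_defines.h selects. */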
} dw1; struct { uint32_t reserved : BITFIELD_RANGE(0, 1); uint32_t destination_address_type : BITFIELD_BIT(2); uint32_t address : BITFIELD_RANGE(3, 31); } dw2; struct { uint32_t data; } dw3; struct { uint32_t data; } dw4; } gen6_pipe_control_t; typedef struct gen8_pipe_control { struct { uint32_t length : BITFIELD_RANGE(0, 7); uint32_t reserved : BITFIELD_RANGE(8, 15); uint32_t instruction_subopcode : BITFIELD_RANGE(16, 23); uint32_t instruction_opcode : BITFIELD_RANGE(24, 26); uint32_t instruction_pipeline : BITFIELD_RANGE(27, 28); uint32_t instruction_type : BITFIELD_RANGE(29, 31); } dw0; struct { uint32_t depth_cache_flush_enable : BITFIELD_BIT(0); uint32_t stall_at_pixel_scoreboard : BITFIELD_BIT(1); uint32_t state_cache_invalidation_enable : BITFIELD_BIT(2); uint32_t constant_cache_invalidation_enable : BITFIELD_BIT(3); uint32_t vf_cache_invalidation_enable : BITFIELD_BIT(4); uint32_t dc_flush_enable : BITFIELD_BIT(5); uint32_t protected_memory_app_id : BITFIELD_BIT(6); uint32_t pipe_control_flush_enable : BITFIELD_BIT(7); uint32_t notify_enable : BITFIELD_BIT(8); uint32_t indirect_state_pointers_disable : BITFIELD_BIT(9); uint32_t texture_cache_invalidation_enable : BITFIELD_BIT(10); uint32_t instruction_cache_invalidate_enable : BITFIELD_BIT(11); uint32_t render_target_cache_flush_enable : BITFIELD_BIT(12); uint32_t depth_stall_enable : BITFIELD_BIT(13); uint32_t post_sync_operation : BITFIELD_RANGE(14, 15); uint32_t generic_media_state_clear : BITFIELD_BIT(16); uint32_t synchronize_gfdt_surface : BITFIELD_BIT(17); uint32_t tlb_invalidate : BITFIELD_BIT(18); uint32_t global_snapshot_count_reset : BITFIELD_BIT(19); uint32_t cs_stall : BITFIELD_BIT(20); uint32_t store_data_index : BITFIELD_BIT(21); uint32_t protected_memory_enable : BITFIELD_BIT(22); uint32_t reserved : BITFIELD_RANGE(23, 31); } dw1; struct { uint32_t reserved : BITFIELD_RANGE(0, 1); uint32_t destination_address_type : BITFIELD_BIT(2); uint32_t address : BITFIELD_RANGE(3, 31); } dw2; struct { uint32_t data; } dw3; struct { uint32_t data; } dw4; struct { uint32_t data; } dw5; } gen8_pipe_control_t; typedef struct gen6_sampler_state { struct { uint32_t shadow_function:3; uint32_t lod_bias:11; uint32_t min_filter:3; uint32_t mag_filter:3; uint32_t mip_filter:2; uint32_t base_level:5; uint32_t min_mag_neq:1; uint32_t lod_preclamp:1; uint32_t default_color_mode:1; uint32_t pad0:1; uint32_t disable:1; } ss0; struct { uint32_t r_wrap_mode:3; uint32_t t_wrap_mode:3; uint32_t s_wrap_mode:3; uint32_t cube_control_mode:1; uint32_t pad:2; uint32_t max_lod:10; uint32_t min_lod:10; } ss1; struct { uint32_t pad:5; uint32_t default_color_pointer:27; } ss2; struct { uint32_t non_normalized_coord:1; uint32_t pad:12; uint32_t address_round:6; uint32_t max_aniso:3; uint32_t chroma_key_mode:1; uint32_t chroma_key_index:2; uint32_t chroma_key_enable:1; uint32_t monochrome_filter_width:3; uint32_t monochrome_filter_height:3; } ss3; } gen6_sampler_state_t; typedef struct gen7_sampler_border_color { float r,g,b,a; } gen7_sampler_border_color_t; typedef struct gen7_sampler_state { struct { uint32_t aniso_algorithm:1; uint32_t lod_bias:13; uint32_t min_filter:3; uint32_t mag_filter:3; uint32_t mip_filter:2; uint32_t base_level:5; uint32_t pad1:1; uint32_t lod_preclamp:1; uint32_t default_color_mode:1; uint32_t pad0:1; uint32_t disable:1; } ss0; struct { uint32_t cube_control_mode:1; uint32_t shadow_function:3; uint32_t pad:4; uint32_t max_lod:12; uint32_t min_lod:12; } ss1; struct { uint32_t pad:5; uint32_t default_color_pointer:27; } ss2; 
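  /* Editor's note: default_color_pointer in ss2 above holds bits 31:5 of
   * an offset (presumably relative to Dynamic State Base Address) to a
   * gen7_sampler_border_color_t, so the border color block must be
   * 32-byte aligned. A sketch of filling it, assuming off is such an
   * aligned offset into the aux buffer:
   *
   *   sampler->ss2.default_color_pointer = off >> 5;
   *
   * intel_gpgpu_state_init() in intel_gpgpu.c reserves a 32-byte aligned
   * sampler_border_color_state_offset for exactly this purpose. */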
struct { uint32_t r_wrap_mode:3; uint32_t t_wrap_mode:3; uint32_t s_wrap_mode:3; uint32_t pad:1; uint32_t non_normalized_coord:1; uint32_t trilinear_quality:2; uint32_t address_round:6; uint32_t max_aniso:3; uint32_t chroma_key_mode:1; uint32_t chroma_key_index:2; uint32_t chroma_key_enable:1; uint32_t pad0:6; } ss3; } gen7_sampler_state_t; STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen7_sampler_state_t)); typedef struct gen8_sampler_state { struct { uint32_t aniso_algorithm:1; uint32_t lod_bias:13; uint32_t min_filter:3; uint32_t mag_filter:3; uint32_t mip_filter:2; uint32_t base_level:5; uint32_t lod_preclamp:2; uint32_t default_color_mode:1; uint32_t pad0:1; uint32_t disable:1; } ss0; struct { uint32_t cube_control_mode:1; uint32_t shadow_function:3; uint32_t chromakey_mode:1; uint32_t chromakey_index:2; uint32_t chromakey_enable:1; uint32_t max_lod:12; uint32_t min_lod:12; } ss1; struct { uint32_t lod_clamp_mag_mode:1; uint32_t flexible_filter_valign:1; uint32_t flexible_filter_halign:1; uint32_t flexible_filter_coeff_size:1; uint32_t flexible_filter_mode:1; uint32_t pad1:1; uint32_t indirect_state_ptr:18; uint32_t pad0:2; uint32_t sep_filter_height:2; uint32_t sep_filter_width:2; uint32_t sep_filter_coeff_table_size:2; } ss2; struct { uint32_t r_wrap_mode:3; uint32_t t_wrap_mode:3; uint32_t s_wrap_mode:3; uint32_t pad:1; uint32_t non_normalized_coord:1; uint32_t trilinear_quality:2; uint32_t address_round:6; uint32_t max_aniso:3; uint32_t pad0:2; uint32_t non_sep_filter_footprint_mask:8; } ss3; } gen8_sampler_state_t; STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen8_sampler_state_t)); #undef BITFIELD_BIT #undef BITFIELD_RANGE #endif /* __INTEL_STRUCTS_H__ */ Beignet-1.1.1-Source/src/intel/intel_batchbuffer.h000664 001750 001750 00000012242 12576733264 021216 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ #ifndef _INTEL_BATCHBUFFER_H_ #define _INTEL_BATCHBUFFER_H_ #include "intel_defines.h" #include "cl_utils.h" #include #include #include #include #include #include #include #define BEGIN_BATCH(b, n) do { \ intel_batchbuffer_require_space(b, (n) * 4); \ } while (0) #define OUT_BATCH(b, d) do { \ intel_batchbuffer_emit_dword(b, d); \ } while (0) #define OUT_RELOC(b, bo, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ intel_batchbuffer_emit_reloc(b, bo, read_domains, write_domain, delta); \ } while (0) #define ADVANCE_BATCH(b) do { } while (0) struct intel_driver; typedef struct intel_batchbuffer { struct intel_driver *intel; drm_intel_bo *buffer; /** Last bo submitted to the hardware. used for clFinish. */ drm_intel_bo *last_bo; uint32_t size; uint8_t *map; uint8_t *ptr; /** HSW: can't set LRI in batch buffer, set I915_EXEC_ENABLE_SLM * flag when call exec. */ uint8_t enable_slm; int atomic; } intel_batchbuffer_t; extern intel_batchbuffer_t* intel_batchbuffer_new(struct intel_driver*); extern void intel_batchbuffer_delete(intel_batchbuffer_t*); extern void intel_batchbuffer_emit_reloc(intel_batchbuffer_t*, drm_intel_bo*, uint32_t read_domains, uint32_t write_domains, uint32_t delta); extern void intel_batchbuffer_init(intel_batchbuffer_t*, struct intel_driver*); extern void intel_batchbuffer_terminate(intel_batchbuffer_t*); extern int intel_batchbuffer_flush(intel_batchbuffer_t*); extern int intel_batchbuffer_reset(intel_batchbuffer_t*, size_t sz); static INLINE uint32_t intel_batchbuffer_space(const intel_batchbuffer_t *batch) { assert(batch->ptr); return batch->size - (batch->ptr - batch->map); } static INLINE void intel_batchbuffer_emit_dword(intel_batchbuffer_t *batch, uint32_t x) { assert(intel_batchbuffer_space(batch) >= 4); *(uint32_t*)batch->ptr = x; batch->ptr += 4; } static INLINE void intel_batchbuffer_require_space(intel_batchbuffer_t *batch, uint32_t size) { assert(size < batch->size - 8); if (intel_batchbuffer_space(batch) < size) intel_batchbuffer_space(batch); } static INLINE uint8_t* intel_batchbuffer_alloc_space(intel_batchbuffer_t *batch, uint32_t size) { assert(intel_batchbuffer_space(batch) >= size); uint8_t *space_ptr = batch->ptr; batch->ptr += size; return space_ptr; } static INLINE void intel_batchbuffer_start_atomic(intel_batchbuffer_t *batch, uint32_t size) { assert(!batch->atomic); intel_batchbuffer_require_space(batch, size); batch->atomic = 1; } static INLINE void intel_batchbuffer_end_atomic(intel_batchbuffer_t *batch) { assert(batch->atomic); batch->atomic = 0; } #endif /* _INTEL_BATCHBUFFER_H_ */ Beignet-1.1.1-Source/src/intel/intel_gpgpu.c000664 001750 001750 00000237061 12576733264 020070 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia * Alexei Soupikov */ #include #include #include #include #include #include #include #include #include #include #include #include "intel/intel_gpgpu.h" #include "intel/intel_defines.h" #include "intel/intel_structs.h" #include "program.h" // for BTI_RESERVED_NUM #include "cl_alloc.h" #include "cl_utils.h" #include "cl_sampler.h" #ifndef CL_VERSION_1_2 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 #endif #define GEN_CMD_MEDIA_OBJECT (0x71000000) #define MO_TS_BIT (1 << 24) #define MO_RETAIN_BIT (1 << 28) #define SAMPLER_STATE_SIZE (16) #define TIMESTAMP_ADDR 0x2358 /* Stores both binding tables and surface states */ typedef struct surface_heap { uint32_t binding_table[256]; char surface[256*sizeof(gen_surface_state_t)]; } surface_heap_t; typedef struct intel_event { drm_intel_bo *buffer; drm_intel_bo *ts_buf; int status; } intel_event_t; #define MAX_IF_DESC 32 typedef struct intel_gpgpu intel_gpgpu_t; typedef void (intel_gpgpu_set_L3_t)(intel_gpgpu_t *gpgpu, uint32_t use_slm); intel_gpgpu_set_L3_t *intel_gpgpu_set_L3 = NULL; typedef uint32_t (intel_gpgpu_get_scratch_index_t)(uint32_t size); intel_gpgpu_get_scratch_index_t *intel_gpgpu_get_scratch_index = NULL; typedef void (intel_gpgpu_post_action_t)(intel_gpgpu_t *gpgpu, int32_t flush_mode); intel_gpgpu_post_action_t *intel_gpgpu_post_action = NULL; typedef uint64_t (intel_gpgpu_read_ts_reg_t)(drm_intel_bufmgr *bufmgr); intel_gpgpu_read_ts_reg_t *intel_gpgpu_read_ts_reg = NULL; typedef void (intel_gpgpu_set_base_address_t)(intel_gpgpu_t *gpgpu); intel_gpgpu_set_base_address_t *intel_gpgpu_set_base_address = NULL; typedef void (intel_gpgpu_setup_bti_t)(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset, uint32_t size, unsigned char index, uint32_t format); intel_gpgpu_setup_bti_t *intel_gpgpu_setup_bti = NULL; typedef void (intel_gpgpu_load_vfe_state_t)(intel_gpgpu_t *gpgpu); intel_gpgpu_load_vfe_state_t *intel_gpgpu_load_vfe_state = NULL; typedef void (intel_gpgpu_build_idrt_t)(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel); intel_gpgpu_build_idrt_t *intel_gpgpu_build_idrt = NULL; typedef void (intel_gpgpu_load_curbe_buffer_t)(intel_gpgpu_t *gpgpu); intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL; typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu); intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL; typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu); intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL; typedef void (intel_gpgpu_select_pipeline_t)(intel_gpgpu_t *gpgpu); intel_gpgpu_select_pipeline_t *intel_gpgpu_select_pipeline = NULL; static void intel_gpgpu_sync(void *buf) { if (buf) drm_intel_bo_wait_rendering((drm_intel_bo *)buf); } static void *intel_gpgpu_ref_batch_buf(intel_gpgpu_t *gpgpu) { if (gpgpu->batch->last_bo) drm_intel_bo_reference(gpgpu->batch->last_bo); return gpgpu->batch->last_bo; } static void intel_gpgpu_unref_batch_buf(void *buf) { if (buf) drm_intel_bo_unreference((drm_intel_bo *)buf); } static void intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu) { if (gpgpu == NULL) return; if(gpgpu->time_stamp_b.bo) drm_intel_bo_unreference(gpgpu->time_stamp_b.bo); if(gpgpu->printf_b.bo) drm_intel_bo_unreference(gpgpu->printf_b.bo); 
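  /* Editor's note: "finished" here means the GPU is done with the context;
   * intel_gpgpu_delete() below hands a context to this function only once
   * drm_intel_bo_busy() reports its batch bo idle, otherwise the context
   * is parked on drv->gpgpu_list and reaped by a later delete or at driver
   * teardown via intel_gpgpu_delete_all(). */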
if(gpgpu->printf_b.ibo) drm_intel_bo_unreference(gpgpu->printf_b.ibo); if (gpgpu->aux_buf.bo) drm_intel_bo_unreference(gpgpu->aux_buf.bo); if (gpgpu->perf_b.bo) drm_intel_bo_unreference(gpgpu->perf_b.bo); if (gpgpu->stack_b.bo) drm_intel_bo_unreference(gpgpu->stack_b.bo); if (gpgpu->scratch_b.bo) drm_intel_bo_unreference(gpgpu->scratch_b.bo); if(gpgpu->constant_b.bo) drm_intel_bo_unreference(gpgpu->constant_b.bo); intel_batchbuffer_delete(gpgpu->batch); cl_free(gpgpu); } /* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy */ void intel_gpgpu_delete_all(intel_driver_t *drv) { struct intel_gpgpu_node *p; if(drv->gpgpu_list == NULL) return; PPTHREAD_MUTEX_LOCK(drv); while(drv->gpgpu_list) { p = drv->gpgpu_list; drv->gpgpu_list = p->next; intel_gpgpu_delete_finished(p->gpgpu); cl_free(p); } PPTHREAD_MUTEX_UNLOCK(drv); } static void intel_gpgpu_delete(intel_gpgpu_t *gpgpu) { intel_driver_t *drv = gpgpu->drv; struct intel_gpgpu_node *p, *node; PPTHREAD_MUTEX_LOCK(drv); p = drv->gpgpu_list; if(p) { node = p->next; while(node) { if(node->gpgpu->batch && node->gpgpu->batch->buffer && !drm_intel_bo_busy(node->gpgpu->batch->buffer)) { p->next = node->next; intel_gpgpu_delete_finished(node->gpgpu); cl_free(node); node = p->next; } else { p = node; node = node->next; } } node = drv->gpgpu_list; if(node->gpgpu->batch && node->gpgpu->batch->buffer && !drm_intel_bo_busy(node->gpgpu->batch->buffer)) { drv->gpgpu_list = drv->gpgpu_list->next; intel_gpgpu_delete_finished(node->gpgpu); cl_free(node); node = p->next; } } if (gpgpu == NULL) return; if(gpgpu->batch && gpgpu->batch->buffer && !drm_intel_bo_busy(gpgpu->batch->buffer)) { TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node)); node->gpgpu = gpgpu; node->next = NULL; p = drv->gpgpu_list; if(p == NULL) drv->gpgpu_list= node; else { while(p->next) p = p->next; p->next = node; } } else intel_gpgpu_delete_finished(gpgpu); error: PPTHREAD_MUTEX_UNLOCK(drv); } static intel_gpgpu_t* intel_gpgpu_new(intel_driver_t *drv) { intel_gpgpu_t *state = NULL; TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t)); state->drv = drv; state->batch = intel_batchbuffer_new(state->drv); assert(state->batch); exit: return state; error: intel_gpgpu_delete(state); state = NULL; goto exit; } static void intel_gpgpu_select_pipeline_gen7(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 1); OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_select_pipeline_gen9(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 1); OUT_BATCH(gpgpu->batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MASK | PIPELINE_SELECT_GPGPU); ADVANCE_BATCH(gpgpu->batch); } static uint32_t intel_gpgpu_get_cache_ctrl_gen7() { return cc_llc_l3; } static uint32_t intel_gpgpu_get_cache_ctrl_gen75() { return llccc_ec | l3cc_ec; } static uint32_t intel_gpgpu_get_cache_ctrl_gen8() { return tcc_llc_ec_l3 | mtllc_wb; } static uint32_t intel_gpgpu_get_cache_ctrl_gen9() { //Pre-defined cache control registers 9: //L3CC: WB; LeCC: WB; TC: LLC/eLLC; return (0x9 << 1); } static void intel_gpgpu_set_base_address_gen7(intel_gpgpu_t *gpgpu) { const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */ BEGIN_BATCH(gpgpu->batch, 10); OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 8); /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY); /* General State Base Addr */ /* 0, State Mem Obj CC */ /* We 
use a state base address for the surface heap since IVB clamp the * binding table pointer at 11 bits. So, we cannot use pointers directly while * using the surface heap */ assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0); OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 8) | (def_cc << 4) | (0 << 3)| BASE_ADDRESS_MODIFY)); OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Dynamic State Base Addr */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 8) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */ OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY); /* According to mesa i965 driver code, we must set the dynamic state access upper bound * to a valid bound value, otherwise, the border color pointer may be rejected and you * may get incorrect border color. This is a known hardware bug. */ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_set_base_address_gen8(intel_gpgpu_t *gpgpu) { const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */ BEGIN_BATCH(gpgpu->batch, 16); OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 14); /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY); /* General State Base Addr */ OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0 | (def_cc << 16)); /* 0, State Mem Obj CC */ /* We use a state base address for the surface heap since IVB clamp the * binding table pointer at 11 bits. So, we cannot use pointers directly while * using the surface heap */ assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0); OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_SAMPLER, I915_GEM_DOMAIN_SAMPLER, gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); OUT_BATCH(gpgpu->batch, 0); OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); /* Dynamic State Base Addr */ OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */ OUT_BATCH(gpgpu->batch, 0); //OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */ OUT_RELOC(gpgpu->batch, (drm_intel_bo *)gpgpu->ker->bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0 + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); /* According to mesa i965 driver code, we must set the dynamic state access upper bound * to a valid bound value, otherwise, the border color pointer may be rejected and you * may get incorrect border color. This is a known hardware bug. 
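 *
 * Editor's note: the 0xfffff000 | BASE_ADDRESS_MODIFY dwords emitted
 * around this comment program each upper-bound field to the highest
 * 4KB-aligned value, i.e. an effectively unlimited bound, while the
 * modify bit marks the field as valid.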
*/ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_set_base_address_gen9(intel_gpgpu_t *gpgpu) { const uint32_t def_cc = cl_gpgpu_get_cache_ctrl(); /* default Cache Control value */ BEGIN_BATCH(gpgpu->batch, 19); OUT_BATCH(gpgpu->batch, CMD_STATE_BASE_ADDRESS | 17); /* 0, Gen State Mem Obj CC, Stateless Mem Obj CC, Stateless Access Write Back */ OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY); /* General State Base Addr */ OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0 | (def_cc << 16)); /* 0, State Mem Obj CC */ /* We use a state base address for the surface heap since IVB clamp the * binding table pointer at 11 bits. So, we cannot use pointers directly while * using the surface heap */ assert(gpgpu->aux_offset.surface_heap_offset % 4096 == 0); OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_SAMPLER, I915_GEM_DOMAIN_SAMPLER, gpgpu->aux_offset.surface_heap_offset + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); OUT_BATCH(gpgpu->batch, 0); OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); /* Dynamic State Base Addr */ OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Indirect Obj Base Addr */ OUT_BATCH(gpgpu->batch, 0); //OUT_BATCH(gpgpu->batch, 0 | (def_cc << 4) | BASE_ADDRESS_MODIFY); /* Instruction Base Addr */ OUT_RELOC(gpgpu->batch, (drm_intel_bo *)gpgpu->ker->bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0 + (0 | (def_cc << 4) | (0 << 1)| BASE_ADDRESS_MODIFY)); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); /* According to mesa i965 driver code, we must set the dynamic state access upper bound * to a valid bound value, otherwise, the border color pointer may be rejected and you * may get incorrect border color. This is a known hardware bug. */ OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY); /* Bindless surface state base address */ OUT_BATCH(gpgpu->batch, (def_cc << 4) | BASE_ADDRESS_MODIFY); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0xfffff000); ADVANCE_BATCH(gpgpu->batch); } uint32_t intel_gpgpu_get_scratch_index_gen7(uint32_t size) { return size / 1024 - 1; } uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t size) { //align in backend, if non pow2, must align when alloc scratch bo. assert((size & (size - 1)) == 0); size = size >> 11; uint32_t index = 0; while((size >>= 1) > 0) index++; //get leading one return index; } uint32_t intel_gpgpu_get_scratch_index_gen8(uint32_t size) { //align in backend, if non pow2, must align when alloc scratch bo. 
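  // Editor's note: these helpers encode the per-thread scratch size as the
  // log2 index MEDIA_VFE_STATE expects. Worked example for 16KB: gen7
  // stores size/1KB - 1 = 15 directly; gen75 shifts 16384 >> 11 = 8 and
  // counts 3 halvings down to 1, i.e. log2(size/2KB) = 3; this gen8
  // variant does the same in 1KB units, giving log2(size/1KB) = 4. The
  // asserts require size to already be a power of two.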
assert((size & (size - 1)) == 0); size = size >> 10; uint32_t index = 0; while((size >>= 1) > 0) index++; //get leading one return index; } static cl_int intel_gpgpu_get_max_curbe_size(uint32_t device_id) { if (IS_BAYTRAIL_T(device_id) || IS_IVB_GT1(device_id)) return 992; else return 2016; } static cl_int intel_gpgpu_get_curbe_size(intel_gpgpu_t *gpgpu) { int curbe_size = gpgpu->curb.size_cs_entry * gpgpu->curb.num_cs_entries; int max_curbe_size = intel_gpgpu_get_max_curbe_size(gpgpu->drv->device_id); if (curbe_size > max_curbe_size) { fprintf(stderr, "warning, curbe size exceed limitation.\n"); return max_curbe_size; } else return curbe_size; } static void intel_gpgpu_load_vfe_state_gen7(intel_gpgpu_t *gpgpu) { int32_t scratch_index; BEGIN_BATCH(gpgpu->batch, 8); OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (8-2)); if(gpgpu->per_thread_scratch > 0) { scratch_index = intel_gpgpu_get_scratch_index(gpgpu->per_thread_scratch); OUT_RELOC(gpgpu->batch, gpgpu->scratch_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, scratch_index); } else { OUT_BATCH(gpgpu->batch, 0); } /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */ OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (0 << 8) | 0xc4); OUT_BATCH(gpgpu->batch, 0); /* curbe_size */ OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu)); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_vfe_state_gen8(intel_gpgpu_t *gpgpu) { int32_t scratch_index; BEGIN_BATCH(gpgpu->batch, 9); OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_POINTERS | (9-2)); if(gpgpu->per_thread_scratch > 0) { scratch_index = intel_gpgpu_get_scratch_index(gpgpu->per_thread_scratch); OUT_RELOC(gpgpu->batch, gpgpu->scratch_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, scratch_index); } else { OUT_BATCH(gpgpu->batch, 0); } OUT_BATCH(gpgpu->batch, 0); /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */ OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (2 << 8) | 0xc0); //urb entries can't be 0 OUT_BATCH(gpgpu->batch, 0); /* urb entries size | curbe_size */ OUT_BATCH(gpgpu->batch, 2<<16 | intel_gpgpu_get_curbe_size(gpgpu)); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_curbe_buffer_gen7(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 4); OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */ OUT_BATCH(gpgpu->batch, 0); /* mbz */ OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu) * 32); OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.curbe_offset); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_curbe_buffer_gen8(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 4); OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */ OUT_BATCH(gpgpu->batch, 0); /* mbz */ OUT_BATCH(gpgpu->batch, intel_gpgpu_get_curbe_size(gpgpu) * 32); OUT_BATCH(gpgpu->batch, gpgpu->aux_offset.curbe_offset); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_idrt_gen7(intel_gpgpu_t *gpgpu) { BEGIN_BATCH(gpgpu->batch, 4); OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */ OUT_BATCH(gpgpu->batch, 0); /* mbz */ OUT_BATCH(gpgpu->batch, 1 << 5); OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.idrt_offset); ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_load_idrt_gen8(intel_gpgpu_t *gpgpu) { 
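  /* Editor's note: as with MEDIA_CURBE_LOAD above, the gen8 variant
   * differs from gen7 only in taking a plain aux-buffer offset (presumably
   * resolved against the dynamic state base programmed earlier) instead of
   * a relocation. The CURBE length above is
   * intel_gpgpu_get_curbe_size() * 32 because curbe sizes are counted in
   * 32-byte GRF registers; e.g. size_cs_entry = 8 with the fixed 64
   * entries gives 512 GRFs = 16KB of constant data. */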
BEGIN_BATCH(gpgpu->batch, 4); OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */ OUT_BATCH(gpgpu->batch, 0); /* mbz */ OUT_BATCH(gpgpu->batch, 1 << 5); OUT_BATCH(gpgpu->batch, gpgpu->aux_offset.idrt_offset); ADVANCE_BATCH(gpgpu->batch); } static const uint32_t gpgpu_l3_config_reg1[] = { 0x00080040, 0x02040040, 0x00800040, 0x01000038, 0x02000030, 0x01000038, 0x00000038, 0x00000040, 0x0A140091, 0x09100091, 0x08900091, 0x08900091, 0x010000a1 }; static const uint32_t gpgpu_l3_config_reg2[] = { 0x00000000, 0x00000000, 0x00080410, 0x00080410, 0x00040410, 0x00040420, 0x00080420, 0x00080020, 0x00204080, 0x00244890, 0x00284490, 0x002444A0, 0x00040810 }; /* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer. */ static void intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx) { BEGIN_BATCH(gpgpu->batch, 5); OUT_BATCH(gpgpu->batch, CMD_PIPE_CONTROL | (5-2)); OUT_BATCH(gpgpu->batch, GEN7_PIPE_CONTROL_WRITE_TIMESTAMP); OUT_RELOC(gpgpu->batch, gpgpu->time_stamp_b.bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, GEN7_PIPE_CONTROL_GLOBAL_GTT_WRITE | idx * sizeof(uint64_t)); OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(); } static void intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu) { gen6_pipe_control_t* pc = (gen6_pipe_control_t*) intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); memset(pc, 0, sizeof(*pc)); pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; pc->dw1.render_target_cache_flush_enable = 1; pc->dw1.texture_cache_invalidation_enable = 1; pc->dw1.cs_stall = 1; pc->dw1.dc_flush_enable = 1; //pc->dw1.instruction_cache_invalidate_enable = 1; ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu) { gen6_pipe_control_t* pc = (gen6_pipe_control_t*) intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); memset(pc, 0, sizeof(*pc)); pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; pc->dw1.cs_stall = 1; pc->dw1.dc_flush_enable = 1; pc = (gen6_pipe_control_t*) intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t)); memset(pc, 0, sizeof(*pc)); pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2; pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D; pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; pc->dw1.render_target_cache_flush_enable = 1; pc->dw1.texture_cache_invalidation_enable = 1; pc->dw1.cs_stall = 1; ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_pipe_control_gen8(intel_gpgpu_t *gpgpu) { gen8_pipe_control_t* pc = (gen8_pipe_control_t*) intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen8_pipe_control_t)); memset(pc, 0, sizeof(*pc)); pc->dw0.length = SIZEOF32(gen8_pipe_control_t) - 2; pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL; pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL; pc->dw0.instruction_pipeline = 
GEN7_PIPE_CONTROL_3D; pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX; pc->dw1.render_target_cache_flush_enable = 1; pc->dw1.texture_cache_invalidation_enable = 1; pc->dw1.cs_stall = 1; pc->dw1.dc_flush_enable = 1; //pc->dw1.instruction_cache_invalidate_enable = 1; ADVANCE_BATCH(gpgpu->batch); } static void intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm) { BEGIN_BATCH(gpgpu->batch, 9); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET); OUT_BATCH(gpgpu->batch, 0x00A00000); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET); if (use_slm) OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[12]); else OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET); if (use_slm) OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[12]); else OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]); ADVANCE_BATCH(gpgpu->batch); intel_gpgpu_pipe_control(gpgpu); } static void intel_gpgpu_set_L3_baytrail(intel_gpgpu_t *gpgpu, uint32_t use_slm) { BEGIN_BATCH(gpgpu->batch, 9); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET); OUT_BATCH(gpgpu->batch, 0x00D30000); /* General credit : High credit = 26 : 6 */ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET); if (use_slm) OUT_BATCH(gpgpu->batch, 0x01020021); /* {SLM=64, URB=96, DC=16, RO=16, Sum=192} */ else OUT_BATCH(gpgpu->batch, 0x02040040); /* {SLM=0, URB=128, DC=32, RO=32, Sum=192} */ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET); OUT_BATCH(gpgpu->batch, 0x0); /* {I/S=0, Const=0, Tex=0} */ ADVANCE_BATCH(gpgpu->batch); intel_gpgpu_pipe_control(gpgpu); } static void intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm) { /* still set L3 in batch buffer for fulsim. */ if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL) { BEGIN_BATCH(gpgpu->batch, 15); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ /* FIXME: KMD always disable the atomic in L3 for some reason. I checked the spec, and don't think we need that workaround now. Before I send a patch to kernel, let's just enable it here. 
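 *
 * Editor's note on the two register writes that follow: SCRATCH1 is
 * written as 0 to clear its L3-atomics-disable control, and ROW_CHICKEN3
 * follows the usual masked-register convention where the upper 16 bits
 * select which low bits the write affects, so ((1 << 6) << 16) unmasks
 * bit 6 while writing it as 0 (assumed here to be the HDC atomics disable
 * bit, matching the "enable atomic in L3" annotations below).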
*/ OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET); OUT_BATCH(gpgpu->batch, 0); /* enable atomic in L3 */ OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET); OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16); /* enable atomic in L3 */ } else { BEGIN_BATCH(gpgpu->batch, 9); } OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET); OUT_BATCH(gpgpu->batch, 0x08800000); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG2_ADDRESS_OFFSET); if (use_slm) OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[12]); else OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg1[4]); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN7_L3_CNTL_REG3_ADDRESS_OFFSET); if (use_slm) OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[12]); else OUT_BATCH(gpgpu->batch, gpgpu_l3_config_reg2[4]); ADVANCE_BATCH(gpgpu->batch); //if(use_slm) // gpgpu->batch->enable_slm = 1; intel_gpgpu_pipe_control(gpgpu); } static void intel_gpgpu_set_L3_gen8(intel_gpgpu_t *gpgpu, uint32_t use_slm) { BEGIN_BATCH(gpgpu->batch, 3); OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */ OUT_BATCH(gpgpu->batch, GEN8_L3_CNTL_REG_ADDRESS_OFFSET); // FIXME, this is a workaround for switch SLM enable and disable random hang if(use_slm) OUT_BATCH(gpgpu->batch, 0x60000121); /* {SLM=192, URB=128, Rest=384} */ else OUT_BATCH(gpgpu->batch, 0x60000160); /* {SLM=0, URB=384, Rest=384, Sum=768} */ //if(use_slm) // gpgpu->batch->enable_slm = 1; intel_gpgpu_pipe_control(gpgpu); } static void intel_gpgpu_batch_start(intel_gpgpu_t *gpgpu) { intel_batchbuffer_start_atomic(gpgpu->batch, 256); intel_gpgpu_pipe_control(gpgpu); assert(intel_gpgpu_set_L3); intel_gpgpu_set_L3(gpgpu, gpgpu->ker->use_slm); intel_gpgpu_select_pipeline(gpgpu); intel_gpgpu_set_base_address(gpgpu); intel_gpgpu_load_vfe_state(gpgpu); intel_gpgpu_load_curbe_buffer(gpgpu); intel_gpgpu_load_idrt(gpgpu); if (gpgpu->perf_b.bo) { BEGIN_BATCH(gpgpu->batch, 3); OUT_BATCH(gpgpu->batch, (0x28 << 23) | /* MI_REPORT_PERF_COUNT */ (3 - 2)); /* length-2 */ OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0 | /* Offset for the start "counters" */ 1); /* Use GTT and not PGTT */ OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(gpgpu->batch); } /* Insert PIPE_CONTROL for time stamp of start*/ if (gpgpu->time_stamp_b.bo) intel_gpgpu_write_timestamp(gpgpu, 0); } static void intel_gpgpu_post_action_gen7(intel_gpgpu_t *gpgpu, int32_t flush_mode) { if(flush_mode) intel_gpgpu_pipe_control(gpgpu); } static void intel_gpgpu_post_action_gen75(intel_gpgpu_t *gpgpu, int32_t flush_mode) { /* flush force for set L3 */ intel_gpgpu_pipe_control(gpgpu); /* Restore L3 control to disable SLM mode, otherwise, may affect 3D pipeline */ intel_gpgpu_set_L3(gpgpu, 0); } static void intel_gpgpu_batch_end(intel_gpgpu_t *gpgpu, int32_t flush_mode) { /* Insert PIPE_CONTROL for time stamp of end*/ if (gpgpu->time_stamp_b.bo) intel_gpgpu_write_timestamp(gpgpu, 1); /* Insert the performance counter command */ if (gpgpu->perf_b.bo) { BEGIN_BATCH(gpgpu->batch, 3); OUT_BATCH(gpgpu->batch, (0x28 << 23) | /* MI_REPORT_PERF_COUNT */ (3 - 2)); /* length-2 */ OUT_RELOC(gpgpu->batch, gpgpu->perf_b.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 512 | /* Offset for the end "counters" */ 1); /* Use GTT and not PGTT */ OUT_BATCH(gpgpu->batch, 0); ADVANCE_BATCH(gpgpu->batch); } 
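  /* Editor's note: this mirrors intel_gpgpu_batch_start() above.
   * Timestamps land in qwords 0 (start) and 1 (end) of time_stamp_b, and
   * the two MI_REPORT_PERF_COUNT snapshots land at offsets 0 and 512 of
   * perf_b, so profiling code can diff them once the batch retires. A
   * sketch, assuming the timestamp bo has been mapped:
   *
   *   uint64_t *ts = (uint64_t *)gpgpu->time_stamp_b.bo->virtual;
   *   uint64_t elapsed = ts[1] - ts[0];   // raw GPU timestamp ticks
   */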
intel_gpgpu_post_action(gpgpu, flush_mode); intel_batchbuffer_end_atomic(gpgpu->batch); } static int intel_gpgpu_batch_reset(intel_gpgpu_t *gpgpu, size_t sz) { return intel_batchbuffer_reset(gpgpu->batch, sz); } static int intel_gpgpu_flush(intel_gpgpu_t *gpgpu) { if (!gpgpu->batch || !gpgpu->batch->buffer) return 0; return intel_batchbuffer_flush(gpgpu->batch); /* FIXME: Remove old assert here for binded buffer offset 0 which tried to guard possible NULL buffer pointer check in kernel, as in case like "runtime_null_kernel_arg", but that's wrong to just take buffer offset 0 as NULL, and cause failure for normal kernels which has no such NULL ptr check but with buffer offset 0 (which is possible now and will be normal if full PPGTT is on). Need to fix NULL ptr check otherwise. */ } static int intel_gpgpu_state_init(intel_gpgpu_t *gpgpu, uint32_t max_threads, uint32_t size_cs_entry, int profiling) { drm_intel_bo *bo; /* Binded buffers */ gpgpu->binded_n = 0; gpgpu->img_bitmap = 0; gpgpu->img_index_base = 3; gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1); /* URB */ gpgpu->curb.num_cs_entries = 64; gpgpu->curb.size_cs_entry = size_cs_entry; gpgpu->max_threads = max_threads; if (gpgpu->printf_b.ibo) dri_bo_unreference(gpgpu->printf_b.ibo); gpgpu->printf_b.ibo = NULL; if (gpgpu->printf_b.bo) dri_bo_unreference(gpgpu->printf_b.bo); gpgpu->printf_b.bo = NULL; /* Set the profile buffer*/ if(gpgpu->time_stamp_b.bo) dri_bo_unreference(gpgpu->time_stamp_b.bo); gpgpu->time_stamp_b.bo = NULL; if (profiling) { bo = dri_bo_alloc(gpgpu->drv->bufmgr, "timestamp query", 4096, 4096); gpgpu->time_stamp_b.bo = bo; if (!bo) fprintf(stderr, "Could not allocate buffer for profiling.\n"); } /* stack */ if (gpgpu->stack_b.bo) dri_bo_unreference(gpgpu->stack_b.bo); gpgpu->stack_b.bo = NULL; /* Set the auxiliary buffer*/ uint32_t size_aux = 0; if(gpgpu->aux_buf.bo) dri_bo_unreference(gpgpu->aux_buf.bo); gpgpu->aux_buf.bo = NULL; /* begin with surface heap to make sure it's page aligned, because state base address use 20bit for the address */ gpgpu->aux_offset.surface_heap_offset = size_aux; size_aux += sizeof(surface_heap_t); //curbe must be 32 bytes aligned size_aux = ALIGN(size_aux, 64); gpgpu->aux_offset.curbe_offset = size_aux; size_aux += gpgpu->curb.num_cs_entries * gpgpu->curb.size_cs_entry * 32; //idrt must be 32 bytes aligned size_aux = ALIGN(size_aux, 32); gpgpu->aux_offset.idrt_offset = size_aux; size_aux += MAX_IF_DESC * sizeof(struct gen6_interface_descriptor); //sampler state must be 32 bytes aligned size_aux = ALIGN(size_aux, 32); gpgpu->aux_offset.sampler_state_offset = size_aux; size_aux += GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t); //sampler border color state must be 32 bytes aligned size_aux = ALIGN(size_aux, 32); gpgpu->aux_offset.sampler_border_color_state_offset = size_aux; size_aux += GEN_MAX_SAMPLERS * sizeof(gen7_sampler_border_color_t); /* make sure aux buffer is page aligned */ size_aux = ALIGN(size_aux, 4096); bo = dri_bo_alloc(gpgpu->drv->bufmgr, "AUX_BUFFER", size_aux, 4096); if (!bo || dri_bo_map(bo, 1) != 0) { fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno)); if (bo) dri_bo_unreference(bo); if (profiling && gpgpu->time_stamp_b.bo) dri_bo_unreference(gpgpu->time_stamp_b.bo); gpgpu->time_stamp_b.bo = NULL; return -1; } memset(bo->virtual, 0, size_aux); gpgpu->aux_buf.bo = bo; return 0; } static void intel_gpgpu_set_buf_reloc_gen7(intel_gpgpu_t *gpgpu, int32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual 
+ gpgpu->aux_offset.surface_heap_offset; heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t); dri_bo_emit_reloc(gpgpu->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, obj_bo_offset, gpgpu->aux_offset.surface_heap_offset + heap->binding_table[index] + offsetof(gen7_surface_state_t, ss1), obj_bo); } static dri_bo* intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti) { if(gpgpu->constant_b.bo) dri_bo_unreference(gpgpu->constant_b.bo); gpgpu->constant_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "CONSTANT_BUFFER", size, 64); if (gpgpu->constant_b.bo == NULL) return NULL; intel_gpgpu_setup_bti(gpgpu, gpgpu->constant_b.bo, 0, size, bti, I965_SURFACEFORMAT_R32G32B32A32_UINT); return gpgpu->constant_b.bo; } static void intel_gpgpu_setup_bti_gen7(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset, uint32_t size, unsigned char index, uint32_t format) { uint32_t s = size - 1; surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; gen7_surface_state_t *ss0 = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)]; memset(ss0, 0, sizeof(gen7_surface_state_t)); ss0->ss0.surface_type = I965_SURFACE_BUFFER; ss0->ss0.surface_format = format; ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */ // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte. if(format == I965_SURFACEFORMAT_RAW) assert((ss0->ss2.width & 0x03) == 3); ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */ ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */ ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl(); heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t); ss0->ss1.base_addr = buf->offset + internal_offset; dri_bo_emit_reloc(gpgpu->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, internal_offset, gpgpu->aux_offset.surface_heap_offset + heap->binding_table[index] + offsetof(gen7_surface_state_t, ss1), buf); } static void intel_gpgpu_setup_bti_gen75(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset, uint32_t size, unsigned char index, uint32_t format) { uint32_t s = size - 1; surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; gen7_surface_state_t *ss0 = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)]; memset(ss0, 0, sizeof(gen7_surface_state_t)); ss0->ss0.surface_type = I965_SURFACE_BUFFER; ss0->ss0.surface_format = format; if(format != I965_SURFACEFORMAT_RAW) { ss0->ss7.shader_r = I965_SURCHAN_SELECT_RED; ss0->ss7.shader_g = I965_SURCHAN_SELECT_GREEN; ss0->ss7.shader_b = I965_SURCHAN_SELECT_BLUE; ss0->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA; } ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */ // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 4 byte. 
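  // Editor's note: a buffer surface encodes (size - 1) across three
  // fields: width = bits 6:0, height = bits 20:7, depth = bits 30:21.
  // Worked example for a 64KB buffer: s = 0xFFFF, so width = 0x7F,
  // height = 0x1FF, depth = 0, and (depth << 21) | (height << 7) | width
  // reconstructs 65535. For RAW surfaces the assert below relies on size
  // being a multiple of 4, which forces the two low bits of width to be 1.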
  if (format == I965_SURFACEFORMAT_RAW)
    assert((ss0->ss2.width & 0x03) == 3);
  ss0->ss2.height = (s >> 7) & 0x3fff;   /* bits 20:7 of sz */
  ss0->ss3.depth = (s >> 21) & 0x3ff;    /* bits 30:21 of sz */
  ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
  heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t);

  ss0->ss1.base_addr = buf->offset + internal_offset;
  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
                    I915_GEM_DOMAIN_RENDER,
                    I915_GEM_DOMAIN_RENDER,
                    internal_offset,
                    gpgpu->aux_offset.surface_heap_offset +
                    heap->binding_table[index] +
                    offsetof(gen7_surface_state_t, ss1),
                    buf);
}

static void
intel_gpgpu_setup_bti_gen8(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset,
                           uint32_t size, unsigned char index, uint32_t format)
{
  uint32_t s = size - 1;
  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
  gen8_surface_state_t *ss0 = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
  memset(ss0, 0, sizeof(gen8_surface_state_t));
  ss0->ss0.surface_type = I965_SURFACE_BUFFER;
  ss0->ss0.surface_format = format;
  if (format != I965_SURFACEFORMAT_RAW) {
    ss0->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
    ss0->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
    ss0->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
    ss0->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
  }
  ss0->ss2.width = s & 0x7f;             /* bits 6:0 of sz */
  // Per bspec, for I965_SURFACE_BUFFER with the RAW format, the size must be a multiple of 4 bytes.
  if (format == I965_SURFACEFORMAT_RAW)
    assert((ss0->ss2.width & 0x03) == 3);
  ss0->ss2.height = (s >> 7) & 0x3fff;   /* bits 20:7 of sz */
  ss0->ss3.depth = (s >> 21) & 0x3ff;    /* bits 30:21 of sz */
  ss0->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
  heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen8_surface_state_t);
  ss0->ss8.surface_base_addr_lo = (buf->offset64 + internal_offset) & 0xffffffff;
  ss0->ss9.surface_base_addr_hi = ((buf->offset64 + internal_offset) >> 32) & 0xffffffff;
  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
                    I915_GEM_DOMAIN_RENDER,
                    I915_GEM_DOMAIN_RENDER,
                    internal_offset,
                    gpgpu->aux_offset.surface_heap_offset +
                    heap->binding_table[index] +
                    offsetof(gen8_surface_state_t, ss8),
                    buf);
}

static int
intel_is_surface_array(cl_mem_object_type type)
{
  if (type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
      type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
    return 1;
  return 0;
}

static int
intel_get_surface_type(cl_mem_object_type type)
{
  switch (type) {
  case CL_MEM_OBJECT_IMAGE1D:
  case CL_MEM_OBJECT_IMAGE1D_ARRAY:
    return I965_SURFACE_1D;

  case CL_MEM_OBJECT_IMAGE1D_BUFFER:
  case CL_MEM_OBJECT_IMAGE2D:
  case CL_MEM_OBJECT_IMAGE2D_ARRAY:
    return I965_SURFACE_2D;

  case CL_MEM_OBJECT_IMAGE3D:
    return I965_SURFACE_3D;

  default:
    assert(0);
  }
  return 0;
}

/* Get the fixed-up surface type. If it is a 1D array image with a large
   index, we need to fix it up to the 2D type, due to a Gen7/Gen75 sampler
   issue on integer-type surfaces with clamp address mode and nearest filter
   mode.
*/ static uint32_t get_surface_type(intel_gpgpu_t *gpgpu, int index, cl_mem_object_type type) { uint32_t surface_type; if (((IS_IVYBRIDGE(gpgpu->drv->device_id) || IS_HASWELL(gpgpu->drv->device_id) || IS_BROADWELL(gpgpu->drv->device_id) || IS_CHERRYVIEW(gpgpu->drv->device_id) || IS_SKYLAKE(gpgpu->drv->device_id))) && index >= BTI_WORKAROUND_IMAGE_OFFSET + BTI_RESERVED_NUM && type == CL_MEM_OBJECT_IMAGE1D_ARRAY) surface_type = I965_SURFACE_2D; else surface_type = intel_get_surface_type(type); return surface_type; } static void intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, uint32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, uint32_t bpp, int32_t w, int32_t h, int32_t depth, int32_t pitch, int32_t slice_pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; gen7_surface_state_t *ss = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)]; memset(ss, 0, sizeof(*ss)); ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 ss->ss0.surface_type = get_surface_type(gpgpu, index, type); if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; ss->ss0.surface_array_spacing = 1; } ss->ss0.surface_format = format; ss->ss1.base_addr = obj_bo->offset + obj_bo_offset; ss->ss2.width = w - 1; ss->ss2.height = h - 1; ss->ss3.depth = depth - 1; ss->ss4.not_str_buf.rt_view_extent = depth - 1; ss->ss4.not_str_buf.min_array_element = 0; ss->ss3.pitch = pitch - 1; ss->ss5.cache_control = cl_gpgpu_get_cache_ctrl(); if (tiling == GPGPU_TILE_X) { ss->ss0.tiled_surface = 1; ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; } else if (tiling == GPGPU_TILE_Y) { ss->ss0.tiled_surface = 1; ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; } ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset); assert(index < GEN_MAX_SURFACES); } static void intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, uint32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, uint32_t bpp, int32_t w, int32_t h, int32_t depth, int32_t pitch, int32_t slice_pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; gen7_surface_state_t *ss = (gen7_surface_state_t *) &heap->surface[index * sizeof(gen7_surface_state_t)]; memset(ss, 0, sizeof(*ss)); ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 ss->ss0.surface_type = get_surface_type(gpgpu, index, type); if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; ss->ss0.surface_array_spacing = 1; } ss->ss0.surface_format = format; ss->ss1.base_addr = obj_bo->offset + obj_bo_offset; ss->ss2.width = w - 1; ss->ss2.height = h - 1; ss->ss3.depth = depth - 1; ss->ss4.not_str_buf.rt_view_extent = depth - 1; ss->ss4.not_str_buf.min_array_element = 0; ss->ss3.pitch = pitch - 1; ss->ss5.cache_control = cl_gpgpu_get_cache_ctrl(); ss->ss7.shader_r = I965_SURCHAN_SELECT_RED; ss->ss7.shader_g = I965_SURCHAN_SELECT_GREEN; ss->ss7.shader_b = I965_SURCHAN_SELECT_BLUE; ss->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA; if (tiling == GPGPU_TILE_X) { ss->ss0.tiled_surface = 1; ss->ss0.tile_walk = I965_TILEWALK_XMAJOR; } else if (tiling == GPGPU_TILE_Y) { ss->ss0.tiled_surface = 1; ss->ss0.tile_walk = I965_TILEWALK_YMAJOR; } ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? 
*/ intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset); assert(index < GEN_MAX_SURFACES); } static void intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu, uint32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, uint32_t bpp, int32_t w, int32_t h, int32_t depth, int32_t pitch, int32_t slice_pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)]; memset(ss, 0, sizeof(*ss)); ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 ss->ss0.surface_type = get_surface_type(gpgpu, index, type); ss->ss0.surface_format = format; if (intel_is_surface_array(type)) { ss->ss0.surface_array = 1; ss->ss1.surface_qpitch = (h + 3)/4; } ss->ss0.horizontal_alignment = 1; ss->ss0.vertical_alignment = 1; if (tiling == GPGPU_TILE_X) { ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR; } else if (tiling == GPGPU_TILE_Y) { ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR; } else assert(tiling == GPGPU_NO_TILE);// W mode is not supported now. ss->ss2.width = w - 1; ss->ss2.height = h - 1; ss->ss3.depth = depth - 1; ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff; ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff; ss->ss4.render_target_view_ext = depth - 1; ss->ss4.min_array_elt = 0; ss->ss3.surface_pitch = pitch - 1; ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl(); ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED; ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN; ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE; ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA; ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? 
*/ heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * surface_state_sz; dri_bo_emit_reloc(gpgpu->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, obj_bo_offset, gpgpu->aux_offset.surface_heap_offset + heap->binding_table[index] + offsetof(gen8_surface_state_t, ss8), obj_bo); assert(index < GEN_MAX_SURFACES); } static void intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu, uint32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset, uint32_t format, cl_mem_object_type type, uint32_t bpp, int32_t w, int32_t h, int32_t depth, int32_t pitch, int32_t slice_pitch, int32_t tiling) { surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)]; memset(ss, 0, sizeof(*ss)); ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2 ss->ss0.surface_type = get_surface_type(gpgpu, index, type); ss->ss0.surface_format = format; if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_1D) { ss->ss0.surface_array = 1; ss->ss1.surface_qpitch = (slice_pitch/bpp + 3)/4; //align_h } if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_2D) { ss->ss0.surface_array = 1; ss->ss1.surface_qpitch = (slice_pitch/pitch + 3)/4; } if(ss->ss0.surface_type == I965_SURFACE_3D) ss->ss1.surface_qpitch = (slice_pitch/pitch + 3)/4; ss->ss0.horizontal_alignment = 1; ss->ss0.vertical_alignment = 1; if (tiling == GPGPU_TILE_X) { ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR; } else if (tiling == GPGPU_TILE_Y) { ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR; } else assert(tiling == GPGPU_NO_TILE);// W mode is not supported now. ss->ss2.width = w - 1; ss->ss2.height = h - 1; ss->ss3.depth = depth - 1; ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff; ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff; ss->ss4.render_target_view_ext = depth - 1; ss->ss4.min_array_elt = 0; ss->ss3.surface_pitch = pitch - 1; ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl(); ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED; ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN; ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE; ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA; ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? 
*/ heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * surface_state_sz; dri_bo_emit_reloc(gpgpu->aux_buf.bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, obj_bo_offset, gpgpu->aux_offset.surface_heap_offset + heap->binding_table[index] + offsetof(gen8_surface_state_t, ss8), obj_bo); assert(index < GEN_MAX_SURFACES); } static void intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset, uint32_t internal_offset, uint32_t size, uint8_t bti) { assert(gpgpu->binded_n < max_buf_n); gpgpu->binded_buf[gpgpu->binded_n] = buf; gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset; gpgpu->binded_offset[gpgpu->binded_n] = offset; gpgpu->binded_n++; intel_gpgpu_setup_bti(gpgpu, buf, internal_offset, size, bti, I965_SURFACEFORMAT_RAW); } static int intel_gpgpu_set_scratch(intel_gpgpu_t * gpgpu, uint32_t per_thread_size) { drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; drm_intel_bo* old = gpgpu->scratch_b.bo; uint32_t total = per_thread_size * gpgpu->max_threads; /* Per Bspec, scratch should 2X the desired size, otherwise luxmark may hang */ if (IS_HASWELL(gpgpu->drv->device_id)) total *= 2; gpgpu->per_thread_scratch = per_thread_size; if(old && old->size < total) { drm_intel_bo_unreference(old); old = NULL; } if(!old && total) { gpgpu->scratch_b.bo = drm_intel_bo_alloc(bufmgr, "SCRATCH_BO", total, 4096); if (gpgpu->scratch_b.bo == NULL) return -1; } return 0; } static void intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint8_t bti) { drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64); cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)gpgpu->stack_b.bo, offset, 0, size, bti); } static void intel_gpgpu_build_idrt_gen7(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gen6_interface_descriptor_t *desc; drm_intel_bo *ker_bo = NULL; desc = (gen6_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset); memset(desc, 0, sizeof(*desc)); ker_bo = (drm_intel_bo *) kernel->bo; desc->desc0.kernel_start_pointer = ker_bo->offset >> 6; /* reloc */ desc->desc1.single_program_flow = 0; desc->desc1.floating_point_mode = 0; /* use IEEE-754 rule */ desc->desc5.rounding_mode = 0; /* round to nearest even */ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0); desc->desc2.sampler_state_pointer = (gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) >> 5; desc->desc3.binding_table_entry_count = 0; /* no prefetch */ desc->desc3.binding_table_pointer = 0; desc->desc4.curbe_read_len = kernel->curbe_sz / 32; desc->desc4.curbe_read_offset = 0; /* Barriers / SLM are automatically handled on Gen7+ */ if (gpgpu->drv->gen_ver == 7 || gpgpu->drv->gen_ver == 75) { size_t slm_sz = kernel->slm_sz; desc->desc5.group_threads_num = kernel->use_slm ? 
kernel->thread_n : 0; desc->desc5.barrier_enable = kernel->use_slm; if (slm_sz <= 4*KB) slm_sz = 4*KB; else if (slm_sz <= 8*KB) slm_sz = 8*KB; else if (slm_sz <= 16*KB) slm_sz = 16*KB; else if (slm_sz <= 32*KB) slm_sz = 32*KB; else slm_sz = 64*KB; slm_sz = slm_sz >> 12; desc->desc5.slm_sz = slm_sz; } else desc->desc5.group_threads_num = kernel->barrierID; /* BarrierID on GEN6 */ dri_bo_emit_reloc(gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0, gpgpu->aux_offset.idrt_offset + offsetof(gen6_interface_descriptor_t, desc0), ker_bo); dri_bo_emit_reloc(gpgpu->aux_buf.bo, I915_GEM_DOMAIN_SAMPLER, 0, gpgpu->aux_offset.sampler_state_offset, gpgpu->aux_offset.idrt_offset + offsetof(gen6_interface_descriptor_t, desc2), gpgpu->aux_buf.bo); } static void intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gen8_interface_descriptor_t *desc; desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset); memset(desc, 0, sizeof(*desc)); desc->desc0.kernel_start_pointer = 0; /* reloc */ desc->desc2.single_program_flow = 0; desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */ desc->desc6.rounding_mode = 0; /* round to nearest even */ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0); desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5; desc->desc4.binding_table_entry_count = 0; /* no prefetch */ desc->desc4.binding_table_pointer = 0; desc->desc5.curbe_read_len = kernel->curbe_sz / 32; desc->desc5.curbe_read_offset = 0; /* Barriers / SLM are automatically handled on Gen7+ */ size_t slm_sz = kernel->slm_sz; /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */ desc->desc6.group_threads_num = kernel->thread_n; desc->desc6.barrier_enable = kernel->use_slm; if (slm_sz == 0) slm_sz = 0; else if (slm_sz <= 4*KB) slm_sz = 4*KB; else if (slm_sz <= 8*KB) slm_sz = 8*KB; else if (slm_sz <= 16*KB) slm_sz = 16*KB; else if (slm_sz <= 32*KB) slm_sz = 32*KB; else slm_sz = 64*KB; slm_sz = slm_sz >> 12; desc->desc6.slm_sz = slm_sz; } static void intel_gpgpu_build_idrt_gen9(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gen8_interface_descriptor_t *desc; desc = (gen8_interface_descriptor_t*) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.idrt_offset); memset(desc, 0, sizeof(*desc)); desc->desc0.kernel_start_pointer = 0; /* reloc */ desc->desc2.single_program_flow = 0; desc->desc2.floating_point_mode = 0; /* use IEEE-754 rule */ desc->desc6.rounding_mode = 0; /* round to nearest even */ assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_state_offset) % 32 == 0); desc->desc3.sampler_state_pointer = gpgpu->aux_offset.sampler_state_offset >> 5; desc->desc4.binding_table_entry_count = 0; /* no prefetch */ desc->desc4.binding_table_pointer = 0; desc->desc5.curbe_read_len = kernel->curbe_sz / 32; desc->desc5.curbe_read_offset = 0; /* Barriers / SLM are automatically handled on Gen7+ */ size_t slm_sz = kernel->slm_sz; /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */ desc->desc6.group_threads_num = kernel->thread_n; desc->desc6.barrier_enable = kernel->use_slm; if (slm_sz == 0) slm_sz = 0; else if (slm_sz <= 1*KB) slm_sz = 1; else if (slm_sz <= 2*KB) slm_sz = 2; else if (slm_sz <= 4*KB) slm_sz = 3; else if (slm_sz <= 8*KB) slm_sz = 4; else if (slm_sz <= 16*KB) slm_sz = 5; else if (slm_sz <= 32*KB) slm_sz = 6; else slm_sz = 7; desc->desc6.slm_sz = slm_sz; } static int intel_gpgpu_upload_curbes(intel_gpgpu_t 
*gpgpu, const void* data, uint32_t size) { unsigned char *curbe = NULL; cl_gpgpu_kernel *k = gpgpu->ker; uint32_t i, j; /* Upload the data first */ if (dri_bo_map(gpgpu->aux_buf.bo, 1) != 0) { fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno)); return -1; } assert(gpgpu->aux_buf.bo->virtual); curbe = (unsigned char *) (gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.curbe_offset); memcpy(curbe, data, size); /* Now put all the relocations for our flat address space */ for (i = 0; i < k->thread_n; ++i) for (j = 0; j < gpgpu->binded_n; ++j) { *(uint32_t*)(curbe + gpgpu->binded_offset[j]+i*k->curbe_sz) = gpgpu->binded_buf[j]->offset + gpgpu->target_buf_offset[j]; drm_intel_bo_emit_reloc(gpgpu->aux_buf.bo, gpgpu->aux_offset.curbe_offset + gpgpu->binded_offset[j]+i*k->curbe_sz, gpgpu->binded_buf[j], gpgpu->target_buf_offset[j], I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } dri_bo_unmap(gpgpu->aux_buf.bo); return 0; } static void intel_gpgpu_upload_samplers(intel_gpgpu_t *gpgpu, const void *data, uint32_t n) { if (n) { const size_t sz = n * sizeof(gen6_sampler_state_t); memcpy(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset, data, sz); } } int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest) { switch( cl_address_mode ) { case CLK_ADDRESS_NONE: case CLK_ADDRESS_REPEAT: return GEN_TEXCOORDMODE_WRAP; case CLK_ADDRESS_CLAMP: return GEN_TEXCOORDMODE_CLAMP_BORDER; case CLK_ADDRESS_CLAMP_TO_EDGE: return GEN_TEXCOORDMODE_CLAMP; case CLK_ADDRESS_MIRRORED_REPEAT: return GEN_TEXCOORDMODE_MIRROR; default: return GEN_TEXCOORDMODE_WRAP; } } static void intel_gpgpu_insert_sampler_gen7(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler) { int using_nearest = 0; uint32_t wrap_mode; gen7_sampler_state_t *sampler; sampler = (gen7_sampler_state_t *)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index; memset(sampler, 0, sizeof(*sampler)); assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) % 32 == 0); sampler->ss2.default_color_pointer = (gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) >> 5; if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE) sampler->ss3.non_normalized_coord = 1; else sampler->ss3.non_normalized_coord = 0; switch (clk_sampler & __CLK_FILTER_MASK) { case CLK_FILTER_NEAREST: sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST; sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST; using_nearest = 1; break; case CLK_FILTER_LINEAR: sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR; sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR; break; } wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest); sampler->ss3.s_wrap_mode = wrap_mode; /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. 
*/ sampler->ss3.t_wrap_mode = wrap_mode; sampler->ss3.r_wrap_mode = wrap_mode; sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ sampler->ss0.base_level = 0; sampler->ss1.max_lod = 0; sampler->ss1.min_lod = 0; if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST) sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN | GEN_ADDRESS_ROUNDING_ENABLE_V_MIN | GEN_ADDRESS_ROUNDING_ENABLE_R_MIN; if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST) sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG | GEN_ADDRESS_ROUNDING_ENABLE_V_MAG | GEN_ADDRESS_ROUNDING_ENABLE_R_MAG; dri_bo_emit_reloc(gpgpu->aux_buf.bo, I915_GEM_DOMAIN_SAMPLER, 0, gpgpu->aux_offset.sampler_border_color_state_offset, gpgpu->aux_offset.sampler_state_offset + index * sizeof(gen7_sampler_state_t) + offsetof(gen7_sampler_state_t, ss2), gpgpu->aux_buf.bo); } static void intel_gpgpu_insert_sampler_gen8(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sampler) { int using_nearest = 0; uint32_t wrap_mode; gen8_sampler_state_t *sampler; sampler = (gen8_sampler_state_t *)(gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.sampler_state_offset) + index; memset(sampler, 0, sizeof(*sampler)); assert((gpgpu->aux_buf.bo->offset + gpgpu->aux_offset.sampler_border_color_state_offset) % 32 == 0); if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE) sampler->ss3.non_normalized_coord = 1; else sampler->ss3.non_normalized_coord = 0; switch (clk_sampler & __CLK_FILTER_MASK) { case CLK_FILTER_NEAREST: sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST; sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST; using_nearest = 1; break; case CLK_FILTER_LINEAR: sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR; sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR; break; } wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest); sampler->ss3.s_wrap_mode = wrap_mode; /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. 
*/ sampler->ss3.t_wrap_mode = wrap_mode; sampler->ss3.r_wrap_mode = wrap_mode; sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ sampler->ss0.base_level = 0; sampler->ss1.max_lod = 0; sampler->ss1.min_lod = 0; if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST) sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN | GEN_ADDRESS_ROUNDING_ENABLE_V_MIN | GEN_ADDRESS_ROUNDING_ENABLE_R_MIN; if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST) sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG | GEN_ADDRESS_ROUNDING_ENABLE_V_MAG | GEN_ADDRESS_ROUNDING_ENABLE_R_MAG; } static void intel_gpgpu_bind_sampler_gen7(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz) { int index; assert(sampler_sz <= GEN_MAX_SAMPLERS); for(index = 0; index < sampler_sz; index++) intel_gpgpu_insert_sampler_gen7(gpgpu, index, samplers[index]); } static void intel_gpgpu_bind_sampler_gen8(intel_gpgpu_t *gpgpu, uint32_t *samplers, size_t sampler_sz) { int index; assert(sampler_sz <= GEN_MAX_SAMPLERS); for(index = 0; index < sampler_sz; index++) intel_gpgpu_insert_sampler_gen8(gpgpu, index, samplers[index]); } static void intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { gpgpu->ker = kernel; intel_gpgpu_build_idrt(gpgpu, kernel); dri_bo_unmap(gpgpu->aux_buf.bo); } static void intel_gpgpu_set_perf_counters(intel_gpgpu_t *gpgpu, cl_buffer *perf) { if (gpgpu->perf_b.bo) drm_intel_bo_unreference(gpgpu->perf_b.bo); drm_intel_bo_reference((drm_intel_bo*) perf); gpgpu->perf_b.bo = (drm_intel_bo*) perf; } static void intel_gpgpu_walker_gen7(intel_gpgpu_t *gpgpu, uint32_t simd_sz, uint32_t thread_n, const size_t global_wk_off[3], const size_t global_wk_sz[3], const size_t local_wk_sz[3]) { const uint32_t global_wk_dim[3] = { global_wk_sz[0] / local_wk_sz[0], global_wk_sz[1] / local_wk_sz[1], global_wk_sz[2] / local_wk_sz[2] }; uint32_t right_mask = ~0x0; size_t group_sz = local_wk_sz[0] * local_wk_sz[1] * local_wk_sz[2]; assert(simd_sz == 8 || simd_sz == 16); uint32_t shift = (group_sz & (simd_sz - 1)); shift = (shift == 0) ? 
                         simd_sz : shift;
  right_mask = (1 << shift) - 1;

  BEGIN_BATCH(gpgpu->batch, 11);
  OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 9);
  OUT_BATCH(gpgpu->batch, 0);                        /* kernel index == 0 */
  assert(thread_n <= 64);
  if (simd_sz == 16)
    OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
  else
    OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, global_wk_dim[0]);
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, global_wk_dim[1]);
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, global_wk_dim[2]);
  OUT_BATCH(gpgpu->batch, right_mask);
  OUT_BATCH(gpgpu->batch, ~0x0);                     /* we always set the height to 1, so set the bottom mask to all ones */
  ADVANCE_BATCH(gpgpu->batch);

  BEGIN_BATCH(gpgpu->batch, 2);
  OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
  OUT_BATCH(gpgpu->batch, 0);                        /* kernel index == 0 */
  ADVANCE_BATCH(gpgpu->batch);

  if (IS_IVYBRIDGE(gpgpu->drv->device_id))
    intel_gpgpu_pipe_control(gpgpu);
}

static void
intel_gpgpu_walker_gen8(intel_gpgpu_t *gpgpu,
                        uint32_t simd_sz,
                        uint32_t thread_n,
                        const size_t global_wk_off[3],
                        const size_t global_wk_sz[3],
                        const size_t local_wk_sz[3])
{
  const uint32_t global_wk_dim[3] = {
    global_wk_sz[0] / local_wk_sz[0],
    global_wk_sz[1] / local_wk_sz[1],
    global_wk_sz[2] / local_wk_sz[2]
  };
  uint32_t right_mask = ~0x0;
  size_t group_sz = local_wk_sz[0] * local_wk_sz[1] * local_wk_sz[2];

  assert(simd_sz == 8 || simd_sz == 16);

  uint32_t shift = (group_sz & (simd_sz - 1));
  shift = (shift == 0) ? simd_sz : shift;
  right_mask = (1 << shift) - 1;

  BEGIN_BATCH(gpgpu->batch, 15);
  OUT_BATCH(gpgpu->batch, CMD_GPGPU_WALKER | 13);
  OUT_BATCH(gpgpu->batch, 0);                        /* kernel index == 0 */
  OUT_BATCH(gpgpu->batch, 0);                        /* Indirect Data Length */
  OUT_BATCH(gpgpu->batch, 0);                        /* Indirect Data Start Address */
  assert(thread_n <= 64);
  if (simd_sz == 16)
    OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */
  else
    OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, global_wk_dim[0]);
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, global_wk_dim[1]);
  OUT_BATCH(gpgpu->batch, 0);
  OUT_BATCH(gpgpu->batch, global_wk_dim[2]);
  OUT_BATCH(gpgpu->batch, right_mask);
  OUT_BATCH(gpgpu->batch, ~0x0);                     /* we always set the height to 1, so set the bottom mask to all ones */
  ADVANCE_BATCH(gpgpu->batch);

  BEGIN_BATCH(gpgpu->batch, 2);
  OUT_BATCH(gpgpu->batch, CMD_MEDIA_STATE_FLUSH | 0);
  OUT_BATCH(gpgpu->batch, 0);                        /* kernel index == 0 */
  ADVANCE_BATCH(gpgpu->batch);

  intel_gpgpu_pipe_control(gpgpu);
}

static intel_event_t*
intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
{
  intel_event_t *event = NULL;
  TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t));

  event->buffer = gpgpu->batch->buffer;
  if (event->buffer)
    drm_intel_bo_reference(event->buffer);
  event->status = command_queued;

  if (gpgpu->time_stamp_b.bo) {
    event->ts_buf = gpgpu->time_stamp_b.bo;
    drm_intel_bo_reference(event->ts_buf);
  }

exit:
  return event;
error:
  cl_free(event);
  event = NULL;
  goto exit;
}

/* The upper layer has already flushed the batch buffer; just update the
   internal status to command_running.
*/ static void intel_gpgpu_event_flush(intel_event_t *event) { assert(event->status == command_queued); event->status = command_running; } static int intel_gpgpu_event_update_status(intel_event_t *event, int wait) { if(event->status == command_complete) return event->status; if (event->buffer && event->status == command_running && !drm_intel_bo_busy(event->buffer)) { event->status = command_complete; drm_intel_bo_unreference(event->buffer); event->buffer = NULL; return event->status; } if(wait == 0) return event->status; if (event->buffer) { drm_intel_bo_wait_rendering(event->buffer); event->status = command_complete; drm_intel_bo_unreference(event->buffer); event->buffer = NULL; } return event->status; } static void intel_gpgpu_event_delete(intel_event_t *event) { if(event->buffer) drm_intel_bo_unreference(event->buffer); if(event->ts_buf) drm_intel_bo_unreference(event->ts_buf); cl_free(event); } /* IVB and HSW's result MUST shift in x86_64 system */ static uint64_t intel_gpgpu_read_ts_reg_gen7(drm_intel_bufmgr *bufmgr) { uint64_t result = 0; drm_intel_reg_read(bufmgr, TIMESTAMP_ADDR, &result); /* In x86_64 system, the low 32bits of timestamp count are stored in the high 32 bits of result which got from drm_intel_reg_read, and 32-35 bits are lost; but match bspec in i386 system. It seems the kernel readq bug. So shift 32 bit in x86_64, and only remain 32 bits data in i386. */ #ifdef __i386__ return result & 0x0ffffffff; #else return result >> 32; #endif /* __i386__ */ } /* baytrail's result should clear high 4 bits */ static uint64_t intel_gpgpu_read_ts_reg_baytrail(drm_intel_bufmgr *bufmgr) { uint64_t result = 0; drm_intel_reg_read(bufmgr, TIMESTAMP_ADDR, &result); return result & 0x0ffffffff; } /* We want to get the current time of GPU. */ static void intel_gpgpu_event_get_gpu_cur_timestamp(intel_gpgpu_t* gpgpu, uint64_t* ret_ts) { uint64_t result = 0; drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr; /* Get the ts that match the bspec */ result = intel_gpgpu_read_ts_reg(bufmgr); result *= 80; *ret_ts = result; return; } /* Get the GPU execute time. */ static void intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event, int index, uint64_t* ret_ts) { uint64_t result = 0; assert(event->ts_buf != NULL); assert(index == 0 || index == 1); drm_intel_gem_bo_map_gtt(event->ts_buf); uint64_t* ptr = event->ts_buf->virtual; result = ptr[index]; /* According to BSpec, the timestamp counter should be 36 bits, but comparing to the timestamp counter from IO control reading, we find the first 4 bits seems to be fake. In order to keep the timestamp counter conformable, we just skip the first 4 bits. */ result = (result & 0x0FFFFFFFF) * 80; //convert to nanoseconds *ret_ts = result; drm_intel_gem_bo_unmap_gtt(event->ts_buf); } static int intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset, uint8_t bti) { drm_intel_bo *bo = NULL; if (i == 0) { // the index buffer. 
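    /*
     * printf uses two buffers, selected by i: slot 0 is the index buffer
     * handled here, slot 1 is the output buffer handled in the i == 1 branch
     * below. Each is allocated page aligned, zero filled, then bound to the
     * given binding table index (bti) so the kernel can write into it; the
     * host reads it back later through intel_gpgpu_map_printf_buf().
     */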
    if (gpgpu->printf_b.ibo)
      dri_bo_unreference(gpgpu->printf_b.ibo);
    gpgpu->printf_b.ibo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf index buffer", size, 4096);
    bo = gpgpu->printf_b.ibo;
  } else if (i == 1) {
    if (gpgpu->printf_b.bo)
      dri_bo_unreference(gpgpu->printf_b.bo);
    gpgpu->printf_b.bo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf output buffer", size, 4096);
    bo = gpgpu->printf_b.bo;
  } else
    assert(0);

  if (!bo || (drm_intel_bo_map(bo, 1) != 0)) {
    /* Release whichever buffer failed to allocate or map (the index buffer
       for i == 0, the output buffer for i == 1). */
    if (i == 0 && gpgpu->printf_b.ibo) {
      drm_intel_bo_unreference(gpgpu->printf_b.ibo);
      gpgpu->printf_b.ibo = NULL;
    } else if (i == 1 && gpgpu->printf_b.bo) {
      drm_intel_bo_unreference(gpgpu->printf_b.bo);
      gpgpu->printf_b.bo = NULL;
    }
    fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
    return -1;
  }
  memset(bo->virtual, 0, size);
  drm_intel_bo_unmap(bo);
  cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)bo, offset, 0, size, bti);
  return 0;
}

static void*
intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
{
  drm_intel_bo *bo = NULL;
  if (i == 0) {
    bo = gpgpu->printf_b.ibo;
  } else if (i == 1) {
    bo = gpgpu->printf_b.bo;
  } else
    assert(0);
  drm_intel_bo_map(bo, 1);
  return bo->virtual;
}

static void
intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu, uint32_t i)
{
  drm_intel_bo *bo = NULL;
  if (i == 0) {
    bo = gpgpu->printf_b.ibo;
  } else if (i == 1) {
    bo = gpgpu->printf_b.bo;
  } else
    assert(0);
  drm_intel_bo_unmap(bo);
}

static void
intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
{
  if (i == 0) {
    drm_intel_bo_unreference(gpgpu->printf_b.ibo);
    gpgpu->printf_b.ibo = NULL;
  } else if (i == 1) {
    drm_intel_bo_unreference(gpgpu->printf_b.bo);
    gpgpu->printf_b.bo = NULL;
  } else
    assert(0);
}

static void
intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info, size_t * global_sz)
{
  gpgpu->printf_info = printf_info;
  gpgpu->global_wk_sz[0] = global_sz[0];
  gpgpu->global_wk_sz[1] = global_sz[1];
  gpgpu->global_wk_sz[2] = global_sz[2];
}

static void*
intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu, size_t * global_sz, size_t *outbuf_sz)
{
  global_sz[0] = gpgpu->global_wk_sz[0];
  global_sz[1] = gpgpu->global_wk_sz[1];
  global_sz[2] = gpgpu->global_wk_sz[2];
  if (gpgpu->printf_b.bo)
    *outbuf_sz = gpgpu->printf_b.bo->size;
  return gpgpu->printf_info;
}

LOCAL void
intel_set_gpgpu_callbacks(int device_id)
{
  cl_gpgpu_new = (cl_gpgpu_new_cb *) intel_gpgpu_new;
  cl_gpgpu_delete = (cl_gpgpu_delete_cb *) intel_gpgpu_delete;
  cl_gpgpu_sync = (cl_gpgpu_sync_cb *) intel_gpgpu_sync;
  cl_gpgpu_bind_buf = (cl_gpgpu_bind_buf_cb *) intel_gpgpu_bind_buf;
  cl_gpgpu_set_stack = (cl_gpgpu_set_stack_cb *) intel_gpgpu_set_stack;
  cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
  cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
  cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes;
  cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
  cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
  cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
  cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
  cl_gpgpu_batch_start = (cl_gpgpu_batch_start_cb *) intel_gpgpu_batch_start;
  cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end;
  cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush;
  cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen7;
  cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) intel_gpgpu_set_scratch;
  cl_gpgpu_event_new = (cl_gpgpu_event_new_cb *)intel_gpgpu_event_new;
  cl_gpgpu_event_flush = (cl_gpgpu_event_flush_cb
*)intel_gpgpu_event_flush; cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb *)intel_gpgpu_event_update_status; cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete; cl_gpgpu_event_get_exec_timestamp = (cl_gpgpu_event_get_exec_timestamp_cb *)intel_gpgpu_event_get_exec_timestamp; cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp; cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf; cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf; cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf; cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf; cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr; cl_gpgpu_release_printf_buffer = (cl_gpgpu_release_printf_buffer_cb *)intel_gpgpu_release_printf_buf; cl_gpgpu_set_printf_info = (cl_gpgpu_set_printf_info_cb *)intel_gpgpu_set_printf_info; cl_gpgpu_get_printf_info = (cl_gpgpu_get_printf_info_cb *)intel_gpgpu_get_printf_info; if (IS_BROADWELL(device_id) || IS_CHERRYVIEW(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen8; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen8; intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8; intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; //BDW need not restore SLM, same as gen7 intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; if(IS_CHERRYVIEW(device_id)) intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail; intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen8; intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8; intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8; cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8; intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen8; intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8; intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8; cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8; intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8; intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7; return; } if (IS_SKYLAKE(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9; intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8; intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; //BDW need not restore SLM, same as gen7 intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen9; intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen8; intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen8; cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen8; intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen9; intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8; intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8; cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8; intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen8; intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen9; return; } intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen7; 
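  /* Gen8 (BDW/CHV) and Gen9 (SKL) took the early-return paths above, so from
     here on we install the Gen7-family defaults; the Haswell and
     Ivybridge/Baytrail branches below then override only the hooks that
     differ per generation. */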
intel_gpgpu_load_vfe_state = intel_gpgpu_load_vfe_state_gen7; cl_gpgpu_walker = (cl_gpgpu_walker_cb *)intel_gpgpu_walker_gen7; intel_gpgpu_build_idrt = intel_gpgpu_build_idrt_gen7; intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen7; intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen7; intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7; if (IS_HASWELL(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen75; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen75; intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen75; intel_gpgpu_post_action = intel_gpgpu_post_action_gen75; intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75; intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75; } else if (IS_IVYBRIDGE(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7; if (IS_BAYTRAIL_T(device_id)) { intel_gpgpu_set_L3 = intel_gpgpu_set_L3_baytrail; intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail; } else { intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen7; intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; } cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen7; intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7; intel_gpgpu_post_action = intel_gpgpu_post_action_gen7; intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7; intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7; } } Beignet-1.1.1-Source/src/cl_kernel.h000664 001750 001750 00000011022 12576733264 016366 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_KERNEL_H__ #define __CL_KERNEL_H__ #include "cl_internals.h" #include "cl_driver.h" #include "cl_gbe_loader.h" #include "CL/cl.h" #include #include /* This is the kernel as it is interfaced by the compiler */ struct _gbe_kernel; /* We need to save buffer data for relocation and binding and we must figure out * if all arguments are properly set */ typedef struct cl_argument { cl_mem mem; /* For image and regular buffers */ cl_sampler sampler; /* For sampler. 
                        */
  unsigned char bti;
  uint32_t local_sz:31;       /* For __local size specification */
  uint32_t is_set:1;          /* All args must be set before NDRange */
} cl_argument;

/* One OCL function */
struct _cl_kernel {
  DEFINE_ICD(dispatch)
  uint64_t magic;             /* To identify it as a kernel */
  volatile int ref_n;         /* We reference count this object */
  cl_buffer bo;               /* The code itself */
  cl_program program;         /* Owns this structure (and pointers) */
  gbe_kernel opaque;          /* (Opaque) compiler structure for the OCL kernel */
  char *curbe;                /* One curbe per kernel */
  size_t curbe_sz;            /* Size of it */
  uint32_t samplers[GEN_MAX_SAMPLERS]; /* samplers defined in kernel & kernel args */
  size_t sampler_sz;          /* sampler size defined in kernel & kernel args. */
  struct ImageInfo *images;   /* images defined in kernel args */
  size_t image_sz;            /* image count in kernel args */
  cl_ulong local_mem_sz;      /* local memory size specified in kernel args. */
  size_t compile_wg_sz[3];    /* Required workgroup size set by the
                                 __attribute__((reqd_work_group_size(X, Y, Z))) qualifier. */
  size_t global_work_sz[3];   /* maximum global size that can be used to execute a kernel
                                 (i.e. the global_work_size argument to clEnqueueNDRangeKernel). */
  size_t stack_size;          /* stack size per work item. */
  cl_argument *args;          /* To track argument setting */
  uint32_t arg_n:31;          /* Number of arguments */
  uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */
};

/* Allocate an empty kernel */
extern cl_kernel cl_kernel_new(cl_program);

/* Destroy and deallocate an empty kernel */
extern void cl_kernel_delete(cl_kernel);

/* Setup the kernel with the given GBE Kernel */
extern void cl_kernel_setup(cl_kernel k, gbe_kernel opaque);

/* Get the kernel name */
extern const char *cl_kernel_get_name(cl_kernel k);

/* Get the kernel attributes */
extern const char *cl_kernel_get_attributes(cl_kernel k);

/* Get the simd width as used in the code */
extern uint32_t cl_kernel_get_simd_width(cl_kernel k);

/* When a kernel is created from outside, we just duplicate the structure we
 * have internally and give it back to the user */
extern cl_kernel cl_kernel_dup(cl_kernel);

/* Add one more reference on the kernel object */
extern void cl_kernel_add_ref(cl_kernel);

/* Set the argument before kernel execution */
extern int cl_kernel_set_arg(cl_kernel,
                             uint32_t arg_index,
                             size_t arg_size,
                             const void *arg_value);

/* Get the argument information */
extern int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index,
                                  cl_kernel_arg_info param_name,
                                  size_t param_value_size,
                                  void *param_value,
                                  size_t *param_value_size_ret);

/* Compute and check the work group size from the user provided local size */
extern cl_int cl_kernel_work_group_sz(cl_kernel ker,
                                      const size_t *local_wk_sz,
                                      cl_uint wk_dim,
                                      size_t *wk_grp_sz);

#endif /* __CL_KERNEL_H__ */

Beignet-1.1.1-Source/src/cl_api.c000664 001750 001750 00000311342 12605356050 015645 0ustar00yryr000000 000000
/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
* * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_command_queue.h" #include "cl_enqueue.h" #include "cl_event.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_mem.h" #include "cl_image.h" #include "cl_sampler.h" #include "cl_alloc.h" #include "cl_utils.h" #include "CL/cl.h" #include "CL/cl_ext.h" #include "CL/cl_intel.h" #include #include #include #include #include "performance.h" #ifndef CL_VERSION_1_2 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) #define CL_DEVICE_TYPE_CUSTOM (1 << 4) #define CL_MEM_HOST_WRITE_ONLY (1 << 7) #define CL_MEM_HOST_READ_ONLY (1 << 8) #define CL_MEM_HOST_NO_ACCESS (1 << 9) typedef intptr_t cl_device_partition_property; #endif #define FILL_GETINFO_RET(TYPE, ELT, VAL, RET) \ do { \ if (param_value && param_value_size < sizeof(TYPE)*ELT) \ return CL_INVALID_VALUE; \ if (param_value) { \ memcpy(param_value, (VAL), sizeof(TYPE)*ELT); \ } \ \ if (param_value_size_ret) \ *param_value_size_ret = sizeof(TYPE)*ELT; \ return RET; \ } while(0) inline cl_int handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list, cl_event* event, enqueue_data* data, cl_command_type type) { cl_int status = cl_event_wait_events(num, wait_list, queue); cl_event e = NULL; if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) { e = cl_event_new(queue->ctx, queue, type, event!=NULL); /* if need profiling, add the submit timestamp here. */ if (e->type != CL_COMMAND_USER && e->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(e, CL_PROFILING_COMMAND_QUEUED); cl_event_get_queued_cpu_timestamp(e); } if(event != NULL) *event = e; if(status == CL_ENQUEUE_EXECUTE_DEFER) { cl_event_new_enqueue_callback(e, data, num, wait_list); } } set_current_event(queue, e); return status; } /* The following code checking overlap is from Appendix of openCL spec 1.1 */ cl_bool check_copy_overlap(const size_t src_offset[3], const size_t dst_offset[3], const size_t region[3], size_t row_pitch, size_t slice_pitch) { const size_t src_min[] = {src_offset[0], src_offset[1], src_offset[2]}; const size_t src_max[] = {src_offset[0] + region[0], src_offset[1] + region[1], src_offset[2] + region[2]}; const size_t dst_min[] = {dst_offset[0], dst_offset[1], dst_offset[2]}; const size_t dst_max[] = {dst_offset[0] + region[0], dst_offset[1] + region[1], dst_offset[2] + region[2]}; // Check for overlap cl_bool overlap = CL_TRUE; unsigned i; size_t dst_start = dst_offset[2] * slice_pitch + dst_offset[1] * row_pitch + dst_offset[0]; size_t dst_end = dst_start + (region[2] * slice_pitch + region[1] * row_pitch + region[0]); size_t src_start = src_offset[2] * slice_pitch + src_offset[1] * row_pitch + src_offset[0]; size_t src_end = src_start + (region[2] * slice_pitch + region[1] * row_pitch + region[0]); for (i=0; i != 3; ++i) { overlap = overlap && (src_min[i] < dst_max[i]) && (src_max[i] > dst_min[i]); } if (!overlap) { size_t delta_src_x = (src_offset[0] + region[0] > row_pitch) ? src_offset[0] + region[0] - row_pitch : 0; size_t delta_dst_x = (dst_offset[0] + region[0] > row_pitch) ? 
dst_offset[0] + region[0] - row_pitch : 0; if ( (delta_src_x > 0 && delta_src_x > dst_offset[0]) || (delta_dst_x > 0 && delta_dst_x > src_offset[0]) ) { if ( (src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end) ) overlap = CL_TRUE; } if (region[2] > 1) { size_t src_height = slice_pitch / row_pitch; size_t dst_height = slice_pitch / row_pitch; size_t delta_src_y = (src_offset[1] + region[1] > src_height) ? src_offset[1] + region[1] - src_height : 0; size_t delta_dst_y = (dst_offset[1] + region[1] > dst_height) ? dst_offset[1] + region[1] - dst_height : 0; if ( (delta_src_y > 0 && delta_src_y > dst_offset[1]) || (delta_dst_y > 0 && delta_dst_y > src_offset[1]) ) { if ( (src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end) ) overlap = CL_TRUE; } } } return overlap; } static cl_int cl_check_device_type(cl_device_type device_type) { const cl_device_type valid = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT | CL_DEVICE_TYPE_CUSTOM; if( (device_type & valid) == 0) { return CL_INVALID_DEVICE_TYPE; } if(UNLIKELY(!(device_type & CL_DEVICE_TYPE_DEFAULT) && !(device_type & CL_DEVICE_TYPE_GPU))) return CL_DEVICE_NOT_FOUND; return CL_SUCCESS; } static cl_int cl_device_id_is_ok(const cl_device_id device) { if(UNLIKELY(device == NULL)) return CL_FALSE; return device != cl_get_gt_device() ? CL_FALSE : CL_TRUE; } cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { if(UNLIKELY(platforms == NULL && num_platforms == NULL)) return CL_INVALID_VALUE; if(UNLIKELY(num_entries == 0 && platforms != NULL)) return CL_INVALID_VALUE; return cl_get_platform_ids(num_entries, platforms, num_platforms); } cl_int clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { /* Only one platform. 
This is easy */ if (UNLIKELY(platform != NULL && platform != cl_get_platform_default())) return CL_INVALID_PLATFORM; return cl_get_platform_info(platform, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices) { cl_int err = CL_SUCCESS; /* Check parameter consistency */ if (UNLIKELY(devices == NULL && num_devices == NULL)) return CL_INVALID_VALUE; if (UNLIKELY(platform && platform != cl_get_platform_default())) return CL_INVALID_PLATFORM; if (UNLIKELY(devices && num_entries == 0)) return CL_INVALID_VALUE; err = cl_check_device_type(device_type); if(err != CL_SUCCESS) return err; return cl_get_device_ids(platform, device_type, num_entries, devices, num_devices); } cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_device_info(device, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clCreateSubDevices(cl_device_id in_device, const cl_device_partition_property * properties, cl_uint num_devices, cl_device_id * out_devices, cl_uint * num_devices_ret) { /* Check parameter consistency */ if (UNLIKELY(out_devices == NULL && num_devices_ret == NULL)) return CL_INVALID_VALUE; if (UNLIKELY(in_device == NULL && properties == NULL)) return CL_INVALID_VALUE; *num_devices_ret = 0; return CL_INVALID_DEVICE_PARTITION_COUNT; } cl_int clRetainDevice(cl_device_id device) { // XXX stub for C++ Bindings return CL_SUCCESS; } cl_int clReleaseDevice(cl_device_id device) { // XXX stub for C++ Bindings return CL_SUCCESS; } cl_context clCreateContext(const cl_context_properties * properties, cl_uint num_devices, const cl_device_id * devices, void (* pfn_notify) (const char*, const void*, size_t, void*), void * user_data, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_context context = NULL; /* Assert parameters correctness */ INVALID_VALUE_IF (devices == NULL); INVALID_VALUE_IF (num_devices == 0); INVALID_VALUE_IF (pfn_notify == NULL && user_data != NULL); /* Now check if the user is asking for the right device */ INVALID_DEVICE_IF (cl_device_id_is_ok(*devices) == CL_FALSE); context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err); initialize_env_var(); error: if (errcode_ret) *errcode_ret = err; return context; } cl_context clCreateContextFromType(const cl_context_properties * properties, cl_device_type device_type, void (CL_CALLBACK *pfn_notify) (const char *, const void *, size_t, void *), void * user_data, cl_int * errcode_ret) { cl_context context = NULL; cl_int err = CL_SUCCESS; cl_device_id devices[1]; cl_uint num_devices = 1; INVALID_VALUE_IF (pfn_notify == NULL && user_data != NULL); err = cl_check_device_type(device_type); if(err != CL_SUCCESS) { goto error; } err = cl_get_device_ids(NULL, device_type, 1, &devices[0], &num_devices); if (err != CL_SUCCESS) { goto error; } context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err); error: if (errcode_ret) *errcode_ret = err; return context; } cl_int clRetainContext(cl_context context) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_context_add_ref(context); error: return err; } cl_int clReleaseContext(cl_context context) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); cl_context_delete(context); error: return err; } cl_int clGetContextInfo(cl_context context, cl_context_info param_name, 
size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (param_name == CL_CONTEXT_DEVICES) { FILL_GETINFO_RET (cl_device_id, 1, &context->device, CL_SUCCESS); } else if (param_name == CL_CONTEXT_NUM_DEVICES) { cl_uint n = 1; FILL_GETINFO_RET (cl_uint, 1, &n, CL_SUCCESS); } else if (param_name == CL_CONTEXT_REFERENCE_COUNT) { cl_uint ref = context->ref_n; FILL_GETINFO_RET (cl_uint, 1, &ref, CL_SUCCESS); } else if (param_name == CL_CONTEXT_PROPERTIES) { if(context->prop_len > 0) { FILL_GETINFO_RET (cl_context_properties, context->prop_len, context->prop_user, CL_SUCCESS); } else { cl_context_properties n = 0; FILL_GETINFO_RET (cl_context_properties, 1, &n, CL_SUCCESS); } } else { return CL_INVALID_VALUE; } error: return err; } cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int * errcode_ret) { cl_command_queue queue = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); INVALID_DEVICE_IF (device != context->device); INVALID_VALUE_IF (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)); if(properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {/*not supported now.*/ err = CL_INVALID_QUEUE_PROPERTIES; goto error; } queue = cl_context_create_queue(context, device, properties, &err); error: if (errcode_ret) *errcode_ret = err; return queue; } cl_int clRetainCommandQueue(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); cl_command_queue_add_ref(command_queue); error: return err; } cl_int clReleaseCommandQueue(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); cl_command_queue_delete(command_queue); error: return err; } cl_int clGetCommandQueueInfo(cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_QUEUE (command_queue); if (param_name == CL_QUEUE_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &command_queue->ctx, CL_SUCCESS); } else if (param_name == CL_QUEUE_DEVICE) { FILL_GETINFO_RET (cl_device_id, 1, &command_queue->ctx->device, CL_SUCCESS); } else if (param_name == CL_QUEUE_REFERENCE_COUNT) { cl_uint ref = command_queue->ref_n; FILL_GETINFO_RET (cl_uint, 1, &ref, CL_SUCCESS); } else if (param_name == CL_QUEUE_PROPERTIES) { FILL_GETINFO_RET (cl_command_queue_properties, 1, &command_queue->props, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void * host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); mem = cl_mem_new_buffer(context, flags, size, host_ptr, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateSubBuffer(cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type buffer_create_type, const void * buffer_create_info, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_MEM(buffer); mem = cl_mem_new_sub_buffer(buffer, flags, buffer_create_type, buffer_create_info, &err); error: if (errcode_ret) *errcode_ret = err; return mem; } cl_mem clCreateImage(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int * errcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (image_format == NULL) { err = 
          CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    goto error;
  }
  if (image_format->image_channel_order < CL_R ||
      image_format->image_channel_order > CL_RGBx) {
    err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    goto error;
  }
  if (image_format->image_channel_data_type < CL_SNORM_INT8 ||
      image_format->image_channel_data_type > CL_FLOAT) {
    err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    goto error;
  }
  if (image_desc == NULL) {
    err = CL_INVALID_IMAGE_DESCRIPTOR;
    goto error;
  }
  if (image_desc->image_type <= CL_MEM_OBJECT_BUFFER ||
      image_desc->image_type > CL_MEM_OBJECT_IMAGE1D_BUFFER) {
    err = CL_INVALID_IMAGE_DESCRIPTOR;
    goto error;
  }
  /* buffer refers to a valid buffer memory object if image_type is
     CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
  if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
      image_desc->buffer) {
    err = CL_INVALID_IMAGE_DESCRIPTOR;
    goto error;
  }
  if (image_desc->num_mip_levels || image_desc->num_samples) {
    err = CL_INVALID_IMAGE_DESCRIPTOR;
    goto error;
  }

  /* The remaining image_desc checks are left to image creation. */
  mem = cl_mem_new_image(context, flags, image_format, image_desc, host_ptr, &err);
error:
  if (errcode_ret)
    *errcode_ret = err;
  return mem;
}

cl_mem
clCreateImage2D(cl_context context,
                cl_mem_flags flags,
                const cl_image_format *image_format,
                size_t image_width,
                size_t image_height,
                size_t image_row_pitch,
                void *host_ptr,
                cl_int *errcode_ret)
{
  cl_mem mem = NULL;
  cl_int err = CL_SUCCESS;
  CHECK_CONTEXT (context);
  cl_image_desc image_desc;
  memset(&image_desc, 0, sizeof(image_desc));

  image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image_desc.image_width = image_width;
  image_desc.image_height = image_height;
  image_desc.image_row_pitch = image_row_pitch;

  mem = cl_mem_new_image(context, flags, image_format, &image_desc, host_ptr, &err);
error:
  if (errcode_ret)
    *errcode_ret = err;
  return mem;
}

cl_mem
clCreateImage3D(cl_context context,
                cl_mem_flags flags,
                const cl_image_format *image_format,
                size_t image_width,
                size_t image_height,
                size_t image_depth,
                size_t image_row_pitch,
                size_t image_slice_pitch,
                void *host_ptr,
                cl_int *errcode_ret)
{
  cl_mem mem = NULL;
  cl_int err = CL_SUCCESS;
  CHECK_CONTEXT (context);
  cl_image_desc image_desc;
  /* Zero the descriptor first so the fields not set below (buffer,
     num_mip_levels, num_samples, ...) are not left uninitialized; this
     matches what clCreateImage2D above does. */
  memset(&image_desc, 0, sizeof(image_desc));

  image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
  image_desc.image_width = image_width;
  image_desc.image_height = image_height;
  image_desc.image_depth = image_depth;
  image_desc.image_row_pitch = image_row_pitch;
  image_desc.image_slice_pitch = image_slice_pitch;

  mem = cl_mem_new_image(context, flags, image_format, &image_desc, host_ptr, &err);
error:
  if (errcode_ret)
    *errcode_ret = err;
  return mem;
}

cl_int
clRetainMemObject(cl_mem memobj)
{
  cl_int err = CL_SUCCESS;
  CHECK_MEM (memobj);
  cl_mem_add_ref(memobj);
error:
  return err;
}

cl_int
clReleaseMemObject(cl_mem memobj)
{
  cl_int err = CL_SUCCESS;
  CHECK_MEM (memobj);
  cl_mem_delete(memobj);
error:
  return err;
}

cl_int
clGetSupportedImageFormats(cl_context ctx,
                           cl_mem_flags flags,
                           cl_mem_object_type image_type,
                           cl_uint num_entries,
                           cl_image_format *image_formats,
                           cl_uint *num_image_formats)
{
  cl_int err = CL_SUCCESS;
  CHECK_CONTEXT (ctx);
  if (UNLIKELY(num_entries == 0 && image_formats != NULL)) {
    err = CL_INVALID_VALUE;
    goto error;
  }
  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE1D &&
               image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY &&
               image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY &&
               image_type != CL_MEM_OBJECT_IMAGE2D &&
               image_type != CL_MEM_OBJECT_IMAGE3D)) {
    err = CL_INVALID_VALUE;
    goto error;
  }
  err = cl_image_get_supported_fmt(ctx,
                                   image_type,
                                   num_entries,
                                   image_formats,
                                   num_image_formats);

error:
  return
err; } cl_int clGetMemObjectInfo(cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_MEM(memobj); err = cl_get_mem_object_info(memobj, param_name, param_value_size, param_value, param_value_size_ret); error: return err; } cl_int clGetImageInfo(cl_mem mem, cl_image_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_image_info(mem, param_name, param_value_size, param_value, param_value_size_ret); } cl_int clSetMemObjectDestructorCallback(cl_mem memobj, void (CL_CALLBACK *pfn_notify) (cl_mem, void*), void * user_data) { cl_int err = CL_SUCCESS; CHECK_MEM(memobj); INVALID_VALUE_IF (pfn_notify == 0); cl_mem_dstr_cb *cb = (cl_mem_dstr_cb*)malloc(sizeof(cl_mem_dstr_cb)); if (!cb) { err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(cb, 0, sizeof(cl_mem_dstr_cb)); cb->pfn_notify = pfn_notify; cb->user_data = user_data; cb->next = memobj->dstr_cb; memobj->dstr_cb = cb; error: return err; } cl_sampler clCreateSampler(cl_context context, cl_bool normalized, cl_addressing_mode addressing, cl_filter_mode filter, cl_int * errcode_ret) { cl_sampler sampler = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); sampler = cl_sampler_new(context, normalized, addressing, filter, &err); error: if (errcode_ret) *errcode_ret = err; return sampler; } cl_int clRetainSampler(cl_sampler sampler) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); cl_sampler_add_ref(sampler); error: return err; } cl_int clReleaseSampler(cl_sampler sampler) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); cl_sampler_delete(sampler); error: return err; } cl_int clGetSamplerInfo(cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_SAMPLER (sampler); if (param_name == CL_SAMPLER_REFERENCE_COUNT) { FILL_GETINFO_RET (cl_uint, 1, (cl_uint*)&sampler->ref_n, CL_SUCCESS); } else if (param_name == CL_SAMPLER_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &sampler->ctx, CL_SUCCESS); } else if (param_name == CL_SAMPLER_NORMALIZED_COORDS) { FILL_GETINFO_RET (cl_bool, 1, &sampler->normalized_coords, CL_SUCCESS); } else if (param_name == CL_SAMPLER_ADDRESSING_MODE) { FILL_GETINFO_RET (cl_addressing_mode, 1, &sampler->address, CL_SUCCESS); } else if (param_name == CL_SAMPLER_FILTER_MODE ) { FILL_GETINFO_RET (cl_filter_mode, 1, &sampler->filter, CL_SUCCESS); } else{ return CL_INVALID_VALUE; } error: return err; } cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const char ** strings, const size_t * lengths, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; cl_uint i; CHECK_CONTEXT (context); INVALID_VALUE_IF (count == 0); INVALID_VALUE_IF (strings == NULL); for(i = 0; i < count; i++) { if(UNLIKELY(strings[i] == NULL)) { err = CL_INVALID_VALUE; goto error; } } program = cl_program_create_from_source(context, count, strings, lengths, &err); error: if (errcode_ret) *errcode_ret = err; return program; } cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices, const cl_device_id * devices, const size_t * lengths, const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); program = cl_program_create_from_binary(context, num_devices, devices, lengths, binaries, binary_status, &err); error: if (errcode_ret) *errcode_ret = err; return 
program; } cl_program clCreateProgramWithBuiltInKernels(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const char * kernel_names, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); INVALID_VALUE_IF (kernel_names == NULL); program = cl_program_create_with_built_in_kernles(context, num_devices, device_list, kernel_names, &err); error: if (errcode_ret) *errcode_ret = err; return program; } cl_int clRetainProgram(cl_program program) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); cl_program_add_ref(program); error: return err; } cl_int clReleaseProgram(cl_program program) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); cl_program_delete(program); error: return err; } cl_int clBuildProgram(cl_program program, cl_uint num_devices, const cl_device_id * device_list, const char * options, void (CL_CALLBACK *pfn_notify) (cl_program, void*), void * user_data) { cl_int err = CL_SUCCESS; CHECK_PROGRAM(program); INVALID_VALUE_IF (num_devices > 1); INVALID_VALUE_IF (num_devices == 0 && device_list != NULL); INVALID_VALUE_IF (num_devices != 0 && device_list == NULL); INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL); /* Everything is easy. We only support one device anyway */ if (num_devices != 0) { assert(program->ctx); INVALID_DEVICE_IF (device_list[0] != program->ctx->device); } /* TODO support create program from binary */ assert(program->source_type == FROM_LLVM || program->source_type == FROM_SOURCE || program->source_type == FROM_LLVM_SPIR || program->source_type == FROM_BINARY); if((err = cl_program_build(program, options)) != CL_SUCCESS) { goto error; } program->is_built = CL_TRUE; if (pfn_notify) pfn_notify(program, user_data); error: return err; } cl_int clCompileProgram(cl_program program , cl_uint num_devices , const cl_device_id * device_list , const char * options , cl_uint num_input_headers , const cl_program * input_headers , const char ** header_include_names , void (CL_CALLBACK * pfn_notify )(cl_program, void *), void * user_data ) { cl_int err = CL_SUCCESS; CHECK_PROGRAM(program); INVALID_VALUE_IF (num_devices > 1); INVALID_VALUE_IF (num_devices == 0 && device_list != NULL); INVALID_VALUE_IF (num_devices != 0 && device_list == NULL); INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL); INVALID_VALUE_IF (num_input_headers == 0 && input_headers != NULL); INVALID_VALUE_IF (num_input_headers != 0 && input_headers == NULL); /* Everything is easy. 
We only support one device anyway */ if (num_devices != 0) { assert(program->ctx); INVALID_DEVICE_IF (device_list[0] != program->ctx->device); } /* TODO support create program from binary */ assert(program->source_type == FROM_LLVM || program->source_type == FROM_SOURCE || program->source_type == FROM_BINARY); if((err = cl_program_compile(program, num_input_headers, input_headers, header_include_names, options)) != CL_SUCCESS) { goto error; } program->is_built = CL_TRUE; if (pfn_notify) pfn_notify(program, user_data); error: return err; } cl_program clLinkProgram(cl_context context, cl_uint num_devices, const cl_device_id * device_list, const char * options, cl_uint num_input_programs, const cl_program * input_programs, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), void * user_data, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_program program = NULL; CHECK_CONTEXT (context); INVALID_VALUE_IF (num_devices > 1); INVALID_VALUE_IF (num_devices == 0 && device_list != NULL); INVALID_VALUE_IF (num_devices != 0 && device_list == NULL); INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL); INVALID_VALUE_IF (num_input_programs == 0 && input_programs != NULL); INVALID_VALUE_IF (num_input_programs != 0 && input_programs == NULL); INVALID_VALUE_IF (num_input_programs == 0 && input_programs == NULL); program = cl_program_link(context, num_input_programs, input_programs, options, &err); if(program) program->is_built = CL_TRUE; if (pfn_notify) pfn_notify(program, user_data); error: if (errcode_ret) *errcode_ret = err; return program; } cl_int clUnloadCompiler(void) { return CL_SUCCESS; } cl_int clUnloadPlatformCompiler(cl_platform_id platform) { return CL_SUCCESS; } cl_int clGetProgramInfo(cl_program program, cl_program_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; char * ret_str = ""; CHECK_PROGRAM (program); if (param_name == CL_PROGRAM_REFERENCE_COUNT) { cl_uint ref = program->ref_n; FILL_GETINFO_RET (cl_uint, 1, (&ref), CL_SUCCESS); } else if (param_name == CL_PROGRAM_CONTEXT) { cl_context context = program->ctx; FILL_GETINFO_RET (cl_context, 1, &context, CL_SUCCESS); } else if (param_name == CL_PROGRAM_NUM_DEVICES) { cl_uint num_dev = 1; // Just 1 dev now. 
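/* Note (added for clarity, inferred from how the macro is used throughout
   this file): FILL_GETINFO_RET copies the requested value into param_value
   when it is non-NULL, reports the required size through
   param_value_size_ret, and returns from the calling function with the
   given status, so each branch below is a complete reply. */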
FILL_GETINFO_RET (cl_uint, 1, &num_dev, CL_SUCCESS); } else if (param_name == CL_PROGRAM_DEVICES) { cl_device_id dev_id = program->ctx->device; FILL_GETINFO_RET (cl_device_id, 1, &dev_id, CL_SUCCESS); } else if (param_name == CL_PROGRAM_NUM_KERNELS) { cl_uint kernels_num = program->ker_n; FILL_GETINFO_RET (cl_uint, 1, &kernels_num, CL_SUCCESS); } else if (param_name == CL_PROGRAM_SOURCE) { if (!program->source) FILL_GETINFO_RET (char, 1, &ret_str, CL_SUCCESS); FILL_GETINFO_RET (char, (strlen(program->source) + 1), program->source, CL_SUCCESS); } else if(param_name == CL_PROGRAM_KERNEL_NAMES) { cl_program_get_kernel_names(program, param_value_size, (char *)param_value, param_value_size_ret); } else if (param_name == CL_PROGRAM_BINARY_SIZES) { if (program->binary == NULL){ if( program->binary_type == CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 0); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 1); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 2); }else{ return CL_INVALID_BINARY; } } if (program->binary == NULL || program->binary_sz == 0) { return CL_OUT_OF_RESOURCES; } FILL_GETINFO_RET (size_t, 1, (&program->binary_sz), CL_SUCCESS); } else if (param_name == CL_PROGRAM_BINARIES) { if (param_value_size_ret) *param_value_size_ret = sizeof(void*); if (!param_value) return CL_SUCCESS; /* param_value points to an array of n pointers allocated by the caller */ if (program->binary == NULL) { if( program->binary_type == CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 0); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 1); }else if( program->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY) { program->binary_sz = compiler_program_serialize_to_binary(program->opaque, &program->binary, 2); }else{ return CL_INVALID_BINARY; } } if (program->binary == NULL || program->binary_sz == 0) { return CL_OUT_OF_RESOURCES; } memcpy(*((void **)param_value), program->binary, program->binary_sz); return CL_SUCCESS; } else { return CL_INVALID_VALUE; } error: return err; } cl_int clGetProgramBuildInfo(cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; char * ret_str = ""; CHECK_PROGRAM (program); INVALID_DEVICE_IF (device != program->ctx->device); if (param_name == CL_PROGRAM_BUILD_STATUS) { FILL_GETINFO_RET (cl_build_status, 1, &program->build_status, CL_SUCCESS); } else if (param_name == CL_PROGRAM_BUILD_OPTIONS) { if (program->is_built && program->build_opts) ret_str = program->build_opts; FILL_GETINFO_RET (char, (strlen(ret_str)+1), ret_str, CL_SUCCESS); } else if (param_name == CL_PROGRAM_BUILD_LOG) { FILL_GETINFO_RET (char, program->build_log_sz + 1, program->build_log, CL_SUCCESS); if (param_value_size_ret) *param_value_size_ret = program->build_log_sz + 1; }else if (param_name == CL_PROGRAM_BINARY_TYPE){ FILL_GETINFO_RET (cl_uint, 1, &program->binary_type, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_kernel clCreateKernel(cl_program program, const 
char * kernel_name, cl_int * errcode_ret) { cl_kernel kernel = NULL; cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); if (program->ker_n <= 0) { err = CL_INVALID_PROGRAM_EXECUTABLE; goto error; } INVALID_VALUE_IF (kernel_name == NULL); kernel = cl_program_create_kernel(program, kernel_name, &err); error: if (errcode_ret) *errcode_ret = err; return kernel; } cl_int clCreateKernelsInProgram(cl_program program, cl_uint num_kernels, cl_kernel * kernels, cl_uint * num_kernels_ret) { cl_int err = CL_SUCCESS; CHECK_PROGRAM (program); if (program->ker_n <= 0) { err = CL_INVALID_PROGRAM_EXECUTABLE; goto error; } if (kernels && num_kernels < program->ker_n) { err = CL_INVALID_VALUE; goto error; } if(num_kernels_ret) *num_kernels_ret = program->ker_n; if(kernels) err = cl_program_create_kernels_in_program(program, kernels); error: return err; } cl_int clRetainKernel(cl_kernel kernel) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); cl_kernel_add_ref(kernel); error: return err; } cl_int clReleaseKernel(cl_kernel kernel) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); cl_kernel_delete(kernel); error: return err; } cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void * arg_value) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value); error: return err; } cl_int clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_KERNEL(kernel); if(kernel->program->build_opts == NULL || strstr(kernel->program->build_opts,"-cl-kernel-arg-info") == NULL ) { err = CL_KERNEL_ARG_INFO_NOT_AVAILABLE; goto error; } if (param_name != CL_KERNEL_ARG_ADDRESS_QUALIFIER && param_name != CL_KERNEL_ARG_ACCESS_QUALIFIER && param_name != CL_KERNEL_ARG_TYPE_NAME && param_name != CL_KERNEL_ARG_TYPE_QUALIFIER && param_name != CL_KERNEL_ARG_NAME) { err = CL_INVALID_VALUE; goto error; } if (arg_index >= kernel->arg_n) { err = CL_INVALID_ARG_INDEX; goto error; } err = cl_get_kernel_arg_info(kernel, arg_index, param_name, param_value_size, param_value, param_value_size_ret); error: return err; } cl_int clGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err; CHECK_KERNEL(kernel); if (param_name == CL_KERNEL_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &kernel->program->ctx, CL_SUCCESS); } else if (param_name == CL_KERNEL_PROGRAM) { FILL_GETINFO_RET (cl_program, 1, &kernel->program, CL_SUCCESS); } else if (param_name == CL_KERNEL_NUM_ARGS) { cl_uint n = kernel->arg_n; FILL_GETINFO_RET (cl_uint, 1, &n, CL_SUCCESS); } else if (param_name == CL_KERNEL_REFERENCE_COUNT) { cl_int ref = kernel->ref_n; FILL_GETINFO_RET (cl_int, 1, &ref, CL_SUCCESS); } else if (param_name == CL_KERNEL_FUNCTION_NAME) { const char * n = cl_kernel_get_name(kernel); FILL_GETINFO_RET (cl_char, strlen(n)+1, n, CL_SUCCESS); } else if (param_name == CL_KERNEL_ATTRIBUTES) { const char * n = cl_kernel_get_attributes(kernel); FILL_GETINFO_RET (cl_char, strlen(n)+1, n, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { return cl_get_kernel_workgroup_info(kernel, device, param_name, param_value_size, param_value, param_value_size_ret); } cl_int 
clWaitForEvents(cl_uint num_events, const cl_event * event_list) { cl_int err = CL_SUCCESS; cl_context ctx = NULL; if(num_events > 0 && event_list) ctx = event_list[0]->ctx; TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx); while(cl_event_wait_events(num_events, event_list, NULL) == CL_ENQUEUE_EXECUTE_DEFER) { usleep(8000); //sleep 8ms to wait other thread } error: return err; } cl_int clGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if (param_name == CL_EVENT_COMMAND_QUEUE) { FILL_GETINFO_RET (cl_command_queue, 1, &event->queue, CL_SUCCESS); } else if (param_name == CL_EVENT_CONTEXT) { FILL_GETINFO_RET (cl_context, 1, &event->ctx, CL_SUCCESS); } else if (param_name == CL_EVENT_COMMAND_TYPE) { FILL_GETINFO_RET (cl_command_type, 1, &event->type, CL_SUCCESS); } else if (param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) { cl_event_update_status(event, 0); FILL_GETINFO_RET (cl_int, 1, &event->status, CL_SUCCESS); } else if (param_name == CL_EVENT_REFERENCE_COUNT) { cl_uint ref = event->ref_n; FILL_GETINFO_RET (cl_int, 1, &ref, CL_SUCCESS); } else { return CL_INVALID_VALUE; } error: return err; } cl_event clCreateUserEvent(cl_context context, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; cl_event event = NULL; CHECK_CONTEXT(context); TRY_ALLOC(event, cl_event_new(context, NULL, CL_COMMAND_USER, CL_TRUE)); error: if(errcode_ret) *errcode_ret = err; return event; } cl_int clRetainEvent(cl_event event) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); cl_event_add_ref(event); error: return err; } cl_int clReleaseEvent(cl_event event) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); cl_event_delete(event); error: return err; } cl_int clSetUserEventStatus(cl_event event, cl_int execution_status) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if(execution_status > CL_COMPLETE) { err = CL_INVALID_VALUE; goto error; } if(event->status != CL_SUBMITTED) { err = CL_INVALID_OPERATION; goto error; } cl_event_set_status(event, execution_status); error: return err; } cl_int clSetEventCallback(cl_event event, cl_int command_exec_callback_type, void (CL_CALLBACK * pfn_notify) (cl_event, cl_int, void *), void * user_data) { cl_int err = CL_SUCCESS; CHECK_EVENT(event); if((pfn_notify == NULL) || (command_exec_callback_type > CL_SUBMITTED) || (command_exec_callback_type < CL_COMPLETE)) { err = CL_INVALID_VALUE; goto error; } err = cl_event_set_callback(event, command_exec_callback_type, pfn_notify, user_data); error: return err; } cl_int clGetEventProfilingInfo(cl_event event, cl_profiling_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { cl_int err = CL_SUCCESS; cl_ulong ret_val; CHECK_EVENT(event); cl_event_update_status(event, 0); if (event->type == CL_COMMAND_USER || !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) || event->status != CL_COMPLETE) { err = CL_PROFILING_INFO_NOT_AVAILABLE; goto error; } if (param_value && param_value_size < sizeof(cl_ulong)) { err = CL_INVALID_VALUE; goto error; } if (param_name == CL_PROFILING_COMMAND_QUEUED) { ret_val = event->queued_timestamp; } else if (param_name == CL_PROFILING_COMMAND_SUBMIT) { ret_val= event->queued_timestamp + cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[1]); } else if (param_name == CL_PROFILING_COMMAND_START) { err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_START); ret_val = event->queued_timestamp + 
cl_event_get_start_timestamp(event);
  } else if (param_name == CL_PROFILING_COMMAND_END) {
    err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_END);
    ret_val = event->queued_timestamp + cl_event_get_end_timestamp(event);
  } else {
    err = CL_INVALID_VALUE; goto error;
  }

  if (err == CL_SUCCESS) {
    if (param_value)
      *(cl_ulong*)param_value = ret_val;
    if (param_value_size_ret)
      *param_value_size_ret = sizeof(cl_ulong);
  }
error:
  return err;
}

cl_int
clFlush(cl_command_queue command_queue)
{
  /* Nothing to do for now: clEnqueueNDRangeKernel currently flushes at the
   * end of each call. We may need to optimize this later. */
  return CL_SUCCESS;
}

cl_int
clFinish(cl_command_queue command_queue)
{
  cl_int err = CL_SUCCESS;
  CHECK_QUEUE (command_queue);
  err = cl_command_queue_finish(command_queue);
error:
  return err;
}

cl_int
clEnqueueReadBuffer(cl_command_queue command_queue, cl_mem buffer,
                    cl_bool blocking_read, size_t offset, size_t size,
                    void * ptr, cl_uint num_events_in_wait_list,
                    const cl_event * event_wait_list, cl_event * event)
{
  cl_int err = CL_SUCCESS;
  enqueue_data *data, defer_enqueue_data = { 0 };
  CHECK_QUEUE(command_queue);
  CHECK_MEM(buffer);
  if (command_queue->ctx != buffer->ctx) {
    err = CL_INVALID_CONTEXT; goto error;
  }
  if (!ptr || !size || offset + size > buffer->size) {
    err = CL_INVALID_VALUE; goto error;
  }
  if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
    err = CL_INVALID_OPERATION; goto error;
  }

  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);

  data = &defer_enqueue_data;
  data->type = EnqueueReadBuffer;
  data->mem_obj = buffer;
  data->ptr = ptr;
  data->offset = offset;
  data->size = size;

  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event,
                   data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
    err = cl_enqueue_handle(event ?
*event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueReadBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(buffer_row_pitch == 0) buffer_row_pitch = region[0]; if(buffer_slice_pitch == 0) buffer_slice_pitch = region[1] * buffer_row_pitch; if(host_row_pitch == 0) host_row_pitch = region[0]; if(host_slice_pitch == 0) host_slice_pitch = region[1] * host_row_pitch; if (buffer_row_pitch < region[0] || host_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) || (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch + buffer_origin[0] + region[0] > buffer->size) { err = CL_INVALID_VALUE; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueReadBufferRect; data->mem_obj = buffer; data->ptr = ptr; data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2]; data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = buffer_row_pitch; data->slice_pitch = buffer_slice_pitch; data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? 
*event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t size, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!ptr || !size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if (buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueWriteBuffer; data->mem_obj = buffer; data->const_ptr = ptr; data->offset = offset; data->size = size; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteBufferRect(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, const size_t * buffer_origin, const size_t * host_origin, const size_t * region, size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) { err = CL_INVALID_VALUE; goto error; } if(buffer_row_pitch == 0) buffer_row_pitch = region[0]; if(buffer_slice_pitch == 0) buffer_slice_pitch = region[1] * buffer_row_pitch; if(host_row_pitch == 0) host_row_pitch = region[0]; if(host_slice_pitch == 0) host_slice_pitch = region[1] * host_row_pitch; if (buffer_row_pitch < region[0] || host_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((buffer_slice_pitch < region[1] * buffer_row_pitch || buffer_slice_pitch % buffer_row_pitch != 0 ) || (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch % host_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((buffer_origin[2] + region[2] - 1) * buffer_slice_pitch + (buffer_origin[1] + region[1] - 1) * buffer_row_pitch + buffer_origin[0] + region[0] > buffer->size) { err = CL_INVALID_VALUE; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueWriteBufferRect; data->mem_obj = buffer; data->const_ptr = ptr; data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2]; data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = buffer_row_pitch; data->slice_pitch = buffer_slice_pitch; data->host_row_pitch = host_row_pitch; data->host_slice_pitch = host_slice_pitch; if(handle_events(command_queue, 
num_events_in_wait_list, event_wait_list, event,
                   data, CL_COMMAND_WRITE_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) {
    err = cl_enqueue_handle(event ? *event : NULL, data);
    if(event)
      cl_event_set_status(*event, CL_COMPLETE);
  }

error:
  return err;
}

cl_int
clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
                   const void * fill_color,
                   const size_t * porigin, const size_t * pregion,
                   cl_uint num_events_in_wait_list,
                   const cl_event * event_wait_list, cl_event * event)
{
  cl_int err = CL_SUCCESS;
  enqueue_data *data, no_wait_data = { 0 };

  CHECK_QUEUE(command_queue);
  CHECK_IMAGE(image, src_image);
  FIXUP_IMAGE_REGION(src_image, pregion, region);
  FIXUP_IMAGE_ORIGIN(src_image, porigin, origin);

  if (command_queue->ctx != image->ctx) {
    err = CL_INVALID_CONTEXT; goto error;
  }
  if (fill_color == NULL) {
    err = CL_INVALID_VALUE; goto error;
  }
  if (!origin || !region ||
      origin[0] + region[0] > src_image->w ||
      origin[1] + region[1] > src_image->h ||
      origin[2] + region[2] > src_image->depth) {
    err = CL_INVALID_VALUE; goto error;
  }
  if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
      (origin[2] != 0 || region[2] != 1)) {
    err = CL_INVALID_VALUE; goto error;
  }
  if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D &&
      (origin[2] != 0 || origin[1] != 0 || region[2] != 1 || region[1] != 1)) {
    err = CL_INVALID_VALUE; goto error;
  }

  err = cl_image_fill(command_queue, fill_color, src_image, origin, region);
  if (err) {
    goto error;
  }

  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);

  data = &no_wait_data;
  data->type = EnqueueFillImage;
  data->queue = command_queue;

  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event,
                   data, CL_COMMAND_FILL_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
    if (event && (*event)->type != CL_COMMAND_USER &&
        (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
      cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
    }
    err = cl_command_queue_flush(command_queue);
  }

  if(b_output_kernel_perf)
    time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", "", command_queue);

  return err;

error:
  return err;
}
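/* Usage sketch (illustrative client code, not part of this file): the
 * checks in clEnqueueFillBuffer below require pattern_size to be one of
 * 1, 2, 4, 8, 16, 32, 64 or 128 bytes, with offset and size multiples of
 * it; 'queue' and 'buf' are assumed to already exist:
 *
 *   cl_uint zero = 0;
 *   cl_int err = clEnqueueFillBuffer(queue, buf, &zero, sizeof(zero),
 *                                    0, 1024, 0, NULL, NULL);
 *   // offset 0 and size 1024 are both multiples of pattern_size == 4
 */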
cl_int
clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
                    const void * pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event * event_wait_list, cl_event * event)
{
  cl_int err = CL_SUCCESS;
  enqueue_data *data, no_wait_data = { 0 };
  static size_t valid_sz[] = {1, 2, 4, 8, 16, 32, 64, 128};
  size_t i = 0;

  CHECK_QUEUE(command_queue);
  CHECK_MEM(buffer);

  if (command_queue->ctx != buffer->ctx) {
    err = CL_INVALID_CONTEXT; goto error;
  }
  if (offset + size > buffer->size) {
    err = CL_INVALID_VALUE; goto error;
  }
  if (pattern == NULL) {
    err = CL_INVALID_VALUE; goto error;
  }

  for (i = 0; i < sizeof(valid_sz) / sizeof(size_t); i++) {
    if (valid_sz[i] == pattern_size)
      break;
  }
  if (i == sizeof(valid_sz) / sizeof(size_t)) {
    err = CL_INVALID_VALUE; goto error;
  }

  if (offset % pattern_size || size % pattern_size) {
    err = CL_INVALID_VALUE; goto error;
  }

  err = cl_mem_fill(command_queue, pattern, pattern_size, buffer, offset, size);
  if (err) {
    goto error;
  }

  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);

  data = &no_wait_data;
  data->type = EnqueueFillBuffer;
  data->queue = command_queue;

  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event,
                   data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
    if (event && (*event)->type != CL_COMMAND_USER &&
        (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
      cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
    }
    err = cl_command_queue_flush(command_queue);
  }

  if(b_output_kernel_perf)
    time_end(command_queue->ctx, "beignet internal kernel : cl_fill_buffer", "", command_queue);

  return err;

error:
  return err;
}

cl_int
clEnqueueCopyBuffer(cl_command_queue command_queue,
                    cl_mem src_buffer, cl_mem dst_buffer,
                    size_t src_offset, size_t dst_offset, size_t cb,
                    cl_uint num_events_in_wait_list,
                    const cl_event * event_wait_list, cl_event * event)
{
  cl_int err = CL_SUCCESS;
  enqueue_data *data, no_wait_data = { 0 };

  CHECK_QUEUE(command_queue);
  CHECK_MEM(src_buffer);
  CHECK_MEM(dst_buffer);

  if (command_queue->ctx != src_buffer->ctx) {
    err = CL_INVALID_CONTEXT; goto error;
  }
  if (command_queue->ctx != dst_buffer->ctx) {
    err = CL_INVALID_CONTEXT; goto error;
  }
  if (src_offset + cb > src_buffer->size) {
    err = CL_INVALID_VALUE; goto error;
  }
  if (dst_offset + cb > dst_buffer->size) {
    err = CL_INVALID_VALUE; goto error;
  }

  /* Check overlap: [src_offset, src_offset+cb-1] and
     [dst_offset, dst_offset+cb-1] must not intersect. */
  if (src_buffer == dst_buffer &&
      src_offset <= dst_offset + cb - 1 &&
      dst_offset <= src_offset + cb - 1) {
    err = CL_MEM_COPY_OVERLAP; goto error;
  }

  /* Check sub-buffer overlap, using offsets relative to the parent buffer. */
  if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE &&
      dst_buffer->type == CL_MEM_SUBBUFFER_TYPE) {
    struct _cl_mem_buffer* src_b = (struct _cl_mem_buffer*)src_buffer;
    struct _cl_mem_buffer* dst_b = (struct _cl_mem_buffer*)dst_buffer;
    size_t src_sub_offset = src_b->sub_offset;
    size_t dst_sub_offset = dst_b->sub_offset;

    if (src_offset + src_sub_offset <= dst_offset + dst_sub_offset + cb - 1 &&
        dst_offset + dst_sub_offset <= src_offset + src_sub_offset + cb - 1) {
      err = CL_MEM_COPY_OVERLAP; goto error;
    }
  }

  err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb);
  if (err) {
    goto error;
  }

  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx);

  data = &no_wait_data;
  data->type = EnqueueCopyBuffer;
  data->queue = command_queue;

  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event,
                   data, CL_COMMAND_COPY_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
    if (event && (*event)->type != CL_COMMAND_USER &&
        (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
      cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
    }
    err = cl_command_queue_flush(command_queue);
  }

  if(b_output_kernel_perf)
    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy", "", command_queue);

  return err;

error:
  return err;
}

cl_int
clEnqueueCopyBufferRect(cl_command_queue command_queue,
                        cl_mem src_buffer, cl_mem dst_buffer,
                        const size_t * src_origin, const size_t * dst_origin,
                        const size_t * region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_events_in_wait_list,
                        const cl_event * event_wait_list, cl_event * event)
{
  cl_int err = CL_SUCCESS;
  enqueue_data *data, no_wait_data = { 0 };

  CHECK_QUEUE(command_queue);
  CHECK_MEM(src_buffer);
  CHECK_MEM(dst_buffer);

  if ((command_queue->ctx != src_buffer->ctx) ||
      (command_queue->ctx != dst_buffer->ctx)) {
    err = CL_INVALID_CONTEXT; goto error;
  }

  if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
    err = CL_INVALID_VALUE; goto error;
  }

  if(src_row_pitch == 0)
    src_row_pitch = region[0];
  if(src_slice_pitch == 0)
    src_slice_pitch = region[1] * src_row_pitch;
  if(dst_row_pitch == 0)
    dst_row_pitch = region[0];
  if(dst_slice_pitch == 0)
dst_slice_pitch = region[1] * dst_row_pitch; if (src_row_pitch < region[0] || dst_row_pitch < region[0]) { err = CL_INVALID_VALUE; goto error; } if ((src_slice_pitch < region[1] * src_row_pitch || src_slice_pitch % src_row_pitch != 0 ) || (dst_slice_pitch < region[1] * dst_row_pitch || dst_slice_pitch % dst_row_pitch != 0 )) { err = CL_INVALID_VALUE; goto error; } if ((src_origin[2] + region[2] - 1) * src_slice_pitch + (src_origin[1] + region[1] - 1) * src_row_pitch + src_origin[0] + region[0] > src_buffer->size ||(dst_origin[2] + region[2] - 1) * dst_slice_pitch + (dst_origin[1] + region[1] - 1) * dst_row_pitch + dst_origin[0] + region[0] > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (src_buffer == dst_buffer && (src_row_pitch != dst_row_pitch || src_slice_pitch != dst_slice_pitch)) { err = CL_INVALID_VALUE; goto error; } if (src_buffer == dst_buffer && check_copy_overlap(src_origin, dst_origin, region, src_row_pitch, src_slice_pitch)) { err = CL_MEM_COPY_OVERLAP; goto error; } cl_mem_copy_buffer_rect(command_queue, src_buffer, dst_buffer, src_origin, dst_origin, region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx); data = &no_wait_data; data->type = EnqueueCopyBufferRect; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER_RECT) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_rect", "", command_queue); error: return err; } cl_int clEnqueueReadImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_read, const size_t * porigin, const size_t * pregion, size_t row_pitch, size_t slice_pitch, void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); FIXUP_IMAGE_REGION(image, pregion, region); FIXUP_IMAGE_ORIGIN(image, porigin, origin); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!row_pitch) row_pitch = image->bpp*region[0]; else if (row_pitch < image->bpp*region[0]) { err = CL_INVALID_VALUE; goto error; } if (image->slice_pitch) { if (!slice_pitch) slice_pitch = row_pitch*region[1]; else if (slice_pitch < row_pitch*region[1]) { err = CL_INVALID_VALUE; goto error; } } else if (slice_pitch) { err = CL_INVALID_VALUE; goto error; } if (!ptr) { err = CL_INVALID_VALUE; goto error; } if (mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueReadImage; data->mem_obj = mem; data->ptr = ptr; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; 
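/* handle_events() either defers this command behind the events in the wait
   list, or returns CL_ENQUEUE_EXECUTE_IMM, in which case the command is
   executed on the spot and its event (if any) is completed immediately. */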
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? *event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueWriteImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_write, const size_t * porigin, const size_t * pregion, size_t row_pitch, size_t slice_pitch, const void * ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); FIXUP_IMAGE_REGION(image, pregion, region); FIXUP_IMAGE_ORIGIN(image, porigin, origin); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!row_pitch) row_pitch = image->bpp*region[0]; else if (row_pitch < image->bpp*region[0]) { err = CL_INVALID_VALUE; goto error; } if (image->slice_pitch) { if (!slice_pitch) slice_pitch = row_pitch*region[1]; else if (slice_pitch < row_pitch*region[1]) { err = CL_INVALID_VALUE; goto error; } } else if (slice_pitch) { err = CL_INVALID_VALUE; goto error; } if (!ptr) { err = CL_INVALID_VALUE; goto error; } if (mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueWriteImage; data->mem_obj = mem; data->const_ptr = ptr; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = row_pitch; data->slice_pitch = slice_pitch; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_WRITE_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ? 
*event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueCopyImage(cl_command_queue command_queue, cl_mem src_mem, cl_mem dst_mem, const size_t * psrc_origin, const size_t * pdst_origin, const size_t * pregion, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; cl_bool overlap = CL_TRUE; cl_int i = 0; CHECK_QUEUE(command_queue); CHECK_IMAGE(src_mem, src_image); CHECK_IMAGE(dst_mem, dst_image); FIXUP_IMAGE_REGION(src_image, pregion, region); FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin); FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin); if (command_queue->ctx != src_mem->ctx || command_queue->ctx != dst_mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_image->fmt.image_channel_order != dst_image->fmt.image_channel_order || src_image->fmt.image_channel_data_type != dst_image->fmt.image_channel_data_type) { err = CL_IMAGE_FORMAT_MISMATCH; goto error; } if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) { err = CL_INVALID_VALUE; goto error; } if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { err = CL_INVALID_VALUE; goto error; } if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) || (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1))) { err = CL_INVALID_VALUE; goto error; } if (src_image == dst_image) { for(i = 0; i < 3; i++) overlap = overlap && (src_origin[i] < dst_origin[i] + region[i]) && (dst_origin[i] < src_origin[i] + region[i]); if(overlap == CL_TRUE) { err = CL_MEM_COPY_OVERLAP; goto error; } } cl_mem_kernel_copy_image(command_queue, src_image, dst_image, src_origin, dst_origin, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyImage; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_kernel_copy_image", "", command_queue); error: return err; } cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_mem, cl_mem dst_buffer, const size_t * psrc_origin, const size_t * pregion, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(src_mem, src_image); CHECK_MEM(dst_buffer); FIXUP_IMAGE_REGION(src_image, pregion, region); FIXUP_IMAGE_ORIGIN(src_image, psrc_origin, src_origin); if (command_queue->ctx != src_mem->ctx || command_queue->ctx != dst_buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (dst_offset + region[0]*region[1]*region[2]*src_image->bpp > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || src_origin[1] + region[1] > 
src_image->h || src_origin[2] + region[2] > src_image->depth) { err = CL_INVALID_VALUE; goto error; } if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) { err = CL_INVALID_VALUE; goto error; } cl_mem_copy_image_to_buffer(command_queue, src_image, dst_buffer, src_origin, dst_offset, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyImageToBuffer; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_image_to_buffer", "", command_queue); error: return err; } cl_int clEnqueueCopyBufferToImage(cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_mem, size_t src_offset, const size_t * pdst_origin, const size_t * pregion, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(src_buffer); CHECK_IMAGE(dst_mem, dst_image); FIXUP_IMAGE_REGION(dst_image, pregion, region); FIXUP_IMAGE_ORIGIN(dst_image, pdst_origin, dst_origin); if (command_queue->ctx != src_buffer->ctx || command_queue->ctx != dst_mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (src_offset + region[0]*region[1]*region[2]*dst_image->bpp > src_buffer->size) { err = CL_INVALID_VALUE; goto error; } if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { err = CL_INVALID_VALUE; goto error; } if (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1)) { err = CL_INVALID_VALUE; goto error; } cl_mem_copy_buffer_to_image(command_queue, src_buffer, dst_image, src_offset, dst_origin, region); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, dst_mem->ctx); data = &no_wait_data; data->type = EnqueueCopyBufferToImage; data->queue = command_queue; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_COPY_BUFFER_TO_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_to_image", "", command_queue); error: return err; } static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset, size_t size, const size_t *origin, const size_t *region) { cl_int slot = -1; int err = CL_SUCCESS; size_t sub_offset = 0; if(mem->type == CL_MEM_SUBBUFFER_TYPE) { struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; sub_offset = buffer->sub_offset; } ptr = (char*)ptr + offset + sub_offset; if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); //only calc ptr here, will do memcpy in enqueue *mem_ptr = (char *)mem->host_ptr + offset + sub_offset; } else { *mem_ptr = ptr; } /* Record the mapped address. 
*/ if (!mem->mapped_ptr_sz) { mem->mapped_ptr_sz = 16; mem->mapped_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz); if (!mem->mapped_ptr) { cl_mem_unmap_auto(mem); err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(mem->mapped_ptr, 0, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); slot = 0; } else { int i = 0; for (; i < mem->mapped_ptr_sz; i++) { if (mem->mapped_ptr[i].ptr == NULL) { slot = i; break; } } if (i == mem->mapped_ptr_sz) { cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2); if (!new_ptr) { cl_mem_unmap_auto(mem); err = CL_OUT_OF_HOST_MEMORY; goto error; } memset(new_ptr, 0, 2 * mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); memcpy(new_ptr, mem->mapped_ptr, mem->mapped_ptr_sz * sizeof(cl_mapped_ptr)); slot = mem->mapped_ptr_sz; mem->mapped_ptr_sz *= 2; free(mem->mapped_ptr); mem->mapped_ptr = new_ptr; } } assert(slot != -1); mem->mapped_ptr[slot].ptr = *mem_ptr; mem->mapped_ptr[slot].v_ptr = ptr; mem->mapped_ptr[slot].size = size; if(origin) { assert(region); mem->mapped_ptr[slot].origin[0] = origin[0]; mem->mapped_ptr[slot].origin[1] = origin[1]; mem->mapped_ptr[slot].origin[2] = origin[2]; mem->mapped_ptr[slot].region[0] = region[0]; mem->mapped_ptr[slot].region[1] = region[1]; mem->mapped_ptr[slot].region[2] = region[2]; } mem->map_ref++; error: if (err != CL_SUCCESS) *mem_ptr = NULL; return err; } void * clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; void *ptr = NULL; void *mem_ptr = NULL; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); if (command_queue->ctx != buffer->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!size || offset + size > buffer->size) { err = CL_INVALID_VALUE; goto error; } if ((map_flags & CL_MAP_READ && buffer->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) || (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) && buffer->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; data->type = EnqueueMapBuffer; data->mem_obj = buffer; data->offset = offset; data->size = size; data->ptr = ptr; data->unsync_map = 1; if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) data->write_map = 1; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { data->unsync_map = 0; err = cl_enqueue_handle(event ? 
*event : NULL, data); if (err != CL_SUCCESS) goto error; ptr = data->ptr; if(event) cl_event_set_status(*event, CL_COMPLETE); } else { if (buffer->is_userptr) ptr = buffer->host_ptr; else { if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) { err = CL_MAP_FAILURE; goto error; } } } err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL); if (err != CL_SUCCESS) goto error; error: if (errcode_ret) *errcode_ret = err; return mem_ptr; } void * clEnqueueMapImage(cl_command_queue command_queue, cl_mem mem, cl_bool blocking_map, cl_map_flags map_flags, const size_t * porigin, const size_t * pregion, size_t * image_row_pitch, size_t * image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; void *ptr = NULL; void *mem_ptr = NULL; size_t offset = 0; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); CHECK_IMAGE(mem, image); FIXUP_IMAGE_REGION(image, pregion, region); FIXUP_IMAGE_ORIGIN(image, porigin, origin); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; } if (!origin || !region || origin[0] + region[0] > image->w || origin[1] + region[1] > image->h || origin[2] + region[2] > image->depth) { err = CL_INVALID_VALUE; goto error; } if (!image_row_pitch || (image->slice_pitch && !image_slice_pitch)) { err = CL_INVALID_VALUE; goto error; } if ((map_flags & CL_MAP_READ && mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) || (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION) && mem->flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS))) { err = CL_INVALID_OPERATION; goto error; } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx); data = &no_wait_data; data->type = EnqueueMapImage; data->mem_obj = mem; data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2]; data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->ptr = ptr; data->unsync_map = 1; if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) data->write_map = 1; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { data->unsync_map = 0; err = cl_enqueue_handle(event ? 
*event : NULL, data);
    if (err != CL_SUCCESS)
      goto error;
    ptr = data->ptr;
    if(event)
      cl_event_set_status(*event, CL_COMPLETE);
  } else {
    if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
      err = CL_MAP_FAILURE; goto error;
    }
  }

  if(mem->flags & CL_MEM_USE_HOST_PTR) {
    if (image_slice_pitch)
      *image_slice_pitch = image->host_slice_pitch;
    *image_row_pitch = image->host_row_pitch;

    offset = image->bpp*origin[0] + image->host_row_pitch*origin[1] +
             image->host_slice_pitch*origin[2];
  } else {
    if (image_slice_pitch)
      *image_slice_pitch = image->slice_pitch;
    if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
      *image_row_pitch = image->slice_pitch;
    else
      *image_row_pitch = image->row_pitch;

    offset = image->bpp*origin[0] + image->row_pitch*origin[1] +
             image->slice_pitch*origin[2];
  }

  err = _cl_map_mem(mem, ptr, &mem_ptr, offset, 0, origin, region);

error:
  if (errcode_ret)
    *errcode_ret = err;
  return mem_ptr; //TODO: map and unmap first
}

cl_int
clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_mem memobj,
                        void * mapped_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event * event_wait_list, cl_event * event)
{
  cl_int err = CL_SUCCESS;
  enqueue_data *data, no_wait_data = { 0 };
  CHECK_QUEUE(command_queue);
  CHECK_MEM(memobj);
  if (command_queue->ctx != memobj->ctx) {
    err = CL_INVALID_CONTEXT; goto error;
  }

  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, memobj->ctx);

  data = &no_wait_data;
  data->type = EnqueueUnmapMemObject;
  data->mem_obj = memobj;
  data->ptr = mapped_ptr;

  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event,
                   data, CL_COMMAND_UNMAP_MEM_OBJECT) == CL_ENQUEUE_EXECUTE_IMM) {
    err = cl_enqueue_handle(event ? *event : NULL, data);
    if(event)
      cl_event_set_status(*event, CL_COMPLETE);
  }

error:
  return err;
}

cl_int
clEnqueueMigrateMemObjects(cl_command_queue command_queue,
                           cl_uint num_mem_objects, const cl_mem * mem_objects,
                           cl_mem_migration_flags flags,
                           cl_uint num_events_in_wait_list,
                           const cl_event * event_wait_list, cl_event * event)
{
  /* So far we only support one device and no sub-devices, so every command
     queue belongs to the same context and there is nothing to migrate yet. */
  cl_int err = CL_SUCCESS;
  cl_uint i = 0;
  enqueue_data *data, defer_enqueue_data = { 0 };
  if (!(flags & CL_MIGRATE_MEM_OBJECT_HOST))
    CHECK_QUEUE(command_queue);
  if (num_mem_objects == 0 || mem_objects == NULL) {
    err = CL_INVALID_VALUE; goto error;
  }
  if (flags && flags & ~(CL_MIGRATE_MEM_OBJECT_HOST |
                         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) {
    err = CL_INVALID_VALUE; goto error;
  }
  for (i = 0; i < num_mem_objects; i++) {
    CHECK_MEM(mem_objects[i]);
    if (mem_objects[i]->ctx != command_queue->ctx) {
      err = CL_INVALID_CONTEXT; goto error;
    }
  }

  /* Really nothing to do here, just fill the event. */
  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);

  data = &defer_enqueue_data;
  data->type = EnqueueMigrateMemObj;

  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event,
                   data, CL_COMMAND_MIGRATE_MEM_OBJECTS) == CL_ENQUEUE_EXECUTE_IMM) {
    err = cl_enqueue_handle(event ? *event : NULL, data);
    if(event)
      cl_event_set_status(*event, CL_COMPLETE);
  }

error:
  return err;
}
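/* Usage sketch (illustrative client code, not part of this file): a
 * typical dispatch through clEnqueueNDRangeKernel below; 'queue',
 * 'kernel' and 'buf' are assumed to already exist:
 *
 *   size_t gws[1] = { 4096 };  // global size, divisible by the local size
 *   size_t lws[1] = { 64 };    // or pass NULL and let the code below pick
 *   cl_int err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);
 *   err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, gws, lws,
 *                                0, NULL, NULL);
 */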
cl_int
clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel,
                       cl_uint work_dim,
                       const size_t * global_work_offset,
                       const size_t * global_work_size,
                       const size_t * local_work_size,
                       cl_uint num_events_in_wait_list,
                       const cl_event * event_wait_list, cl_event * event)
{
  size_t fixed_global_off[] = {0,0,0};
  size_t fixed_global_sz[] = {1,1,1};
  size_t fixed_local_sz[] = {1,1,1};
  cl_int err = CL_SUCCESS;
  cl_uint i;
  enqueue_data *data, no_wait_data = { 0 };

  CHECK_QUEUE(command_queue);
  CHECK_KERNEL(kernel);

  /* Check the number of dimensions we have */
  if (UNLIKELY(work_dim == 0 || work_dim > 3)) {
    err = CL_INVALID_WORK_DIMENSION; goto error;
  }

  /* We need a work size per dimension */
  if (UNLIKELY(global_work_size == NULL)) {
    err = CL_INVALID_GLOBAL_WORK_SIZE; goto error;
  }

  /* Offset plus size must not overflow size_t. */
  if (global_work_offset != NULL)
    for (i = 0; i < work_dim; ++i) {
      if (UNLIKELY(global_work_offset[i] > (size_t)-1 - global_work_size[i])) {
        err = CL_INVALID_GLOBAL_OFFSET; goto error;
      }
    }

  /* Local sizes must be non-null and divide global sizes */
  if (local_work_size != NULL)
    for (i = 0; i < work_dim; ++i)
      if (UNLIKELY(local_work_size[i] == 0 ||
                   global_work_size[i] % local_work_size[i])) {
        err = CL_INVALID_WORK_GROUP_SIZE; goto error;
      }

  /* Queue and kernel must share the same context */
  assert(kernel->program);
  if (command_queue->ctx != kernel->program->ctx) {
    err = CL_INVALID_CONTEXT; goto error;
  }
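/* If the caller did not provide a local work size, pick one per dimension:
     the largest divisor of the global size that is at most 64 and still fits
     in the remaining work-group budget of 256 work items in total; if no
     such divisor exists, the local size for that dimension stays 1. */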
maxDimSize : maxGroupSize; break; //choose next work_dim } } } } if (global_work_size != NULL) for (i = 0; i < work_dim; ++i) fixed_global_sz[i] = global_work_size[i]; if (global_work_offset != NULL) for (i = 0; i < work_dim; ++i) fixed_global_off[i] = global_work_offset[i]; if (kernel->compile_wg_sz[0] || kernel->compile_wg_sz[1] || kernel->compile_wg_sz[2]) { if (fixed_local_sz[0] != kernel->compile_wg_sz[0] || fixed_local_sz[1] != kernel->compile_wg_sz[1] || fixed_local_sz[2] != kernel->compile_wg_sz[2]) { err = CL_INVALID_WORK_GROUP_SIZE; goto error; } } /* Do device specific checks and enqueue the kernel */ err = cl_command_queue_ND_range(command_queue, kernel, work_dim, fixed_global_off, fixed_global_sz, fixed_local_sz); if(err != CL_SUCCESS) goto error; data = &no_wait_data; data->type = EnqueueNDRangeKernel; data->queue = command_queue; TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_NDRANGE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) { if (event && (*event)->type != CL_COMMAND_USER && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) { cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT); } err = cl_command_queue_flush(command_queue); } if(b_output_kernel_perf) { if(kernel->program->build_opts != NULL) time_end(command_queue->ctx, cl_kernel_get_name(kernel), kernel->program->build_opts, command_queue); else time_end(command_queue->ctx, cl_kernel_get_name(kernel), "", command_queue); } error: return err; } cl_int clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { const size_t global_size[3] = {1, 0, 0}; const size_t local_size[3] = {1, 0, 0}; return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size, num_events_in_wait_list, event_wait_list, event); } cl_int clEnqueueNativeKernel(cl_command_queue command_queue, void (*user_func)(void *), void * args, size_t cb_args, cl_uint num_mem_objects, const cl_mem * mem_list, const void ** args_mem_loc, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event) { cl_int err = CL_SUCCESS; void *new_args = NULL; enqueue_data *data, no_wait_data = { 0 }; cl_int i; if(user_func == NULL || (args == NULL && cb_args > 0) || (args == NULL && num_mem_objects > 0) || (args != NULL && cb_args == 0) || (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) || (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) { err = CL_INVALID_VALUE; goto error; } //Per spec, need copy args if (cb_args) { new_args = malloc(cb_args); if (!new_args) { err = CL_OUT_OF_HOST_MEMORY; goto error; } memcpy(new_args, args, cb_args); for (i=0; i<num_mem_objects; ++i) CHECK_MEM(mem_list[i]); /* validate each mem object (loop body reconstructed; original lost in archiving) */ } TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); data = &no_wait_data; data->type = EnqueueNativeKernel; data->mem_list = mem_list; data->ptr = new_args; data->size = cb_args; data->offset = (size_t)num_mem_objects; data->const_ptr = args_mem_loc; data->user_func = user_func; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) { err = cl_enqueue_handle(event ?
*event : NULL, data); if(event) cl_event_set_status(*event, CL_COMPLETE); } error: return err; } cl_int clEnqueueMarker(cl_command_queue command_queue, cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); if(event == NULL) { err = CL_INVALID_VALUE; goto error; } cl_event_marker_with_wait_list(command_queue, 0, NULL, event); error: return err; } cl_int clEnqueueMarkerWithWaitList(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); cl_event_marker_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } cl_int clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, const cl_event * event_list) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); err = clWaitForEvents(num_events, event_list); error: return err; } cl_int clEnqueueBarrier(cl_command_queue command_queue) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); cl_event_barrier_with_wait_list(command_queue, 0, NULL, NULL); error: return err; } cl_int clEnqueueBarrierWithWaitList(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int err = CL_SUCCESS; CHECK_QUEUE(command_queue); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); cl_event_barrier_with_wait_list(command_queue, num_events_in_wait_list, event_wait_list, event); error: return err; } #define EXTFUNC(x) \ if (strcmp(#x, func_name) == 0) \ return (void *)x; static void* internal_clGetExtensionFunctionAddress(const char *func_name) { if (func_name == NULL) return NULL; #ifdef HAS_OCLIcd /* cl_khr_icd */ EXTFUNC(clIcdGetPlatformIDsKHR) #endif EXTFUNC(clCreateProgramWithLLVMIntel) EXTFUNC(clGetGenVersionIntel) EXTFUNC(clMapBufferIntel) EXTFUNC(clUnmapBufferIntel) EXTFUNC(clMapBufferGTTIntel) EXTFUNC(clUnmapBufferGTTIntel) EXTFUNC(clPinBufferIntel) EXTFUNC(clUnpinBufferIntel) EXTFUNC(clReportUnfreedIntel) EXTFUNC(clCreateBufferFromLibvaIntel) EXTFUNC(clCreateImageFromLibvaIntel) EXTFUNC(clGetMemObjectFdIntel) return NULL; } void* clGetExtensionFunctionAddress(const char *func_name) { return internal_clGetExtensionFunctionAddress(func_name); } void* clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, const char *func_name) { if (UNLIKELY(platform != NULL && platform != cl_get_platform_default())) return NULL; return internal_clGetExtensionFunctionAddress(func_name); } #undef EXTFUNC cl_int clReportUnfreedIntel(void) { return cl_report_unfreed(); } void* clMapBufferIntel(cl_mem mem, cl_int *errcode_ret) { void *ptr = NULL; cl_int err = CL_SUCCESS; CHECK_MEM (mem); ptr = cl_mem_map(mem, 1); error: if (errcode_ret) *errcode_ret = err; return ptr; } cl_int clUnmapBufferIntel(cl_mem mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); err = cl_mem_unmap(mem); error: return err; } void* clMapBufferGTTIntel(cl_mem mem, cl_int *errcode_ret) { void *ptr = NULL; cl_int err = CL_SUCCESS; CHECK_MEM (mem); ptr = cl_mem_map_gtt(mem); error: if (errcode_ret) *errcode_ret = err; return ptr; } cl_int clUnmapBufferGTTIntel(cl_mem mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); err = cl_mem_unmap_gtt(mem); error: return err; } cl_int clPinBufferIntel(cl_mem mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); cl_mem_pin(mem); error: return err; } cl_int clUnpinBufferIntel(cl_mem 
mem) { cl_int err = CL_SUCCESS; CHECK_MEM (mem); cl_mem_unpin(mem); error: return err; } cl_int clGetGenVersionIntel(cl_device_id device, cl_int *ver) { return cl_device_get_version(device, ver); } cl_program clCreateProgramWithLLVMIntel(cl_context context, cl_uint num_devices, const cl_device_id * devices, const char * filename, cl_int * errcode_ret) { return cl_program_create_from_llvm(context, num_devices, devices, filename, errcode_ret); } cl_mem clCreateBufferFromLibvaIntel(cl_context context, unsigned int bo_name, cl_int *errorcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); mem = cl_mem_new_libva_buffer(context, bo_name, &err); error: if (errorcode_ret) *errorcode_ret = err; return mem; } cl_mem clCreateImageFromLibvaIntel(cl_context context, const cl_libva_image *info, cl_int *errorcode_ret) { cl_mem mem = NULL; cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); if (!info) { err = CL_INVALID_VALUE; goto error; } mem = cl_mem_new_libva_image(context, info->bo_name, info->offset, info->width, info->height, info->fmt, info->row_pitch, &err); error: if (errorcode_ret) *errorcode_ret = err; return mem; } extern CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectFdIntel(cl_context context, cl_mem memobj, int* fd) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); CHECK_MEM (memobj); err = cl_mem_get_fd(memobj, fd); error: return err; } Beignet-1.1.1-Source/src/cl_enqueue.h000664 001750 001750 00000005374 12576733264 016572 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Rong Yang */ #ifndef __CL_ENQUEUE_H__ #define __CL_ENQUEUE_H__ #include "cl_internals.h" #include "cl_driver.h" #include "CL/cl.h" typedef enum { EnqueueReadBuffer = 0, EnqueueReadBufferRect, EnqueueWriteBuffer, EnqueueWriteBufferRect, EnqueueCopyBuffer, EnqueueCopyBufferRect, EnqueueReadImage, EnqueueWriteImage, EnqueueCopyImage, EnqueueCopyImageToBuffer, EnqueueCopyBufferToImage, EnqueueMapBuffer, EnqueueMapImage, EnqueueUnmapMemObject, EnqueueNDRangeKernel, EnqueueNativeKernel, EnqueueMarker, EnqueueBarrier, EnqueueFillBuffer, EnqueueFillImage, EnqueueMigrateMemObj, EnqueueInvalid } enqueue_type; typedef struct _enqueue_data { enqueue_type type; /* Command type */ cl_mem mem_obj; /* Enqueue's cl_mem */ cl_command_queue queue; /* Command queue */ size_t offset; /* Mem object's offset */ size_t size; /* Size */ size_t origin[3]; /* Origin */ size_t host_origin[3]; /* Host origin */ size_t region[3]; /* Region */ size_t row_pitch; /* Row pitch */ size_t slice_pitch; /* Slice pitch */ size_t host_row_pitch; /* Host row pitch, used in read/write buffer rect */ size_t host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */ const void * const_ptr; /* Const ptr for memory read */ void * ptr; /* Ptr for write and return value */ const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */ uint8_t unsync_map; /* Indicate the clEnqueueMapBuffer/Image is unsync map */ uint8_t write_map; /* Indicate if the clEnqueueMapBuffer is write enable */ void (*user_func)(void *); /* pointer to a host-callable user function */ } enqueue_data; /* Do real enqueue commands */ cl_int cl_enqueue_handle(cl_event event, enqueue_data* data); #endif /* __CL_ENQUEUE_H__ */ Beignet-1.1.1-Source/src/performance.c000664 001750 001750 00000023702 12576733264 016734 0ustar00yryr000000 000000 /* system header names were lost in archiving; reconstructed from what this file uses */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdint.h> #include <math.h> #include <pthread.h> #include <sys/time.h> #include <CL/cl.h> #define MAX_KERNEL_NAME_LENGTH 100 #define MAX_KERNEL_EXECUTION_COUNT 100000 #define MAX_KERNEL_BUILD_OPT 1000 typedef struct kernel_storage_node { char kernel_name[MAX_KERNEL_NAME_LENGTH]; float kernel_times[MAX_KERNEL_EXECUTION_COUNT]; char build_option[MAX_KERNEL_BUILD_OPT]; int current_count; float kernel_sum_time; struct kernel_storage_node *next; } kernel_storage_node; typedef struct context_storage_node { uintptr_t context_id; kernel_storage_node *kernels_storage; char max_time_kernel_name[MAX_KERNEL_NAME_LENGTH]; float kernel_max_time; int kernel_count; struct context_storage_node *next; } context_storage_node; typedef struct storage { context_storage_node * context_storage; } storage; static storage record; static int atexit_registered = 0; static context_storage_node * prev_context_pointer = NULL; static kernel_storage_node * prev_kernel_pointer = NULL; static context_storage_node * find_context(cl_context context) { if(NULL != prev_context_pointer ) { if(prev_context_pointer->context_id == (uintptr_t)context) return prev_context_pointer; } if(NULL == record.context_storage) { record.context_storage = (context_storage_node *) malloc(sizeof(context_storage_node)); record.context_storage->context_id = (uintptr_t)context; record.context_storage->kernels_storage = NULL; record.context_storage->kernel_max_time = 0.0f; record.context_storage->next = NULL; record.context_storage->kernel_count = 0; return record.context_storage; } context_storage_node *pre = record.context_storage; context_storage_node *cur = record.context_storage; while(NULL != cur && (uintptr_t)context != cur->context_id ) { pre = cur; cur = cur->next; } if(NULL != cur) return
cur; pre->next = (context_storage_node *)malloc(sizeof(context_storage_node)); pre = pre->next; pre->context_id = (uintptr_t)context; pre->kernels_storage = NULL; pre->kernel_max_time = 0.0f; pre->next = NULL; pre->kernel_count = 0; return pre; } static kernel_storage_node * find_kernel(context_storage_node *p_context, const char *kernel_name, const char *build_opt) { if(NULL != prev_kernel_pointer && NULL != prev_context_pointer && p_context == prev_context_pointer && !strncmp(kernel_name, prev_kernel_pointer->kernel_name, MAX_KERNEL_NAME_LENGTH) && !strncmp(build_opt, prev_kernel_pointer->build_option, MAX_KERNEL_BUILD_OPT)) return prev_kernel_pointer; if(NULL == p_context) return NULL; if(NULL == p_context->kernels_storage) { p_context->kernels_storage = (kernel_storage_node *)malloc(sizeof(kernel_storage_node)); p_context->kernel_count++; strncpy(p_context->kernels_storage->kernel_name, kernel_name, MAX_KERNEL_NAME_LENGTH); p_context->kernels_storage->kernel_name[MAX_KERNEL_NAME_LENGTH - 1] = '\0'; strncpy(p_context->kernels_storage->build_option, build_opt, MAX_KERNEL_BUILD_OPT); p_context->kernels_storage->build_option[MAX_KERNEL_BUILD_OPT - 1] = '\0'; p_context->kernels_storage->current_count = 0; p_context->kernels_storage->kernel_sum_time = 0.0f; p_context->kernels_storage->next = NULL; return p_context->kernels_storage; } kernel_storage_node *pre = p_context->kernels_storage; kernel_storage_node *cur = p_context->kernels_storage; while(NULL != cur && (strncmp(cur->kernel_name, kernel_name, MAX_KERNEL_NAME_LENGTH) || strncmp(cur->build_option, build_opt, MAX_KERNEL_BUILD_OPT))) { pre = cur; cur = cur->next; } if(NULL != cur) return cur; p_context->kernel_count++; pre->next = (kernel_storage_node *)malloc(sizeof(kernel_storage_node)); pre = pre->next; pre->current_count = 0; pre->kernel_sum_time = 0.0f; pre->next = NULL; strncpy(pre->kernel_name, kernel_name, MAX_KERNEL_NAME_LENGTH); pre->kernel_name[MAX_KERNEL_NAME_LENGTH - 1] = '\0'; strncpy(pre->build_option, build_opt, MAX_KERNEL_BUILD_OPT); pre->build_option[MAX_KERNEL_BUILD_OPT - 1] = '\0'; return pre; } static void free_storage() { context_storage_node *p_context = record.context_storage; while(NULL != p_context) { context_storage_node *p_tmp_context = p_context->next; kernel_storage_node *p_kernel = p_context->kernels_storage; while(NULL != p_kernel) { kernel_storage_node *p_tmp_kernel = p_kernel->next; free(p_kernel); p_kernel = p_tmp_kernel; } free(p_context); p_context = p_tmp_context; } } typedef struct time_element { char kernel_name[MAX_KERNEL_NAME_LENGTH]; float kernel_sum_time; int kernel_execute_count; double dev; float kernel_times[MAX_KERNEL_EXECUTION_COUNT]; uint32_t time_index; } time_element; static int cmp(const void *a, const void *b) { if(((time_element *)a)->kernel_sum_time < ((time_element *)b)->kernel_sum_time) return 1; else if(((time_element *)a)->kernel_sum_time > ((time_element *)b)->kernel_sum_time) return -1; else return 0; } static void print_time_info() { context_storage_node *p_context = record.context_storage; if(NULL == p_context) { printf("Nothing to output !\n"); return; } int tmp_context_id = 0; while(NULL != p_context) { printf("[------------ CONTEXT %4d ------------]\n", tmp_context_id++); printf(" ->>>> KERNELS TIME SUMMARY <<<<-\n"); kernel_storage_node *p_kernel = p_context->kernels_storage; kernel_storage_node *p_tmp_kernel = p_kernel; time_element *te = (time_element *)malloc(sizeof(time_element)*p_context->kernel_count); memset(te, 0, sizeof(time_element)*p_context->kernel_count);
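/* First pass below: fold kernels that share a name (they are recorded
   separately per build option) into a single time_element, accumulating
   execution counts, total times and the individual samples before the
   deviation is computed and the table is sorted. */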
int i = -1, j = 0, k = 0; while(NULL != p_tmp_kernel) { for(k=0; k<=i; k++) { if(!strncmp(te[k].kernel_name, p_tmp_kernel->kernel_name, MAX_KERNEL_NAME_LENGTH)) break; } if(k == i+1) { i++; k = i; } te[k].kernel_execute_count += p_tmp_kernel->current_count; strncpy(te[k].kernel_name, p_tmp_kernel->kernel_name, MAX_KERNEL_NAME_LENGTH); te[k].kernel_name[MAX_KERNEL_NAME_LENGTH - 1] = '\0'; te[k].kernel_sum_time += p_tmp_kernel->kernel_sum_time; for(j=0; j != p_tmp_kernel->current_count; ++j) te[k].kernel_times[te[k].time_index++] = p_tmp_kernel->kernel_times[j]; p_tmp_kernel = p_tmp_kernel->next; } for(k=0; k<=i; k++) { float average = te[k].kernel_sum_time / te[k].kernel_execute_count; double sumsquare = 0.0; for(j=0; j<(int)te[k].time_index; j++) sumsquare += (te[k].kernel_times[j] - average) * (te[k].kernel_times[j] - average); te[k].dev = sqrt(sumsquare / te[k].kernel_execute_count); /* standard deviation; expression reconstructed, the original was lost in archiving */ } float sum_time = 0.0f; qsort(te, p_context->kernel_count, sizeof(time_element), cmp); for(j=0; j<=i; ++j) sum_time += te[j].kernel_sum_time; for(j=0; j<=i; ++j) { printf(" [Kernel Name: %-30s Time(ms): (%4.1f%%) %9.2f Count: %-7d Ave(ms): %7.2f Dev: %.1lf%%]\n", te[j].kernel_name, te[j].kernel_sum_time / sum_time * 100, te[j].kernel_sum_time, te[j].kernel_execute_count, te[j].kernel_sum_time / te[j].kernel_execute_count, te[j].dev / te[j].kernel_sum_time * te[j].kernel_execute_count * 100); } free(te); printf(" Total : %.2f\n", sum_time); if(2 != b_output_kernel_perf) { printf("[------------ CONTEXT ENDS------------]\n\n"); p_context = p_context->next; continue; } p_tmp_kernel = p_kernel; printf("\n ->>>> KERNELS TIME DETAIL <<<<-\n"); while(NULL != p_kernel) { printf(" [Kernel Name : %30s Time(ms): %.2f]\n", p_kernel->kernel_name, p_kernel->kernel_sum_time); if(*p_kernel->build_option != '\0') { int count = 0; printf(" ->Build Options : "); while(p_kernel->build_option[count] != '\0' ) { printf("%c", p_kernel->build_option[count++]); if(count % 100 == 0) printf("\n "); } printf("\n"); } for(i=0; i!=p_kernel->current_count; ++i) printf(" Execution Round%5d : %.2f (ms)\n", i+1, p_kernel->kernel_times[i]); p_kernel = p_kernel->next; } printf("[------------ CONTEXT ENDS------------]\n\n"); p_context = p_context->next; } free_storage(); } static void insert(cl_context context, const char *kernel_name, const char *build_opt, float time) { if(!atexit_registered) { atexit_registered = 1; atexit(print_time_info); } context_storage_node *p_context = find_context(context); kernel_storage_node *p_kernel = find_kernel(p_context, kernel_name, build_opt); prev_context_pointer = p_context; prev_kernel_pointer = p_kernel; p_kernel->kernel_times[p_kernel->current_count++] = time; p_kernel->kernel_sum_time += time; if(p_kernel->kernel_sum_time > p_context->kernel_max_time) { p_context->kernel_max_time = p_kernel->kernel_sum_time; strncpy(p_context->max_time_kernel_name, kernel_name, MAX_KERNEL_NAME_LENGTH); p_context->max_time_kernel_name[MAX_KERNEL_NAME_LENGTH - 1] = '\0'; } } static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; int b_output_kernel_perf = 0; static struct timeval start, end; void initialize_env_var() { char *env = getenv("OCL_OUTPUT_KERNEL_PERF"); if(NULL == env || !strncmp(env,"0", 1)) b_output_kernel_perf = 0; else if(!strncmp(env,"1", 1)) b_output_kernel_perf = 1; else b_output_kernel_perf = 2; } void time_start(cl_context context, const char * kernel_name, cl_command_queue cq) { pthread_mutex_lock(&mutex); gettimeofday(&start, NULL); } void time_end(cl_context context, const char * kernel_name, const char * build_opt, cl_command_queue cq) { clFinish(cq); gettimeofday(&end, NULL); float t = (end.tv_sec - start.tv_sec)*1000 + (end.tv_usec - start.tv_usec)/1000.0f; insert(context, kernel_name, build_opt, t);
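/* Note: the mutex acquired in time_start() is held across the kernel's whole
   execution and only released below, so the reported time is wall-clock time
   around clFinish() and concurrent enqueues are serialized while profiling. */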
pthread_mutex_unlock(&mutex); } Beignet-1.1.1-Source/src/cl_platform_id.c000664 001750 001750 00000010150 12576733264 017402 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #include "cl_platform_id.h" #include "cl_internals.h" #include "cl_utils.h" #include "CL/cl.h" #include "CL/cl_ext.h" #include <stdlib.h> #include <string.h> /* reconstructed; names lost in archiving */ #define DECL_INFO_STRING(FIELD, STRING) \ .FIELD = STRING, \ .JOIN(FIELD,_sz) = sizeof(STRING), static struct _cl_platform_id intel_platform_data = { INIT_ICD(dispatch) DECL_INFO_STRING(profile, "FULL_PROFILE") DECL_INFO_STRING(version, LIBCL_VERSION_STRING) DECL_INFO_STRING(name, "Intel Gen OCL Driver") DECL_INFO_STRING(vendor, "Intel") DECL_INFO_STRING(icd_suffix_khr, "Intel") }; #undef DECL_INFO_STRING /* Intel platform (only GPU now). It is used as default when the API's platform ptr is NULL */ static cl_platform_id intel_platform = NULL; LOCAL cl_platform_id cl_get_platform_default(void) { if (intel_platform) return intel_platform; intel_platform = &intel_platform_data; cl_intel_platform_extension_init(intel_platform); return intel_platform; } LOCAL cl_int cl_get_platform_ids(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { if (num_platforms != NULL) *num_platforms = 1; /* Easy right now, only one platform is supported */ if(platforms) *platforms = cl_get_platform_default(); return CL_SUCCESS; } #define DECL_FIELD(CASE,FIELD) \ case JOIN(CL_,CASE): \ if (param_value_size < cl_get_platform_default()->JOIN(FIELD,_sz)) \ return CL_INVALID_VALUE; \ if (param_value_size_ret != NULL) \ *param_value_size_ret = cl_get_platform_default()->JOIN(FIELD,_sz); \ memcpy(param_value, \ cl_get_platform_default()->FIELD, \ cl_get_platform_default()->JOIN(FIELD,_sz)); \ return CL_SUCCESS; #define GET_FIELD_SZ(CASE,FIELD) \ case JOIN(CL_,CASE): \ if (param_value_size_ret != NULL) \ *param_value_size_ret = cl_get_platform_default()->JOIN(FIELD,_sz); \ return CL_SUCCESS; LOCAL cl_int cl_get_platform_info(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret) { if (param_value == NULL) { switch (param_name) { GET_FIELD_SZ (PLATFORM_PROFILE, profile); GET_FIELD_SZ (PLATFORM_VERSION, version); GET_FIELD_SZ (PLATFORM_NAME, name); GET_FIELD_SZ (PLATFORM_VENDOR, vendor); GET_FIELD_SZ (PLATFORM_EXTENSIONS, extensions); GET_FIELD_SZ (PLATFORM_ICD_SUFFIX_KHR, icd_suffix_khr); default: return CL_INVALID_VALUE; } } /* Fetch the platform info */ switch (param_name) { DECL_FIELD (PLATFORM_PROFILE, profile); DECL_FIELD (PLATFORM_VERSION, version); DECL_FIELD (PLATFORM_NAME, name); DECL_FIELD (PLATFORM_VENDOR, vendor); DECL_FIELD (PLATFORM_EXTENSIONS, extensions); DECL_FIELD (PLATFORM_ICD_SUFFIX_KHR, icd_suffix_khr); default: return CL_INVALID_VALUE; } } #undef DECL_FIELD Beignet-1.1.1-Source/src/cl_mem.h000664 001750 001750 00000025547 12605356050
015670 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #ifndef __CL_MEM_H__ #define __CL_MEM_H__ #include "cl_internals.h" #include "cl_driver_type.h" #include "CL/cl.h" #include "cl_khr_icd.h" #include <assert.h> #include <pthread.h> /* reconstructed; names lost in archiving */ #ifndef CL_VERSION_1_2 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 typedef struct _cl_image_desc { cl_mem_object_type image_type; size_t image_width; size_t image_height; size_t image_depth; size_t image_array_size; size_t image_row_pitch; size_t image_slice_pitch; cl_uint num_mip_levels; cl_uint num_samples; cl_mem buffer; } cl_image_desc; #endif typedef enum cl_image_tiling { CL_NO_TILE = 0, CL_TILE_X = 1, CL_TILE_Y = 2 } cl_image_tiling_t; typedef struct _cl_mapped_ptr { void * ptr; void * v_ptr; size_t size; size_t origin[3]; /* mapped origin */ size_t region[3]; /* mapped region */ }cl_mapped_ptr; typedef struct _cl_mem_dstr_cb { struct _cl_mem_dstr_cb * next; void (CL_CALLBACK *pfn_notify)(cl_mem memobj, void *user_data); void *user_data; }cl_mem_dstr_cb; /* Used for buffers and images */ enum cl_mem_type { CL_MEM_BUFFER_TYPE, CL_MEM_SUBBUFFER_TYPE, CL_MEM_IMAGE_TYPE, CL_MEM_GL_IMAGE_TYPE, CL_MEM_BUFFER1D_IMAGE_TYPE }; #define IS_IMAGE(mem) (mem->type >= CL_MEM_IMAGE_TYPE) #define IS_GL_IMAGE(mem) (mem->type == CL_MEM_GL_IMAGE_TYPE) typedef struct _cl_mem { DEFINE_ICD(dispatch) uint64_t magic; /* To identify it as a memory object */ cl_mem prev, next; /* We chain the memory buffers together */ enum cl_mem_type type; volatile int ref_n; /* This object is reference counted */ cl_buffer bo; /* Data in GPU memory */ size_t size; /* original request size, not the aligned size, used in constant buffer */ cl_context ctx; /* Context it belongs to */ cl_mem_flags flags; /* Flags specified at the creation time */ void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR, CL_MEM_USE_HOST_PTR */ cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */ int mapped_ptr_sz; /* The array size of mapped_ptr. */ int map_ref; /* The mapped count. */ uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */ cl_mem_dstr_cb *dstr_cb; /* The destroy callback.
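Callbacks are chained here by clSetMemObjectDestructorCallback() and, per the CL spec, are invoked in reverse registration order when the object is finally deleted.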
*/ uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled */ size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR */ } _cl_mem; struct _cl_mem_image { _cl_mem base; cl_image_format fmt; /* only for images */ uint32_t intel_fmt; /* format to provide in the surface state */ uint32_t bpp; /* number of bytes per pixel */ cl_mem_object_type image_type; /* only for images 1D/2D...*/ size_t w, h, depth; /* only for images (depth is only for 3D images) */ size_t row_pitch, slice_pitch; size_t host_row_pitch, host_slice_pitch; cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */ size_t tile_x, tile_y; /* tile offset, used for mipmap images. */ size_t offset; /* offset for dri_bo, used when it is relocated. */ cl_mem buffer_1d; /* if the image is created from a buffer, it points to the buffer. */ }; struct _cl_mem_gl_image { struct _cl_mem_image base; uint32_t target; int miplevel; uint32_t texture; }; struct _cl_mem_buffer1d_image { struct _cl_mem_image base; uint32_t size; }; inline static void cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h, cl_mem_object_type image_type, size_t depth, cl_image_format fmt, uint32_t intel_fmt, uint32_t bpp, size_t row_pitch, size_t slice_pitch, cl_image_tiling_t tiling, size_t tile_x, size_t tile_y, size_t offset) { image->w = w; image->h = h; image->image_type = image_type; image->depth = depth; image->fmt = fmt; image->intel_fmt = intel_fmt; image->bpp = bpp; image->row_pitch = row_pitch; image->slice_pitch = slice_pitch; image->tiling = tiling; image->tile_x = tile_x; image->tile_y = tile_y; image->offset = offset; } struct _cl_mem_buffer { _cl_mem base; struct _cl_mem_buffer* subs; /* Sub buf objects. */ size_t sub_offset; /* The sub start offset. */ struct _cl_mem_buffer* sub_prev, *sub_next;/* We chain the sub memory buffers together */ pthread_mutex_t sub_lock; /* Sub buffers list lock*/ struct _cl_mem_buffer* parent; /* Point to the parent buffer if is sub-buffer */ }; inline static struct _cl_mem_image * cl_mem_image(cl_mem mem) { assert(IS_IMAGE(mem)); return (struct _cl_mem_image *)mem; } inline static struct _cl_mem_gl_image * cl_mem_gl_image(cl_mem mem) { assert(IS_GL_IMAGE(mem)); return (struct _cl_mem_gl_image*)mem; } inline static struct _cl_mem_buffer * cl_mem_buffer(cl_mem mem) { assert(!IS_IMAGE(mem)); return (struct _cl_mem_buffer *)mem; } /* Query information about a memory object */ extern cl_int cl_get_mem_object_info(cl_mem, cl_mem_info, size_t, void *, size_t *); /* Query information about an image */ extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t *); /* Query whether mem is in buffers */ extern cl_int is_valid_mem(cl_mem mem, cl_mem buffers); /* Create a new memory object and initialize it with possible user data */ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*); /* Create a new sub memory object */ extern cl_mem cl_mem_new_sub_buffer(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *); /* Idem but this is an image */ extern cl_mem cl_mem_new_image(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret); /* Unref the object and delete it if no more reference */ extern void cl_mem_delete(cl_mem); /* Destroy egl image.
*/ extern void cl_mem_gl_delete(struct _cl_mem_gl_image *); /* Add one more reference to this object */ extern void cl_mem_add_ref(cl_mem); /* api clEnqueueCopyBuffer helper function */ extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, size_t src_offset, size_t dst_offset, size_t cb); extern cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size, cl_mem buffer, size_t offset, size_t size); extern cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image*, const size_t *, const size_t *); /* api clEnqueueCopyBufferRect helper function */ extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem, const size_t *, const size_t *, const size_t *, size_t, size_t, size_t, size_t); /* api clEnqueueCopyImage helper function */ extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*, const size_t *, const size_t *, const size_t *); /* api clEnqueueCopyImageToBuffer helper function */ extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, struct _cl_mem_image*, cl_mem, const size_t *, const size_t, const size_t *); /* api clEnqueueCopyBufferToImage helper function */ extern cl_int cl_mem_copy_buffer_to_image(cl_command_queue, cl_mem, struct _cl_mem_image*, const size_t, const size_t *, const size_t *); /* Directly map a memory object */ extern void *cl_mem_map(cl_mem, int); /* Unmap a memory object */ extern cl_int cl_mem_unmap(cl_mem); /* Directly map a memory object in GTT mode */ extern void *cl_mem_map_gtt(cl_mem); /* Directly map a memory object in GTT mode, without waiting for the GPU to go idle */ extern void *cl_mem_map_gtt_unsync(cl_mem); /* Unmap a memory object in GTT mode */ extern cl_int cl_mem_unmap_gtt(cl_mem); /* Directly map a memory object - tiled images are mapped in GTT mode */ extern void *cl_mem_map_auto(cl_mem, int); /* Unmap a memory object - tiled images are unmapped in GTT mode */ extern cl_int cl_mem_unmap_auto(cl_mem); /* Pin/unpin the buffer in memory (you must be root) */ extern cl_int cl_mem_pin(cl_mem); extern cl_int cl_mem_unpin(cl_mem); extern cl_mem cl_mem_allocate(enum cl_mem_type type, cl_context ctx, cl_mem_flags flags, size_t sz, cl_int is_tiled, void *host_ptr, cl_int *errcode); void cl_mem_copy_image_region(const size_t *origin, const size_t *region, void *dst, size_t dst_row_pitch, size_t dst_slice_pitch, const void *src, size_t src_row_pitch, size_t src_slice_pitch, const struct _cl_mem_image *image, cl_bool offset_dst, cl_bool offset_src); void cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region, const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image); extern cl_mem cl_mem_new_libva_buffer(cl_context ctx, unsigned int bo_name, cl_int *errcode); extern cl_mem cl_mem_new_libva_image(cl_context ctx, unsigned int bo_name, size_t offset, size_t width, size_t height, cl_image_format fmt, size_t row_pitch, cl_int *errcode); extern cl_int cl_mem_get_fd(cl_mem mem, int* fd); #endif /* __CL_MEM_H__ */ Beignet-1.1.1-Source/src/cl_driver.h000664 001750 001750 00000041403 12576733264 016407 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #ifndef __CL_DRIVER_H__ #define __CL_DRIVER_H__ #include <stdint.h> #include <stdlib.h> /* reconstructed; names lost in archiving */ #include "cl_driver_type.h" /* Various limitations we should actually remove */ #define GEN_MAX_SURFACES 256 #define GEN_MAX_SAMPLERS 16 /************************************************************************** * cl_driver: * Hide behind some callbacks the buffer allocation / deallocation ... This * will allow us to make the use of a software performance simulator easier and * to minimize the code specific for the HW and for the simulator **************************************************************************/ /* Create a new driver */ typedef cl_driver (cl_driver_new_cb)(cl_context_prop); extern cl_driver_new_cb *cl_driver_new; /* Delete the driver */ typedef void (cl_driver_delete_cb)(cl_driver); extern cl_driver_delete_cb *cl_driver_delete; /* Get the buffer manager from the driver */ typedef cl_buffer_mgr (cl_driver_get_bufmgr_cb)(cl_driver); extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr; /* Get the Gen version from the driver */ typedef uint32_t (cl_driver_get_ver_cb)(cl_driver); extern cl_driver_get_ver_cb *cl_driver_get_ver; typedef enum cl_self_test_res{ SELF_TEST_PASS = 0, SELF_TEST_SLM_FAIL = 1, SELF_TEST_ATOMIC_FAIL = 2, SELF_TEST_OTHER_FAIL = 3, } cl_self_test_res; /* Set the atomic enable/disable flag in the driver */ typedef void (cl_driver_set_atomic_flag_cb)(cl_driver, int); extern cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag; /************************************************************************** * GPGPU command streamer **************************************************************************/ /* Describe texture tiling */ typedef enum cl_gpgpu_tiling { GPGPU_NO_TILE = 0, GPGPU_TILE_X = 1, GPGPU_TILE_Y = 2, } cl_gpgpu_tiling; /* Cache control options for gen7 */ typedef enum cl_cache_control { cc_gtt = 0x0, cc_l3 = 0x1, cc_llc = 0x2, cc_llc_l3 = 0x3 } cl_cache_control; /* L3 Cache control options for gen75 */ typedef enum cl_l3_cache_control { l3cc_uc = 0x0, l3cc_ec = 0x1 } cl_l3_cache_control; /* LLCCC Cache control options for gen75 */ typedef enum cl_llccc_cache_control { llccc_pte = 0x0<<1, llccc_uc = 0x1<<1, llccc_ec = 0x2<<1, llccc_ucllc = 0x3<<1 } cl_llccc_cache_control; /* Target Cache control options for gen8 */ typedef enum cl_target_cache_control { tcc_ec_only = 0x0<<3, tcc_llc_only = 0x1<<3, tcc_llc_ec = 0x2<<3, tcc_llc_ec_l3 = 0x3<<3 } cl_target_cache_control; /* Memory type LLC/ELLC Cache control options for gen8 */ typedef enum cl_mtllc_cache_control { mtllc_pte = 0x0<<5, mtllc_none = 0x1<<5, mtllc_wt = 0x2<<5, mtllc_wb = 0x3<<5 } cl_mtllc_cache_control; typedef enum gpu_command_status { command_queued = 3, command_submitted = 2, command_running = 1, command_complete = 0 } gpu_command_status; /* Use this structure to bind kernels in the gpgpu state */ typedef struct cl_gpgpu_kernel { const char *name; /* kernel name and bo name */ uint32_t grf_blocks; /* register blocks kernel wants (in 8 reg blocks) */ uint32_t curbe_sz; /* total size of all curbes */ cl_buffer bo; /* kernel code in the proper addr space */ int32_t barrierID; /* barrierID for _this_ kernel */ uint32_t
use_slm:1; /* For gen7 (automatic barrier management) */ uint32_t thread_n:15; /* For gen7 (automatic barrier management) */ uint32_t slm_sz; /* For gen7 (automatic SLM allocation) */ } cl_gpgpu_kernel; /* Create a new gpgpu state */ typedef cl_gpgpu (cl_gpgpu_new_cb)(cl_driver); extern cl_gpgpu_new_cb *cl_gpgpu_new; /* Delete the gpgpu state */ typedef void (cl_gpgpu_delete_cb)(cl_gpgpu); extern cl_gpgpu_delete_cb *cl_gpgpu_delete; /* Synchronize GPU with CPU */ typedef void (cl_gpgpu_sync_cb)(void*); extern cl_gpgpu_sync_cb *cl_gpgpu_sync; /* Bind a regular unformatted buffer */ typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t internal_offset, uint32_t size, uint8_t bti); extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf; /* bind samplers defined in both kernel and kernel args. */ typedef void (cl_gpgpu_bind_sampler_cb)(cl_gpgpu, uint32_t *samplers, size_t sampler_sz); extern cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler; /* get the default cache control value. */ typedef uint32_t (cl_gpgpu_get_cache_ctrl_cb)(); extern cl_gpgpu_get_cache_ctrl_cb *cl_gpgpu_get_cache_ctrl; /* Set a 2d texture */ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state, uint32_t id, cl_buffer obj_bo, uint32_t obj_bo_offset, uint32_t format, uint32_t bpp, uint32_t type, int32_t w, int32_t h, int32_t depth, int pitch, int32_t slice_pitch, cl_gpgpu_tiling tiling); extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image; /* Setup a stack */ typedef void (cl_gpgpu_set_stack_cb)(cl_gpgpu, uint32_t offset, uint32_t size, uint32_t cchint); extern cl_gpgpu_set_stack_cb *cl_gpgpu_set_stack; /* Setup scratch */ typedef int (cl_gpgpu_set_scratch_cb)(cl_gpgpu, uint32_t per_thread_size); extern cl_gpgpu_set_scratch_cb *cl_gpgpu_set_scratch; /* Configure internal state */ typedef int (cl_gpgpu_state_init_cb)(cl_gpgpu, uint32_t max_threads, uint32_t size_cs_entry, int profiling); extern cl_gpgpu_state_init_cb *cl_gpgpu_state_init; /* Set the buffer object where to report performance counters */ typedef void (cl_gpgpu_set_perf_counters_cb)(cl_gpgpu, cl_buffer perf); extern cl_gpgpu_set_perf_counters_cb *cl_gpgpu_set_perf_counters; /* Fills current curbe buffer with data */ typedef int (cl_gpgpu_upload_curbes_cb)(cl_gpgpu, const void* data, uint32_t size); extern cl_gpgpu_upload_curbes_cb *cl_gpgpu_upload_curbes; typedef cl_buffer (cl_gpgpu_alloc_constant_buffer_cb)(cl_gpgpu, uint32_t size, uint8_t bti); extern cl_gpgpu_alloc_constant_buffer_cb *cl_gpgpu_alloc_constant_buffer; /* Setup all indirect states */ typedef void (cl_gpgpu_states_setup_cb)(cl_gpgpu, cl_gpgpu_kernel *kernel); extern cl_gpgpu_states_setup_cb *cl_gpgpu_states_setup; /* Upload the constant samplers as specified inside the OCL kernel */ typedef void (cl_gpgpu_upload_samplers_cb)(cl_gpgpu *state, const void *data, uint32_t n); extern cl_gpgpu_upload_samplers_cb *cl_gpgpu_upload_samplers; /* Set a sampler */ typedef void (cl_gpgpu_set_sampler_cb)(cl_gpgpu, uint32_t index, uint32_t non_normalized); extern cl_gpgpu_set_sampler_cb *cl_gpgpu_set_sampler; /* Allocate the batch buffer and return the BO used for the batch buffer */ typedef int (cl_gpgpu_batch_reset_cb)(cl_gpgpu, size_t sz); extern cl_gpgpu_batch_reset_cb *cl_gpgpu_batch_reset; /* Atomic begin, pipeline select, urb, pipeline state and constant buffer */ typedef void (cl_gpgpu_batch_start_cb)(cl_gpgpu); extern cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start; /* atomic end with possibly inserted flush */ typedef void (cl_gpgpu_batch_end_cb)(cl_gpgpu, int32_t flush_mode); extern
cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end; /* Flush the command buffer */ typedef int (cl_gpgpu_flush_cb)(cl_gpgpu); extern cl_gpgpu_flush_cb *cl_gpgpu_flush; /* Create a new event for a batch buffer */ typedef cl_gpgpu_event (cl_gpgpu_event_new_cb)(cl_gpgpu); extern cl_gpgpu_event_new_cb *cl_gpgpu_event_new; /* update the batch buffer of this event */ typedef int (cl_gpgpu_event_update_status_cb)(cl_gpgpu_event, int); extern cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status; /* flush the batch buffer of this event */ typedef void (cl_gpgpu_event_flush_cb)(cl_gpgpu_event); extern cl_gpgpu_event_flush_cb *cl_gpgpu_event_flush; /* cancel exec batch buffer of this event */ typedef void (cl_gpgpu_event_cancel_cb)(cl_gpgpu_event); extern cl_gpgpu_event_cancel_cb *cl_gpgpu_event_cancel; /* delete a gpgpu event */ typedef void (cl_gpgpu_event_delete_cb)(cl_gpgpu_event); extern cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete; /* Get an event time stamp */ typedef void (cl_gpgpu_event_get_exec_timestamp_cb)(cl_gpgpu, cl_gpgpu_event, int, uint64_t*); extern cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp; /* Get current GPU time stamp */ typedef void (cl_gpgpu_event_get_gpu_cur_timestamp_cb)(cl_gpgpu, uint64_t*); extern cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp; /* Get current batch buffer handle */ typedef void* (cl_gpgpu_ref_batch_buf_cb)(cl_gpgpu); extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf; /* Release batch buffer handle */ typedef void (cl_gpgpu_unref_batch_buf_cb)(void*); extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf; /* Set the printf buffer */ typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t, uint8_t); extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer; /* get the printf buffer offset in the aperture */ typedef unsigned long (cl_gpgpu_reloc_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t); extern cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer; /* map the printf buffer */ typedef void* (cl_gpgpu_map_printf_buffer_cb)(cl_gpgpu, uint32_t); extern cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer; /* unmap the printf buffer */ typedef void (cl_gpgpu_unmap_printf_buffer_cb)(cl_gpgpu, uint32_t); extern cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer; /* release the printf buffer */ typedef unsigned long (cl_gpgpu_release_printf_buffer_cb)(cl_gpgpu, uint32_t); extern cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer; /* Set the last printfset pointer */ typedef int (cl_gpgpu_set_printf_info_cb)(cl_gpgpu, void *, size_t*); extern cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info; /* Get the last printfset pointer */ typedef void* (cl_gpgpu_get_printf_info_cb)(cl_gpgpu, size_t*, size_t*); extern cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info; /* Will spawn all threads */ typedef void (cl_gpgpu_walker_cb)(cl_gpgpu, uint32_t simd_sz, uint32_t thread_n, const size_t global_wk_off[3], const size_t global_wk_sz[3], const size_t local_wk_sz[3]); extern cl_gpgpu_walker_cb *cl_gpgpu_walker; /************************************************************************** * Buffer **************************************************************************/ /* Allocate a buffer */ typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, size_t, size_t); extern cl_buffer_alloc_cb *cl_buffer_alloc; typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long);
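/* Every driver entry point in this header follows the same pattern: a function
   typedef plus a global pointer that the real backend installs at load time.
   A minimal illustrative sketch (the my_* names are hypothetical, not code
   from this tree):

     static cl_buffer my_alloc_userptr(cl_buffer_mgr mgr, const char *name,
                                       void *ptr, size_t size,
                                       unsigned long flags)
     {
       return NULL; // a real backend would wrap its buffer manager here
     }

     static void my_backend_setup(void)
     {
       cl_buffer_alloc_userptr = my_alloc_userptr;
     }
*/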
extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr; /* Set a buffer's tiling mode */ typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride); extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling; #include "cl_context.h" #include "cl_mem.h" typedef cl_buffer (cl_buffer_alloc_from_texture_cb)(cl_context, unsigned int, int, unsigned int, struct _cl_mem_image *gl_image); extern cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture; typedef void (cl_buffer_release_from_texture_cb)(cl_context, unsigned int, int, unsigned int); extern cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture; typedef cl_buffer (cl_buffer_get_buffer_from_libva_cb)(cl_context ctx, unsigned int bo_name, size_t *sz); extern cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva; typedef cl_buffer (cl_buffer_get_image_from_libva_cb)(cl_context ctx, unsigned int bo_name, struct _cl_mem_image *image); extern cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva; /* Unref a buffer and destroy it if no more ref */ typedef int (cl_buffer_unreference_cb)(cl_buffer); extern cl_buffer_unreference_cb *cl_buffer_unreference; /* Add one more ref on a buffer */ typedef void (cl_buffer_reference_cb)(cl_buffer); extern cl_buffer_reference_cb *cl_buffer_reference; /* Map a buffer */ typedef int (cl_buffer_map_cb)(cl_buffer, uint32_t write_enable); extern cl_buffer_map_cb *cl_buffer_map; /* Unmap a buffer */ typedef int (cl_buffer_unmap_cb)(cl_buffer); extern cl_buffer_unmap_cb *cl_buffer_unmap; /* Map a buffer in the GTT domain */ typedef int (cl_buffer_map_gtt_cb)(cl_buffer); extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt; /* Map a buffer in the GTT domain, without waiting for GPU reads or writes */ typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer); extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync; /* Unmap a buffer in the GTT domain */ typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer); extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt; /* Get the virtual address (when mapped) */ typedef void* (cl_buffer_get_virtual_cb)(cl_buffer); extern cl_buffer_get_virtual_cb *cl_buffer_get_virtual; /* Get the size of the buffer */ typedef size_t (cl_buffer_get_size_cb)(cl_buffer); extern cl_buffer_get_size_cb *cl_buffer_get_size; /* Pin a buffer */ typedef int (cl_buffer_pin_cb)(cl_buffer, uint32_t alignment); extern cl_buffer_pin_cb *cl_buffer_pin; /* Unpin a buffer */ typedef int (cl_buffer_unpin_cb)(cl_buffer); extern cl_buffer_unpin_cb *cl_buffer_unpin; /* Fill data in the buffer */ typedef int (cl_buffer_subdata_cb)(cl_buffer, unsigned long, unsigned long, const void*); extern cl_buffer_subdata_cb *cl_buffer_subdata; /* Get data from buffer */ typedef int (cl_buffer_get_subdata_cb)(cl_buffer, unsigned long, unsigned long, void*); extern cl_buffer_get_subdata_cb *cl_buffer_get_subdata; /* Wait for all pending rendering for this buffer to complete */ typedef int (cl_buffer_wait_rendering_cb) (cl_buffer); extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering; typedef int (cl_buffer_get_fd_cb)(cl_buffer, int *fd); extern cl_buffer_get_fd_cb *cl_buffer_get_fd; typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t dim); extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align; /* Get the device id */ typedef int (cl_driver_get_device_id_cb)(void); extern cl_driver_get_device_id_cb *cl_driver_get_device_id; /* Update the device info */ typedef void (cl_driver_update_device_info_cb)(cl_device_id device); extern
cl_driver_update_device_info_cb *cl_driver_update_device_info; /************************************************************************** * cl_khr_gl_sharing. **************************************************************************/ typedef int (cl_gl_acquire_texture_cb)(void *driver, void *ctx, int target, int level, int texture, void*user_data); extern cl_gl_acquire_texture_cb *cl_gl_acquire_texture; typedef int (cl_gl_release_texture_cb)(void *driver, void *ctx, int target, int level, int texture); extern cl_gl_release_texture_cb *cl_gl_release_texture; typedef int (cl_gl_acquire_buffer_object_cb)(void *driver, void *ctx, int bufobj, void* user_data); extern cl_gl_acquire_buffer_object_cb *cl_gl_acquire_buffer_object; typedef int (cl_gl_release_buffer_object_cb)(void *driver, void *ctx, int bufobj); extern cl_gl_release_buffer_object_cb *cl_gl_release_buffer_object; typedef int (cl_gl_acquire_render_buffer_cb)(void *driver, void *ctx, int rb, void* user_data); extern cl_gl_acquire_render_buffer_cb *cl_gl_acquire_render_buffer; typedef int (cl_gl_release_render_buffer_cb)(void *driver, void *ctx, int rb); extern cl_gl_release_render_buffer_cb *cl_gl_release_render_buffer; #ifndef DEFAULT_DRIVER_DIR /* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */ #define DEFAULT_DRIVER_DIR "/usr/local/lib/dri" #endif #endif /* __CL_DRIVER_H__ */ Beignet-1.1.1-Source/src/cl_extensions.h000664 001750 001750 00000005560 12576733264 017317 0ustar00yryr000000 000000 /* The following approved Khronos extension * names must be returned by all devices that * support OpenCL C 1.2. */ #define DECL_BASE_EXTENSIONS \ DECL_EXT(khr_global_int32_base_atomics) \ DECL_EXT(khr_global_int32_extended_atomics) \ DECL_EXT(khr_local_int32_base_atomics) \ DECL_EXT(khr_local_int32_extended_atomics) \ DECL_EXT(khr_byte_addressable_store) \ DECL_EXT(khr_fp64) /* The OPT1 extensions are those optional extensions * which don't have external dependencies */ #define DECL_OPT1_EXTENSIONS \ DECL_EXT(khr_int64_base_atomics)\ DECL_EXT(khr_int64_extended_atomics)\ DECL_EXT(khr_3d_image_writes)\ DECL_EXT(khr_fp16)\ DECL_EXT(khr_image2d_from_buffer)\ DECL_EXT(khr_initialize_memory)\ DECL_EXT(khr_context_abort)\ DECL_EXT(khr_depth_images)\ DECL_EXT(khr_spir) \ DECL_EXT(khr_icd) #define DECL_GL_EXTENSIONS \ DECL_EXT(khr_gl_sharing)\ DECL_EXT(khr_gl_event)\ DECL_EXT(khr_gl_depth_images)\ DECL_EXT(khr_gl_msaa_sharing) #define DECL_D3D_EXTENSIONS \ DECL_EXT(khr_d3d10_sharing)\ DECL_EXT(khr_dx9_media_sharing)\ DECL_EXT(khr_d3d11_sharing)\ #define DECL_ALL_EXTENSIONS \ DECL_BASE_EXTENSIONS \ DECL_OPT1_EXTENSIONS \ DECL_GL_EXTENSIONS \ DECL_D3D_EXTENSIONS #define EXT_ID(name) cl_ ## name ## _ext_id #define EXT_STRUCT_NAME(name) cl_ ## name ## ext /*Declare enum ids */ typedef enum { #define DECL_EXT(name) EXT_ID(name), DECL_ALL_EXTENSIONS #undef DECL_EXT cl_khr_extension_id_max }cl_extension_enum; #define BASE_EXT_START_ID EXT_ID(khr_global_int32_base_atomics) #define BASE_EXT_END_ID EXT_ID(khr_fp64) #define OPT1_EXT_START_ID EXT_ID(khr_int64_base_atomics) #define OPT1_EXT_END_ID EXT_ID(khr_icd) #define GL_EXT_START_ID EXT_ID(khr_gl_sharing) #define GL_EXT_END_ID EXT_ID(khr_gl_msaa_sharing) #define IS_BASE_EXTENSION(id) (id >= BASE_EXT_START_ID && id <= BASE_EXT_END_ID) #define IS_OPT1_EXTENSION(id) (id >= OPT1_EXT_START_ID && id <= OPT1_EXT_END_ID) #define IS_GL_EXTENSION(id) (id >= GL_EXT_START_ID && id <= GL_EXT_END_ID) struct cl_extension_base { cl_extension_enum ext_id; int ext_enabled; char *ext_name; }; /*
Declare each extension structure. */ #define DECL_EXT(name) \ struct EXT_STRUCT_NAME(name) { \ struct cl_extension_base base;\ }; DECL_BASE_EXTENSIONS DECL_OPT1_EXTENSIONS DECL_D3D_EXTENSIONS DECL_GL_EXTENSIONS #undef DECL_EXT /* Union all extensions together. */ typedef union { struct cl_extension_base base; #define DECL_EXT(name) struct EXT_STRUCT_NAME(name) EXT_STRUCT_NAME(name); DECL_ALL_EXTENSIONS #undef DECL_EXT } extension_union; typedef struct cl_extensions { extension_union extensions[cl_khr_extension_id_max]; char ext_str[256]; } cl_extensions_t; extern void cl_intel_platform_extension_init(cl_platform_id intel_platform); extern void cl_intel_platform_enable_fp16_extension(cl_device_id device); extern void cl_intel_platform_get_default_extension(cl_device_id device); Beignet-1.1.1-Source/src/cl_thread.c000664 001750 001750 00000020654 12576733264 016361 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * */ #include <stdlib.h> #include <string.h> /* reconstructed; names lost in archiving */ #include "cl_thread.h" #include "cl_alloc.h" #include "cl_utils.h" /* Because the cl_command_queue can be used in several threads simultaneously but without adding a ref to it, we now handle it like this: Keep one threads_slot_array; every time a thread gets a gpgpu or batch buffer, if it does not have a slot, assign it one. The resources are kept in the queue's private data and resized if needed. When the thread exits, the slot is marked invalid. When the queue is released, all the resources are released. If the user still enqueues, flushes or finishes the queue after it has been released, the behavior is undefined. TODO: Need to shrink the slot map. */
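/* Illustrative flow (not verbatim from this tree): thread A enqueues work and
   __create_thread_spec_data() hands it slot 0 plus a unique magic number; if A
   exits and a new thread B is mapped onto slot 0, B's differing magic lets
   cl_get_thread_gpgpu() detect and rebuild the stale gpgpu and batch buffer
   before reuse. */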
*/ static int thread_array_num = 1; static int *thread_slot_map = NULL; static int thread_magic_num = 1; static pthread_mutex_t thread_queue_map_lock = PTHREAD_MUTEX_INITIALIZER; static __thread int thread_id = -1; static __thread int thread_magic = -1; typedef struct _thread_spec_data { cl_gpgpu gpgpu ; int valid; void* thread_batch_buf; cl_event last_event; cl_event current_event; int thread_magic; } thread_spec_data; typedef struct _queue_thread_private { thread_spec_data** threads_data; int threads_data_num; pthread_mutex_t thread_data_lock; } queue_thread_private; static thread_spec_data * __create_thread_spec_data(cl_command_queue queue, int create) { queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data)); thread_spec_data* spec = NULL; int i = 0; if (thread_id == -1) { pthread_mutex_lock(&thread_queue_map_lock); for (i = 0; i < thread_array_num; i++) { if (thread_slot_map[i] == 0) { thread_id = i; break; } } if (i == thread_array_num) { thread_array_num *= 2; thread_slot_map = realloc(thread_slot_map, sizeof(int) * thread_array_num); memset(thread_slot_map + thread_array_num/2, 0, sizeof(int) * (thread_array_num/2)); thread_id = thread_array_num/2; } thread_slot_map[thread_id] = 1; thread_magic = thread_magic_num++; pthread_mutex_unlock(&thread_queue_map_lock); } pthread_mutex_lock(&thread_private->thread_data_lock); if (thread_array_num > thread_private->threads_data_num) {// just enlarge int old_num = thread_private->threads_data_num; thread_private->threads_data_num = thread_array_num; thread_private->threads_data = realloc(thread_private->threads_data, thread_private->threads_data_num * sizeof(void *)); memset(thread_private->threads_data + old_num, 0, sizeof(void*) * (thread_private->threads_data_num - old_num)); } assert(thread_id != -1 && thread_id < thread_array_num); spec = thread_private->threads_data[thread_id]; if (!spec && create) { spec = CALLOC(thread_spec_data); spec->thread_magic = thread_magic; thread_private->threads_data[thread_id] = spec; } pthread_mutex_unlock(&thread_private->thread_data_lock); return spec; } cl_event get_current_event(cl_command_queue queue) { thread_spec_data* spec = __create_thread_spec_data(queue, 1); assert(spec && spec->thread_magic == thread_magic); return spec->current_event; } cl_event get_last_event(cl_command_queue queue) { thread_spec_data* spec = __create_thread_spec_data(queue, 1); assert(spec && spec->thread_magic == thread_magic); return spec->last_event; } void set_current_event(cl_command_queue queue, cl_event e) { thread_spec_data* spec = __create_thread_spec_data(queue, 1); assert(spec && spec->thread_magic == thread_magic); spec->current_event = e; } void set_last_event(cl_command_queue queue, cl_event e) { thread_spec_data* spec = __create_thread_spec_data(queue, 1); assert(spec && spec->thread_magic == thread_magic); spec->last_event = e; } void* cl_thread_data_create(void) { queue_thread_private* thread_private = CALLOC(queue_thread_private); if (thread_private == NULL) return NULL; if (thread_slot_map == NULL) { pthread_mutex_lock(&thread_queue_map_lock); thread_slot_map = calloc(thread_array_num, sizeof(int)); pthread_mutex_unlock(&thread_queue_map_lock); } pthread_mutex_init(&thread_private->thread_data_lock, NULL); pthread_mutex_lock(&thread_private->thread_data_lock); thread_private->threads_data = malloc(thread_array_num * sizeof(void *)); memset(thread_private->threads_data, 0, sizeof(void*) * thread_array_num); thread_private->threads_data_num = thread_array_num; 
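/* The per-queue array starts at the current global slot count; if more threads
   show up later, __create_thread_spec_data() grows it lazily to match. */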
pthread_mutex_unlock(&thread_private->thread_data_lock); return thread_private; } cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue) { thread_spec_data* spec = __create_thread_spec_data(queue, 1); if (spec->thread_magic != thread_magic) { //We may get the slot from a thread that has exited. So free the stale resources. spec->valid = 0; spec->thread_magic = thread_magic; } if (!spec->valid) { if (spec->thread_batch_buf) { cl_gpgpu_unref_batch_buf(spec->thread_batch_buf); spec->thread_batch_buf = NULL; } if (spec->gpgpu) { cl_gpgpu_delete(spec->gpgpu); spec->gpgpu = NULL; } TRY_ALLOC_NO_ERR(spec->gpgpu, cl_gpgpu_new(queue->ctx->drv)); spec->valid = 1; } error: return spec->gpgpu; } void cl_set_thread_batch_buf(cl_command_queue queue, void* buf) { thread_spec_data* spec = __create_thread_spec_data(queue, 1); assert(spec && spec->thread_magic == thread_magic); if (spec->thread_batch_buf) { cl_gpgpu_unref_batch_buf(spec->thread_batch_buf); } spec->thread_batch_buf = buf; } void* cl_get_thread_batch_buf(cl_command_queue queue) { thread_spec_data* spec = __create_thread_spec_data(queue, 1); assert(spec && spec->thread_magic == thread_magic); return spec->thread_batch_buf; } void cl_invalid_thread_gpgpu(cl_command_queue queue) { queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data)); thread_spec_data* spec = NULL; pthread_mutex_lock(&thread_private->thread_data_lock); spec = thread_private->threads_data[thread_id]; assert(spec); pthread_mutex_unlock(&thread_private->thread_data_lock); if (!spec->valid) { return; } assert(spec->gpgpu); cl_gpgpu_delete(spec->gpgpu); spec->gpgpu = NULL; spec->valid = 0; } cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue) { queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data)); thread_spec_data* spec = NULL; pthread_mutex_lock(&thread_private->thread_data_lock); spec = thread_private->threads_data[thread_id]; assert(spec); pthread_mutex_unlock(&thread_private->thread_data_lock); if (!spec->valid) return NULL; assert(spec->gpgpu); cl_gpgpu gpgpu = spec->gpgpu; spec->gpgpu = NULL; spec->valid = 0; return gpgpu; } /* The destructor for cleaning the thread-specific data. */ void cl_thread_data_destroy(cl_command_queue queue) { int i = 0; queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data)); int threads_data_num; thread_spec_data** threads_data; pthread_mutex_lock(&thread_private->thread_data_lock); threads_data_num = thread_private->threads_data_num; threads_data = thread_private->threads_data; thread_private->threads_data_num = 0; thread_private->threads_data = NULL; pthread_mutex_unlock(&thread_private->thread_data_lock); cl_free(thread_private); queue->thread_data = NULL; for (i = 0; i < threads_data_num; i++) { if (threads_data[i] != NULL && threads_data[i]->thread_batch_buf) { cl_gpgpu_unref_batch_buf(threads_data[i]->thread_batch_buf); threads_data[i]->thread_batch_buf = NULL; } if (threads_data[i] != NULL && threads_data[i]->valid) { cl_gpgpu_delete(threads_data[i]->gpgpu); threads_data[i]->gpgpu = NULL; threads_data[i]->valid = 0; } cl_free(threads_data[i]); } cl_free(threads_data); } Beignet-1.1.1-Source/src/cl_device_id.h000664 001750 001750 00000013443 12605356050 017015 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version.
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_DEVICE_ID_H__ #define __CL_DEVICE_ID_H__ /* Store complete information about the device */ struct _cl_device_id { DEFINE_ICD(dispatch) cl_device_type device_type; cl_uint device_id; cl_uint vendor_id; cl_uint max_compute_unit; // maximum EU number cl_uint max_thread_per_unit; // maximum EU threads per EU. cl_uint sub_slice_count; // Device's sub slice count cl_uint max_work_item_dimensions; // should be 3. size_t max_work_item_sizes[3]; // equal to maximum work group size. size_t max_work_group_size; // maximum work group size under simd16 mode. size_t max_1d_global_work_sizes[3]; // maximum 1d global work size for builtin kernels. size_t max_2d_global_work_sizes[3]; // maximum 2d global work size for builtin kernels. size_t max_3d_global_work_sizes[3]; // maximum 3d global work size for builtin kernels. cl_uint preferred_vector_width_char; cl_uint preferred_vector_width_short; cl_uint preferred_vector_width_int; cl_uint preferred_vector_width_long; cl_uint preferred_vector_width_float; cl_uint preferred_vector_width_double; cl_uint preferred_vector_width_half; cl_uint native_vector_width_char; cl_uint native_vector_width_short; cl_uint native_vector_width_int; cl_uint native_vector_width_long; cl_uint native_vector_width_float; cl_uint native_vector_width_double; cl_uint native_vector_width_half; cl_uint max_clock_frequency; cl_uint address_bits; cl_ulong max_mem_alloc_size; cl_bool image_support; cl_uint max_read_image_args; cl_uint max_write_image_args; size_t image2d_max_width; size_t image_max_array_size; size_t image2d_max_height; size_t image3d_max_width; size_t image3d_max_height; size_t image3d_max_depth; size_t image_mem_size; cl_uint max_samplers; size_t max_parameter_size; cl_uint mem_base_addr_align; cl_uint min_data_type_align_size; cl_device_fp_config single_fp_config; cl_device_fp_config half_fp_config; cl_device_fp_config double_fp_config; cl_device_mem_cache_type global_mem_cache_type; cl_uint global_mem_cache_line_size; cl_ulong global_mem_cache_size; cl_ulong global_mem_size; cl_ulong max_constant_buffer_size; cl_uint max_constant_args; cl_device_local_mem_type local_mem_type; cl_ulong local_mem_size; cl_ulong scratch_mem_size; cl_bool error_correction_support; cl_bool host_unified_memory; size_t profiling_timer_resolution; cl_bool endian_little; cl_bool available; cl_bool compiler_available; cl_bool linker_available; cl_device_exec_capabilities execution_capabilities; cl_command_queue_properties queue_properties; cl_platform_id platform; size_t printf_buffer_size; cl_bool interop_user_sync; const char *name; const char *vendor; const char *version; const char *profile; const char *opencl_c_version; const char extensions[256]; const char *driver_version; const char *built_in_kernels; size_t name_sz; size_t vendor_sz; size_t version_sz; size_t profile_sz; size_t opencl_c_version_sz; size_t extensions_sz; size_t driver_version_sz; size_t built_in_kernels_sz; /* Kernel specific info that we're assigning statically */ size_t preferred_wg_sz_mul; /* SubDevice specific info */ cl_device_id parent_device; cl_uint partition_max_sub_device; cl_device_partition_property 
partition_property[3]; cl_device_affinity_domain affinity_domain; cl_device_partition_property partition_type[3]; cl_uint device_reference_count; uint32_t atomic_test_result; }; /* Get a device from the given platform */ extern cl_int cl_get_device_ids(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id * devices, cl_uint * num_devices); /* Get the intel GPU device we currently have in this machine (if any) */ extern cl_device_id cl_get_gt_device(void); /* Provide info about the device */ extern cl_int cl_get_device_info(cl_device_id device, cl_device_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); extern cl_int cl_get_kernel_workgroup_info(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); /* Returns the Gen device ID */ extern cl_int cl_device_get_version(cl_device_id device, cl_int *ver); extern size_t cl_get_kernel_max_wg_sz(cl_kernel); #endif /* __CL_DEVICE_ID_H__ */ Beignet-1.1.1-Source/src/cl_program.c000664 001750 001750 00000061446 12577740221 016557 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_kernel.h" #include "cl_program.h" #include "cl_device_id.h" #include "cl_context.h" #include "cl_alloc.h" #include "cl_utils.h" #include "cl_khr_icd.h" #include "cl_gbe_loader.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include #include #include #include #include #include #include #include static void cl_program_release_sources(cl_program p) { if (p->source) { cl_free(p->source); p->source = NULL; } } static void cl_program_release_binary(cl_program p) { if (p->binary) { cl_free(p->binary); p->binary = NULL; } } LOCAL void cl_program_delete(cl_program p) { uint32_t ref, i; if (p == NULL) return; /* We are not done with it yet */ if ((ref = atomic_dec(&p->ref_n)) > 1) return; /* Destroy the sources and binary if still allocated */ cl_program_release_sources(p); cl_program_release_binary(p); /* Release the build options. 
*/ if (p->build_opts) { cl_free(p->build_opts); p->build_opts = NULL; } if (p->build_log) { free(p->build_log); p->build_log = NULL; } /* Remove it from the list */ assert(p->ctx); pthread_mutex_lock(&p->ctx->program_lock); if (p->prev) p->prev->next = p->next; if (p->next) p->next->prev = p->prev; if (p->ctx->programs == p) p->ctx->programs = p->next; pthread_mutex_unlock(&p->ctx->program_lock); cl_free(p->bin); /* Free the blob */ for (i = 0; i < p->ker_n; ++i) /* Free the kernels */ cl_kernel_delete(p->ker[i]); cl_free(p->ker); /* Program belongs to their parent context */ cl_context_delete(p->ctx); /* Free the program as allocated by the compiler */ if (p->opaque) { if (CompilerSupported()) compiler_program_clean_llvm_resource(p->opaque); interp_program_delete(p->opaque); } p->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(p); } LOCAL cl_program cl_program_new(cl_context ctx) { cl_program p = NULL; /* Allocate the structure */ TRY_ALLOC_NO_ERR (p, CALLOC(struct _cl_program)); SET_ICD(p->dispatch) p->build_status = CL_BUILD_NONE; p->ref_n = 1; p->magic = CL_MAGIC_PROGRAM_HEADER; p->ctx = ctx; p->build_log = calloc(1000, sizeof(char)); if (p->build_log) p->build_log_max_sz = 1000; /* The queue also belongs to its context */ cl_context_add_ref(ctx); exit: return p; error: cl_program_delete(p); goto exit; } LOCAL void cl_program_add_ref(cl_program p) { assert(p); atomic_inc(&p->ref_n); } static cl_int cl_program_load_gen_program(cl_program p) { cl_int err = CL_SUCCESS; uint32_t i; assert(p->opaque != NULL); p->ker_n = interp_program_get_kernel_num(p->opaque); /* Allocate the kernel array */ TRY_ALLOC (p->ker, CALLOC_ARRAY(cl_kernel, p->ker_n)); for (i = 0; i < p->ker_n; ++i) { const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i); assert(opaque != NULL); TRY_ALLOC (p->ker[i], cl_kernel_new(p)); cl_kernel_setup(p->ker[i], opaque); } error: return err; } inline cl_bool isBitcodeWrapper(const unsigned char *BufPtr, const unsigned char *BufEnd) { // See if you can find the hidden message in the magic bytes :-). // (Hint: it's a little-endian encoding.) return BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && BufPtr[2] == 0x17 && BufPtr[3] == 0x0B; } inline cl_bool isRawBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd) { // These bytes sort of have a hidden message, but it's not in // little-endian this time, and it's a little redundant. return BufPtr != BufEnd && BufPtr[0] == 'B' && BufPtr[1] == 'C' && BufPtr[2] == 0xc0 && BufPtr[3] == 0xde; } #define isBitcode(BufPtr,BufEnd) (isBitcodeWrapper(BufPtr, BufEnd) || isRawBitcode(BufPtr, BufEnd)) LOCAL cl_program cl_program_create_from_binary(cl_context ctx, cl_uint num_devices, const cl_device_id * devices, const size_t * lengths, const unsigned char ** binaries, cl_int * binary_status, cl_int * errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; assert(ctx); INVALID_DEVICE_IF (num_devices != 1); INVALID_DEVICE_IF (devices == NULL); INVALID_DEVICE_IF (devices[0] != ctx->device); INVALID_VALUE_IF (binaries == NULL); INVALID_VALUE_IF (lengths == NULL); if (binaries[0] == NULL) { err = CL_INVALID_VALUE; if (binary_status) binary_status[0] = CL_INVALID_VALUE; goto error; } if (lengths[0] == 0) { err = CL_INVALID_VALUE; if (binary_status) binary_status[0] = CL_INVALID_VALUE; goto error; } program = cl_program_new(ctx); if (UNLIKELY(program == NULL)) { err = CL_OUT_OF_HOST_MEMORY; goto error; } // TODO: Need to check the binary format here to return CL_INVALID_BINARY. 
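/* A minimal sketch of what such a check could look like, reusing the
 * isBitcode() helpers defined above; this is illustrative only, not the
 * actual fix the TODO calls for:
 *
 *   const unsigned char *buf = (const unsigned char *) binaries[0];
 *   const unsigned char *end = buf + lengths[0];
 *   int recognized = isBitcode(buf, end)                          // SPIR/LLVM
 *     || (lengths[0] > 1 && isBitcode(buf + 1, end))   // typed LLVM binary
 *     || buf[0] == 0;                                  // Gen binary blob
 *   if (!recognized) { err = CL_INVALID_BINARY; goto error; }
 *
 * This mirrors the three layouts the dispatch below already accepts. */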
TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char))); memcpy(program->binary, binaries[0], lengths[0]); program->binary_sz = lengths[0]; program->source_type = FROM_BINARY; if(isBitcode((unsigned char*)program->binary, (unsigned char*)program->binary+program->binary_sz)) { char* typed_binary; TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char))); memcpy(typed_binary+1, binaries[0], lengths[0]); *typed_binary = 1; program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, typed_binary, program->binary_sz+1); cl_free(typed_binary); if (UNLIKELY(program->opaque == NULL)) { err = CL_INVALID_PROGRAM; goto error; } program->source_type = FROM_LLVM_SPIR; }else if(isBitcode((unsigned char*)program->binary+1, (unsigned char*)program->binary+program->binary_sz)) { if(*program->binary == 1){ program->binary_type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; }else if(*program->binary == 2){ program->binary_type = CL_PROGRAM_BINARY_TYPE_LIBRARY; }else{ err= CL_INVALID_BINARY; goto error; } program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, program->binary, program->binary_sz); if (UNLIKELY(program->opaque == NULL)) { err = CL_INVALID_PROGRAM; goto error; } program->source_type = FROM_LLVM; } else if (*program->binary == 0) { program->opaque = interp_program_new_from_binary(program->ctx->device->device_id, program->binary, program->binary_sz); if (UNLIKELY(program->opaque == NULL)) { err = CL_INVALID_PROGRAM; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, program); program->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; } if (binary_status) binary_status[0] = CL_SUCCESS; exit: if (errcode_ret) *errcode_ret = err; return program; error: cl_program_delete(program); program = NULL; goto exit; return CL_SUCCESS; } LOCAL cl_program cl_program_create_with_built_in_kernles(cl_context ctx, cl_uint num_devices, const cl_device_id * devices, const char * kernel_names, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; assert(ctx); INVALID_DEVICE_IF (num_devices != 1); INVALID_DEVICE_IF (devices == NULL); INVALID_DEVICE_IF (devices[0] != ctx->device); cl_int binary_status = CL_SUCCESS; extern char cl_internal_built_in_kernel_str[]; extern size_t cl_internal_built_in_kernel_str_size; char* p_built_in_kernel_str =cl_internal_built_in_kernel_str; ctx->built_in_prgs = cl_program_create_from_binary(ctx, 1, &ctx->device, (size_t*)&cl_internal_built_in_kernel_str_size, (const unsigned char **)&p_built_in_kernel_str, &binary_status, &err); if (!ctx->built_in_prgs) return NULL; err = cl_program_build(ctx->built_in_prgs, NULL); if (err != CL_SUCCESS) return NULL; ctx->built_in_prgs->is_built = 1; char delims[] = ";"; char* saveptr = NULL; char* local_kernel_names; char* kernel = NULL; char* matched_kernel; int i = 0; //copy the content to local_kernel_names to protect the kernel_names. 
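/* Example of the tokenizing below (kernel names here are hypothetical; the
 * real list comes from device->built_in_kernels): kernel_names = "copy_a;copy_b"
 * is split by strtok_r() on ';' into "copy_a" and then "copy_b"; each token is
 * matched against the device's built-in kernel list and, on a hit, a cl_kernel
 * is instantiated into ctx->built_in_kernels[] at the matching index. */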
TRY_ALLOC(local_kernel_names, cl_calloc(strlen(kernel_names)+1, sizeof(char) ) ); memcpy(local_kernel_names, kernel_names, strlen(kernel_names)+1); kernel = strtok_r( local_kernel_names, delims , &saveptr); while( kernel != NULL ) { matched_kernel = strstr(ctx->device->built_in_kernels, kernel); if(matched_kernel){ for (i = 0; i < ctx->built_in_prgs->ker_n; ++i) { assert(ctx->built_in_prgs->ker[i]); const char *ker_name = cl_kernel_get_name(ctx->built_in_prgs->ker[i]); if (strcmp(ker_name, kernel) == 0) { break; } } ctx->built_in_kernels[i] = cl_program_create_kernel(ctx->built_in_prgs, kernel, NULL); } kernel = strtok_r((char*)saveptr , delims, &saveptr ); } cl_free(local_kernel_names); exit: if (errcode_ret) *errcode_ret = err; return ctx->built_in_prgs; error: goto exit; return CL_SUCCESS; } LOCAL cl_program cl_program_create_from_llvm(cl_context ctx, cl_uint num_devices, const cl_device_id *devices, const char *file_name, cl_int *errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; assert(ctx); INVALID_DEVICE_IF (num_devices != 1); INVALID_DEVICE_IF (devices == NULL); INVALID_DEVICE_IF (devices[0] != ctx->device); INVALID_VALUE_IF (file_name == NULL); program = cl_program_new(ctx); if (UNLIKELY(program == NULL)) { err = CL_OUT_OF_HOST_MEMORY; goto error; } program->opaque = compiler_program_new_from_llvm(ctx->device->device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1); if (UNLIKELY(program->opaque == NULL)) { err = CL_INVALID_PROGRAM; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, program); program->source_type = FROM_LLVM; exit: if (errcode_ret) *errcode_ret = err; return program; error: cl_program_delete(program); program = NULL; goto exit; } LOCAL cl_program cl_program_create_from_source(cl_context ctx, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { cl_program program = NULL; cl_int err = CL_SUCCESS; cl_uint i; int32_t * lens = NULL; int32_t len_total = 0; assert(ctx); char * p = NULL; // the real compilation step will be done at build time since we do not have // yet the compilation options program = cl_program_new(ctx); if (UNLIKELY(program == NULL)) { err = CL_OUT_OF_HOST_MEMORY; goto error; } TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t))); for (i = 0; i < (int) count; ++i) { size_t len; if (lengths == NULL || lengths[i] == 0) len = strlen(strings[i]); else len = lengths[i]; lens[i] = len; len_total += len; } TRY_ALLOC(program->source, cl_calloc(len_total+1, sizeof(char))); p = program->source; for (i = 0; i < (int) count; ++i) { memcpy(p, strings[i], lens[i]); p += lens[i]; } *p = '\0'; program->source_type = FROM_SOURCE; program->binary_type = CL_PROGRAM_BINARY_TYPE_NONE; exit: cl_free(lens); lens = NULL; if (errcode_ret) *errcode_ret = err; return program; error: cl_program_delete(program); program = NULL; goto exit; } /* Before we do the real work, we need to check whether our platform cl version can meet -cl-std= */ static int check_cl_version_option(cl_program p, const char* options) { const char* s = NULL; int ver1 = 0; int ver2 = 0; char version_str[64]; if (options && (s = strstr(options, "-cl-std="))) { if (s + strlen("-cl-std=CLX.X") > options + strlen(options)) { return 0; } if (s[8] != 'C' || s[9] != 'L' || s[10] > '9' || s[10] < '0' || s[11] != '.' 
|| s[12] > '9' || s[12] < '0') { return 0; } ver1 = (s[10] - '0') * 10 + (s[12] - '0'); if (cl_get_device_info(p->ctx->device, CL_DEVICE_OPENCL_C_VERSION, sizeof(version_str), version_str, NULL) != CL_SUCCESS) return 0; assert(strstr(version_str, "OpenCL") && version_str[0] == 'O'); ver2 = (version_str[9] - '0') * 10 + (version_str[11] - '0'); if (ver2 < ver1) return 0; return 1; } return 1; } LOCAL cl_int cl_program_build(cl_program p, const char *options) { cl_int err = CL_SUCCESS; int i = 0; int copyed = 0; if (p->ref_n > 1) { err = CL_INVALID_OPERATION; goto error; } if (!check_cl_version_option(p, options)) { err = CL_BUILD_PROGRAM_FAILURE; goto error; } if (options) { if(p->build_opts == NULL || strcmp(options, p->build_opts) != 0) { if(p->build_opts) { cl_free(p->build_opts); p->build_opts = NULL; } TRY_ALLOC (p->build_opts, cl_calloc(strlen(options) + 1, sizeof(char))); memcpy(p->build_opts, options, strlen(options)); p->source_type = p->source ? FROM_SOURCE : p->binary ? FROM_BINARY : FROM_LLVM; if (strstr(options, "-x spir")) { p->source_type = FROM_LLVM_SPIR; } } } if (options == NULL && p->build_opts) { p->source_type = p->source ? FROM_SOURCE : p->binary ? FROM_BINARY : FROM_LLVM; cl_free(p->build_opts); p->build_opts = NULL; } if (p->source_type == FROM_SOURCE) { if (!CompilerSupported()) { err = CL_COMPILER_NOT_AVAILABLE; goto error; } p->opaque = compiler_program_new_from_source(p->ctx->device->device_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz); if (UNLIKELY(p->opaque == NULL)) { if (p->build_log_sz > 0 && strstr(p->build_log, "error: error reading 'options'")) err = CL_INVALID_BUILD_OPTIONS; else err = CL_BUILD_PROGRAM_FAILURE; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, p); } else if (p->source_type == FROM_LLVM || p->source_type == FROM_LLVM_SPIR) { if (!CompilerSupported()) { err = CL_COMPILER_NOT_AVAILABLE; goto error; } compiler_program_build_from_llvm(p->opaque, p->build_log_max_sz, p->build_log, &p->build_log_sz, options); if (UNLIKELY(p->opaque == NULL)) { if (p->build_log_sz > 0 && strstr(p->build_log, "error: error reading 'options'")) err = CL_INVALID_BUILD_OPTIONS; else err = CL_BUILD_PROGRAM_FAILURE; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, p); } else if (p->source_type == FROM_BINARY) { p->opaque = interp_program_new_from_binary(p->ctx->device->device_id, p->binary, p->binary_sz); if (UNLIKELY(p->opaque == NULL)) { err = CL_BUILD_PROGRAM_FAILURE; goto error; } /* Create all the kernels */ TRY (cl_program_load_gen_program, p); } p->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; for (i = 0; i < p->ker_n; i ++) { const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i); p->bin_sz += interp_kernel_get_code_size(opaque); } TRY_ALLOC (p->bin, cl_calloc(p->bin_sz, sizeof(char))); for (i = 0; i < p->ker_n; i ++) { const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i); size_t sz = interp_kernel_get_code_size(opaque); memcpy(p->bin + copyed, interp_kernel_get_code(opaque), sz); copyed += sz; } p->is_built = 1; p->build_status = CL_BUILD_SUCCESS; return CL_SUCCESS; error: p->build_status = CL_BUILD_ERROR; return err; } cl_program cl_program_link(cl_context context, cl_uint num_input_programs, const cl_program * input_programs, const char * options, cl_int* errcode_ret) { cl_program p = NULL; cl_int err = CL_SUCCESS; cl_int i = 0; int copyed = 0; cl_bool ret = 0; int avialable_program = 0; //Although we don't use options, but still need check 
options if(!compiler_program_check_opt(options)) { err = CL_INVALID_LINKER_OPTIONS; goto error; } for(i = 0; i < num_input_programs; i++) { //num_input_programs >0 and input_programs MUST not NULL, so compare with input_programs[0] directly. if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY || input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { avialable_program++; } } //None of program contain a compilerd binary or library. if(avialable_program == 0) { goto done; } //Must all of program contain a compilerd binary or library. if(avialable_program < num_input_programs) { err = CL_INVALID_OPERATION; goto error; } p = cl_program_new(context); if (UNLIKELY(p == NULL)) { err = CL_OUT_OF_HOST_MEMORY; goto error; } if (!check_cl_version_option(p, options)) { err = CL_BUILD_PROGRAM_FAILURE; goto error; } p->opaque = compiler_program_new_gen_program(context->device->device_id, NULL, NULL); for(i = 0; i < num_input_programs; i++) { // if program create with llvm binary, need deserilize first to get module. if(input_programs[i]) ret = compiler_program_link_program(p->opaque, input_programs[i]->opaque, p->build_log_max_sz, p->build_log, &p->build_log_sz); if (UNLIKELY(ret)) { err = CL_LINK_PROGRAM_FAILURE; goto error; } } if(options && strstr(options, "-create-library")){ p->binary_type = CL_PROGRAM_BINARY_TYPE_LIBRARY; goto done; }else{ p->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; } compiler_program_build_from_llvm(p->opaque, p->build_log_max_sz, p->build_log, &p->build_log_sz, options); /* Create all the kernels */ TRY (cl_program_load_gen_program, p); for (i = 0; i < p->ker_n; i ++) { const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i); p->bin_sz += interp_kernel_get_code_size(opaque); } TRY_ALLOC (p->bin, cl_calloc(p->bin_sz, sizeof(char))); for (i = 0; i < p->ker_n; i ++) { const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i); size_t sz = interp_kernel_get_code_size(opaque); memcpy(p->bin + copyed, interp_kernel_get_code(opaque), sz); copyed += sz; } done: if(p) p->is_built = 1; if(p) p->build_status = CL_BUILD_SUCCESS; if (errcode_ret) *errcode_ret = err; return p; error: if(p) p->build_status = CL_BUILD_ERROR; if (errcode_ret) *errcode_ret = err; return p; } LOCAL cl_int cl_program_compile(cl_program p, cl_uint num_input_headers, const cl_program * input_headers, const char ** header_include_names, const char* options) { cl_int err = CL_SUCCESS; int i = 0; if (p->ref_n > 1) { err = CL_INVALID_OPERATION; goto error; } if (!check_cl_version_option(p, options)) { err = CL_BUILD_PROGRAM_FAILURE; goto error; } if (options) { if(p->build_opts == NULL || strcmp(options, p->build_opts) != 0) { if(p->build_opts) { cl_free(p->build_opts); p->build_opts = NULL; } TRY_ALLOC (p->build_opts, cl_calloc(strlen(options) + 1, sizeof(char))); memcpy(p->build_opts, options, strlen(options)); p->source_type = p->source ? FROM_SOURCE : p->binary ? FROM_BINARY : FROM_LLVM; } } if (options == NULL && p->build_opts) { p->source_type = p->source ? FROM_SOURCE : p->binary ? FROM_BINARY : FROM_LLVM; cl_free(p->build_opts); p->build_opts = NULL; } char temp_header_template[]= "/tmp/beignet.XXXXXX"; char* temp_header_path = mkdtemp(temp_header_template); if (p->source_type == FROM_SOURCE) { if (!CompilerSupported()) { err = CL_COMPILER_NOT_AVAILABLE; goto error; } //write the headers to /tmp/beignet.XXXXXX for include. 
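/* For instance (hypothetical input): with header_include_names[i] = "inc/defs.h"
 * and temp_header_path = "/tmp/beignet.Ab3XyZ", the loop below mkdir()s
 * /tmp/beignet.Ab3XyZ/inc and writes the header program's source into
 * /tmp/beignet.Ab3XyZ/inc/defs.h, so that #include "inc/defs.h" resolves when
 * the compiler is pointed at the temporary directory. The whole tree is
 * removed again once compilation finishes. */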
for (i = 0; i < num_input_headers; i++) { if(header_include_names[i] == NULL || input_headers[i] == NULL) continue; char temp_path[255]=""; strncpy(temp_path, temp_header_path, strlen(temp_header_path)); strncat(temp_path, "/", 1); strncat(temp_path, header_include_names[i], strlen(header_include_names[i])); char* dirc = strdup(temp_path); char* dir = dirname(dirc); mkdir(dir, 0755); if(access(dir, R_OK|W_OK) != 0){ err = CL_COMPILE_PROGRAM_FAILURE; goto error; } free(dirc); FILE* pfile = fopen(temp_path, "wb"); if(pfile){ fwrite(input_headers[i]->source, strlen(input_headers[i]->source), 1, pfile); fclose(pfile); }else{ err = CL_COMPILE_PROGRAM_FAILURE; goto error; } } p->opaque = compiler_program_compile_from_source(p->ctx->device->device_id, p->source, temp_header_path, p->build_log_max_sz, options, p->build_log, &p->build_log_sz); char rm_path[255]="rm "; strncat(rm_path, temp_header_path, strlen(temp_header_path)); strncat(rm_path, " -rf", 4); int temp = system(rm_path); if(temp){ assert(0); } if (UNLIKELY(p->opaque == NULL)) { if (p->build_log_sz > 0 && strstr(p->build_log, "error: error reading 'options'")) err = CL_INVALID_COMPILER_OPTIONS; else err = CL_COMPILE_PROGRAM_FAILURE; goto error; } /* Create all the kernels */ p->source_type = FROM_LLVM; p->binary_type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; }else if(p->source_type == FROM_BINARY){ err = CL_INVALID_OPERATION; return err; } p->is_built = 1; p->build_status = CL_BUILD_SUCCESS; return CL_SUCCESS; error: p->build_status = CL_BUILD_ERROR; return err; } LOCAL cl_kernel cl_program_create_kernel(cl_program p, const char *name, cl_int *errcode_ret) { cl_kernel from = NULL, to = NULL; cl_int err = CL_SUCCESS; uint32_t i = 0; /* Find the program first */ for (i = 0; i < p->ker_n; ++i) { assert(p->ker[i]); const char *ker_name = cl_kernel_get_name(p->ker[i]); if (strcmp(ker_name, name) == 0) { from = p->ker[i]; break; } } /* We were not able to find this named kernel */ if (UNLIKELY(from == NULL)) { err = CL_INVALID_KERNEL_NAME; goto error; } TRY_ALLOC(to, cl_kernel_dup(from)); exit: if (errcode_ret) *errcode_ret = err; return to; error: cl_kernel_delete(to); to = NULL; goto exit; } LOCAL cl_int cl_program_create_kernels_in_program(cl_program p, cl_kernel* ker) { int i = 0; if(ker == NULL) return CL_SUCCESS; for (i = 0; i < p->ker_n; ++i) { TRY_ALLOC_NO_ERR(ker[i], cl_kernel_dup(p->ker[i])); } return CL_SUCCESS; error: do { cl_kernel_delete(ker[i]); ker[i--] = NULL; } while(i > 0); return CL_OUT_OF_HOST_MEMORY; } LOCAL void cl_program_get_kernel_names(cl_program p, size_t size, char *names, size_t *size_ret) { int i = 0; const char *ker_name = NULL; size_t len = 0; if(size_ret) *size_ret = 0; if(p->ker == NULL) { return; } ker_name = cl_kernel_get_name(p->ker[i]); len = strlen(ker_name); if(names) { strncpy(names, cl_kernel_get_name(p->ker[0]), size - 1); if(size < len - 1) { if(size_ret) *size_ret = size; return; } size = size - len - 1; //sub \0 } if(size_ret) *size_ret = strlen(ker_name) + 1; //add NULL for (i = 1; i < p->ker_n; ++i) { ker_name = cl_kernel_get_name(p->ker[i]); len = strlen(ker_name); if(names) { strncat(names, ";", size); if(size >= 1) strncat(names, ker_name, size - 1); if(size < len + 1) { if(size_ret) *size_ret = size; break; } size = size - len - 1; } if(size_ret) *size_ret += len + 1; //add ';' } } Beignet-1.1.1-Source/src/cl_driver.cpp000664 001750 001750 00000002264 12576733264 016744 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute 
it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ extern "C" { #include "intel/intel_driver.h" #include "cl_utils.h" #include #include } namespace { /*! Just use c++ pre-main to initialize the call-backs */ struct OCLDriverCallBackInitializer { OCLDriverCallBackInitializer(void) { intel_setup_callbacks(); } }; /*! Set the call backs at pre-main time */ static OCLDriverCallBackInitializer cbInitializer; } /* namespace */ Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl000664 001750 001750 00000002013 12576733264 027000 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_array_to_2d_array(__read_only image2d_array_t src_image, __write_only image2d_array_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int4 src_coord; int4 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; src_coord.z = src_origin2 + k; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; dst_coord.z = dst_origin2 + k; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl000664 001750 001750 00000001777 12576733264 026377 0ustar00yryr000000 000000 kernel void __cl_copy_region_unalign_dst_offset ( global int* src, unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask, unsigned int shift, unsigned int dw_mask) { int i = get_global_id(0); unsigned int tmp = 0; if (i > size -1) return; /* last dw, need to be careful, not to overflow the source. 
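 * One way to read the guard below, assuming dw_mask == (1u << shift) - 1
 * (the mask for the bits a destination dword borrows from the next source
 * dword): with shift == 8,
 *   tmp = ((src[i] & 0xFFFFFF00u) >> 8) | ((src[i+1] & 0xFFu) << 24);
 * so only bits 31..24 of tmp come from src[i+1]. If last_mask selects none
 * of those bits ((last_mask & 0xFF000000u) == 0), the final work item never
 * needs src[i+1] and can skip that read instead of running past the buffer.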
*/ if ((i == size - 1) && ((last_mask & (~(~dw_mask >> shift))) == 0)) { tmp = ((src[src_offset + i] & ~dw_mask) >> shift); } else { tmp = ((src[src_offset + i] & ~dw_mask) >> shift) | ((src[src_offset + i + 1] & dw_mask) << (32 - shift)); } if (i == 0) { dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask)); } else if (i == size - 1) { dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask)); } else { dst[i+dst_offset] = tmp; } } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buf_rect_align4.cl000664 001750 001750 00000001426 12576733264 024704 0ustar00yryr000000 000000 kernel void __cl_copy_buffer_rect_align4 ( global int* src, global int* dst, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_offset, unsigned int dst_offset, unsigned int src_row_pitch, unsigned int src_slice_pitch, unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); if((i >= region0) || (j>= region1) || (k>=region2)) return; src_offset += k * src_slice_pitch + j * src_row_pitch + i; dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; dst[dst_offset] = src[src_offset]; } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buf_align4.cl000664 001750 001750 00000000442 12576733264 023664 0ustar00yryr000000 000000 kernel void __cl_copy_region_align4 ( global float* src, unsigned int src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { int i = get_global_id(0); if (i < size) dst[i+dst_offset] = src[i+src_offset]; } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl000664 001750 001750 00000001635 12576733264 027007 0ustar00yryr000000 000000 kernel void __cl_copy_image_1d_array_to_1d_array(__read_only image1d_array_t src_image, __write_only image1d_array_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int2 src_coord; int2 dst_coord; if((i >= region0) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin2 + k; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin2 + k; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_image_2d.cl000664 001750 001750 00000001016 12576733264 023273 0ustar00yryr000000 000000 kernel void __cl_fill_image_2d( __write_only image2d_t image, float4 pattern, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int origin0, unsigned int origin1, unsigned int origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int2 coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; coord.x = origin0 + i; coord.y = origin1 + j; write_imagef(image, coord, pattern); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_to_buffer.cl000664 001750 001750 00000001540 12576733264 025354 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_to_buffer( __read_only image2d_t image, global uchar* buffer, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_offset) { 
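/* This kernel copies one byte per work item: the sampler read below fetches a
 * texel and only color.x is stored, the buffer side being addressed as a
 * row-major byte array. Illustrative sizes: with region0 = 640 and
 * region1 = 480, work item (i=3, j=2, k=0) writes
 * buffer[dst_offset + 2*640 + 3]. */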
int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); uint4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int2 src_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; color = read_imageui(image, sampler, src_coord); dst_offset += (k * region1 + j) * region0 + i; buffer[dst_offset] = color.x; } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_1d_to_1d.cl000664 001750 001750 00000001551 12576733264 024410 0ustar00yryr000000 000000 kernel void __cl_copy_image_1d_to_1d(__read_only image1d_t src_image, __write_only image1d_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int src_coord; int dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord = src_origin0 + i; dst_coord = dst_origin0 + i; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_3d_to_buffer.cl000664 001750 001750 00000001676 12576733264 025367 0ustar00yryr000000 000000 #define IMAGE_TYPE image3d_t #define COORD_TYPE int4 kernel void __cl_copy_image_3d_to_buffer ( __read_only IMAGE_TYPE image, global uchar* buffer, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_offset) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); uint4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; COORD_TYPE src_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; src_coord.z = src_origin2 + k; color = read_imageui(image, sampler, src_coord); dst_offset += (k * region1 + j) * region0 + i; buffer[dst_offset] = color.x; } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_image_2d_array.cl000664 001750 001750 00000001063 12576733264 024473 0ustar00yryr000000 000000 kernel void __cl_fill_image_2d_array( __write_only image2d_array_t image, float4 pattern, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int origin0, unsigned int origin1, unsigned int origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; coord.x = origin0 + i; coord.y = origin1 + j; coord.z = origin2 + k; write_imagef(image, coord, pattern); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl000664 001750 001750 00000001656 12576733264 025616 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_array_to_2d(__read_only image2d_array_t src_image, __write_only image2d_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | 
CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int4 src_coord; int2 dst_coord; if((i >= region0) || (j>= region1)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; src_coord.z = src_origin2; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_buf_align4.cl000664 001750 001750 00000000335 12576733264 023641 0ustar00yryr000000 000000 kernel void __cl_fill_region_align4 ( global float* dst, float pattern, unsigned int offset, unsigned int size) { int i = get_global_id(0); if (i < size) { dst[i+offset] = pattern; } } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl000664 001750 001750 00000001777 12576733264 025623 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_array_to_3d(__read_only image2d_array_t src_image, __write_only image3d_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int4 src_coord; int4 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; src_coord.z = src_origin2 + k; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; dst_coord.z = dst_origin2 + k; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl000664 001750 001750 00000002045 12576733264 026361 0ustar00yryr000000 000000 kernel void __cl_copy_region_unalign_src_offset ( global int* src, unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask, unsigned int shift, unsigned int dw_mask, int src_less) { int i = get_global_id(0); unsigned int tmp = 0; if (i > size -1) return; if (i == 0) { tmp = ((src[src_offset + i] & dw_mask) << shift); } else if (src_less && i == size - 1) { // not exceed the bound of source tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)); } else { tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)) | ((src[src_offset + i] & dw_mask) << shift); } if (i == 0) { dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask)); } else if (i == size - 1) { dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask)); } else { dst[i+dst_offset] = tmp; } } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_to_buffer_align16.cl000664 001750 001750 00000001437 12576733264 026702 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_to_buffer_align16( __read_only image2d_t image, global uint4* buffer, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_offset) { int i = get_global_id(0); int j = get_global_id(1); if((i >= region0) || (j>= region1)) return; uint4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int2 src_coord; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; color = read_imageui(image, sampler, src_coord); *(buffer + 
dst_offset + region0*j + i) = color; } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl000664 001750 001750 00000001725 12576733264 025613 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_to_2d_array(__read_only image2d_t src_image, __write_only image2d_array_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int2 src_coord; int4 dst_coord; if((i >= region0) || (j>= region1)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; dst_coord.z = dst_origin2; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_buf_align8.cl000664 001750 001750 00000000656 12576733264 023653 0ustar00yryr000000 000000 #define COMPILER_ABS_FUNC_N(N) \ kernel void __cl_fill_region_align8_##N ( global float##N* dst, float##N pattern, \ unsigned int offset, unsigned int size) { \ int i = get_global_id(0); \ if (i < size) { \ dst[i+offset] = pattern; \ } \ } COMPILER_ABS_FUNC_N(2) COMPILER_ABS_FUNC_N(4) COMPILER_ABS_FUNC_N(8) COMPILER_ABS_FUNC_N(16) Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl000664 001750 001750 00000001777 12576733264 025623 0ustar00yryr000000 000000 kernel void __cl_copy_image_3d_to_2d_array(__read_only image3d_t src_image, __write_only image2d_array_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int4 src_coord; int4 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; src_coord.z = src_origin2 + k; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; dst_coord.z = dst_origin2 + k; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_to_2d.cl000664 001750 001750 00000001661 12576733264 024414 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_to_2d(__read_only image2d_t src_image, __write_only image2d_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int2 src_coord; int2 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } 
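/* For reference, the COMPILER_ABS_FUNC_N macro in cl_internal_fill_buf_align8.cl
 * above relies on token pasting; COMPILER_ABS_FUNC_N(4), for example, expands
 * (whitespace aside) to:
 *
 *   kernel void __cl_fill_region_align8_4 ( global float4* dst, float4 pattern,
 *       unsigned int offset, unsigned int size) {
 *     int i = get_global_id(0);
 *     if (i < size) {
 *       dst[i+offset] = pattern;
 *     }
 *   }
 *
 * yielding one fill kernel per vector width (2, 4, 8 and 16 floats). */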
Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_3d_to_2d.cl000664 001750 001750 00000001722 12576733264 024413 0ustar00yryr000000 000000 kernel void __cl_copy_image_3d_to_2d(__read_only image3d_t src_image, __write_only image2d_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int4 src_coord; int2 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; src_coord.z = src_origin2 + k; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_buf_unalign.cl000664 001750 001750 00000000337 12576733264 024122 0ustar00yryr000000 000000 kernel void __cl_fill_region_unalign ( global char * dst, char pattern, unsigned int offset, unsigned int size) { int i = get_global_id(0); if (i < size) { dst[i+offset] = pattern; } } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buffer_to_image_3d.cl000664 001750 001750 00000001443 12576733264 025357 0ustar00yryr000000 000000 kernel void __cl_copy_buffer_to_image_3d(__read_only image3d_t image, global uchar* buffer, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, unsigned int src_offset) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); uint4 color = (uint4)(0); int4 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; dst_coord.z = dst_origin2 + k; src_offset += (k * region1 + j) * region0 + i; color.x = buffer[src_offset]; write_imageui(image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_image_3d.cl000664 001750 001750 00000001047 12576733264 023300 0ustar00yryr000000 000000 kernel void __cl_fill_image_3d( __write_only image3d_t image, float4 pattern, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int origin0, unsigned int origin1, unsigned int origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; coord.x = origin0 + i; coord.y = origin1 + j; coord.z = origin2 + k; write_imagef(image, coord, pattern); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_3d_to_3d.cl000664 001750 001750 00000001763 12576733264 024421 0ustar00yryr000000 000000 kernel void __cl_copy_image_3d_to_3d(__read_only image3d_t src_image, __write_only image3d_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int4 src_coord; int4 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; 
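/* Note: these copy kernels all build their sampler from
 * CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST, so each
 * read returns exactly one texel at an integer coordinate; no filtering or
 * coordinate scaling can alter the data being copied. */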
src_coord.z = src_origin2 + k; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; dst_coord.z = dst_origin2 + k; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl000664 001750 001750 00000001226 12576733264 026517 0ustar00yryr000000 000000 kernel void __cl_copy_region_unalign_same_offset ( global int* src, unsigned int src_offset, global int* dst, unsigned int dst_offset, unsigned int size, unsigned int first_mask, unsigned int last_mask) { int i = get_global_id(0); if (i > size -1) return; if (i == 0) { dst[dst_offset] = (dst[dst_offset] & first_mask) | (src[src_offset] & (~first_mask)); } else if (i == size - 1) { dst[i+dst_offset] = (src[i+src_offset] & last_mask) | (dst[i+dst_offset] & (~last_mask)); } else { dst[i+dst_offset] = src[i+src_offset]; } } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buffer_to_image_2d_align16.cl000664 001750 001750 00000001316 12576733264 026676 0ustar00yryr000000 000000 kernel void __cl_copy_buffer_to_image_2d_align16(__write_only image2d_t image, global uint4* buffer, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, unsigned int src_offset) { int i = get_global_id(0); int j = get_global_id(1); uint4 color = (uint4)(0); int2 dst_coord; if((i >= region0) || (j>= region1)) return; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; src_offset += j * region0 + i; color = buffer[src_offset]; write_imageui(image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_buf_align2.cl000664 001750 001750 00000000336 12576733264 023640 0ustar00yryr000000 000000 kernel void __cl_fill_region_align2 ( global char2 * dst, char2 pattern, unsigned int offset, unsigned int size) { int i = get_global_id(0); if (i < size) { dst[i+offset] = pattern; } } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_buf_align128.cl000664 001750 001750 00000000467 12576733264 024016 0ustar00yryr000000 000000 kernel void __cl_fill_region_align128 ( global float16* dst, float16 pattern0, unsigned int offset, unsigned int size, float16 pattern1) { int i = get_global_id(0); if (i < size) { dst[i*2+offset] = pattern0; dst[i*2+offset+1] = pattern1; } } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_image_2d_to_3d.cl000664 001750 001750 00000001766 12576733264 024423 0ustar00yryr000000 000000 kernel void __cl_copy_image_2d_to_3d(__read_only image2d_t src_image, __write_only image3d_t dst_image, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int4 color; const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; int2 src_coord; int4 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; src_coord.x = src_origin0 + i; src_coord.y = src_origin1 + j; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; dst_coord.z = dst_origin2 + k; color = read_imagei(src_image, sampler, src_coord); write_imagei(dst_image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buf_rect.cl000664 001750 001750 00000001421 12576733264 023441 0ustar00yryr000000 000000 kernel void __cl_copy_buffer_rect ( global char* src, global char* 
dst, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int src_offset, unsigned int dst_offset, unsigned int src_row_pitch, unsigned int src_slice_pitch, unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); if((i >= region0) || (j>= region1) || (k>=region2)) return; src_offset += k * src_slice_pitch + j * src_row_pitch + i; dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; dst[dst_offset] = src[src_offset]; } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_image_1d.cl000664 001750 001750 00000000762 12576733264 023301 0ustar00yryr000000 000000 kernel void __cl_fill_image_1d( __write_only image1d_t image, float4 pattern, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int origin0, unsigned int origin1, unsigned int origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; coord = origin0 + i; write_imagef(image, coord, pattern); } Beignet-1.1.1-Source/src/kernels/cl_internal_fill_image_1d_array.cl000664 001750 001750 00000001032 12576733264 024466 0ustar00yryr000000 000000 kernel void __cl_fill_image_1d_array( __write_only image1d_array_t image, float4 pattern, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int origin0, unsigned int origin1, unsigned int origin2) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); int2 coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; coord.x = origin0 + i; coord.y = origin2 + k; write_imagef(image, coord, pattern); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buffer_to_image_2d.cl000664 001750 001750 00000001402 12576733264 025351 0ustar00yryr000000 000000 kernel void __cl_copy_buffer_to_image_2d(__read_only image2d_t image, global uchar* buffer, unsigned int region0, unsigned int region1, unsigned int region2, unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, unsigned int src_offset) { int i = get_global_id(0); int j = get_global_id(1); int k = get_global_id(2); uint4 color = (uint4)(0); int2 dst_coord; if((i >= region0) || (j>= region1) || (k>=region2)) return; dst_coord.x = dst_origin0 + i; dst_coord.y = dst_origin1 + j; src_offset += (k * region1 + j) * region0 + i; color.x = buffer[src_offset]; write_imageui(image, dst_coord, color); } Beignet-1.1.1-Source/src/kernels/cl_internal_copy_buf_align16.cl000664 001750 001750 00000000730 12576733264 023747 0ustar00yryr000000 000000 kernel void __cl_copy_region_align16 ( global float* src, unsigned int src_offset, global float* dst, unsigned int dst_offset, unsigned int size) { int i = get_global_id(0) * 4; if (i < size*4) { dst[i+dst_offset] = src[i+src_offset]; dst[i+dst_offset + 1] = src[i+src_offset + 1]; dst[i+dst_offset + 2] = src[i+src_offset + 2]; dst[i+dst_offset + 3] = src[i+src_offset + 3]; } } Beignet-1.1.1-Source/src/cl_image.c000664 001750 001750 00000021472 12576733264 016175 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "cl_image.h" #include "cl_utils.h" #include "intel/intel_defines.h" #include LOCAL cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp) { assert(bpp); if(fmt == NULL) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; const uint32_t type = fmt->image_channel_data_type; const uint32_t order = fmt->image_channel_order; switch (type) { #define DECL_BPP(DATA_TYPE, VALUE) case DATA_TYPE: *bpp = VALUE; DECL_BPP(CL_SNORM_INT8, 1); break; DECL_BPP(CL_SNORM_INT16, 2); break; DECL_BPP(CL_UNORM_INT8, 1); break; DECL_BPP(CL_UNORM_INT16, 2); break; DECL_BPP(CL_UNORM_SHORT_565, 2); if (order != CL_RGBx && order != CL_RGB) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; DECL_BPP(CL_UNORM_SHORT_555, 2); if (order != CL_RGBx && order != CL_RGB) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; DECL_BPP(CL_UNORM_INT_101010, 4); if (order != CL_RGBx && order != CL_RGB) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; DECL_BPP(CL_SIGNED_INT8, 1); break; DECL_BPP(CL_SIGNED_INT16, 2); break; DECL_BPP(CL_SIGNED_INT32, 4); break; DECL_BPP(CL_UNSIGNED_INT8, 1); break; DECL_BPP(CL_UNSIGNED_INT16, 2); break; DECL_BPP(CL_UNSIGNED_INT32, 4); break; DECL_BPP(CL_HALF_FLOAT, 2); break; DECL_BPP(CL_FLOAT, 4); break; #undef DECL_BPP default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; }; switch (order) { case CL_Rx: break; case CL_R: break; case CL_A: break; case CL_RA: *bpp *= 2; break; case CL_RG: *bpp *= 2; break; case CL_INTENSITY: case CL_LUMINANCE: if (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 && type != CL_SNORM_INT8 && type != CL_SNORM_INT16 && type != CL_HALF_FLOAT && type != CL_FLOAT) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; case CL_RGB: case CL_RGBx: if (type != CL_UNORM_SHORT_555 && type != CL_UNORM_SHORT_565 && type != CL_UNORM_INT_101010) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; break; case CL_RGBA: *bpp *= 4; break; case CL_ARGB: case CL_BGRA: if (type != CL_UNORM_INT8 && type != CL_SIGNED_INT8 && type != CL_SNORM_INT8 && type != CL_UNSIGNED_INT8) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; *bpp *= 4; break; default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; }; return CL_SUCCESS; } LOCAL uint32_t cl_image_get_intel_format(const cl_image_format *fmt) { const uint32_t type = fmt->image_channel_data_type; const uint32_t order = fmt->image_channel_order; switch (order) { case CL_R: #if 0 case CL_Rx: case CL_A: case CL_INTENSITY: case CL_LUMINANCE: if ((order == CL_INTENSITY || order == CL_LUMINANCE) && (type != CL_UNORM_INT8 && type != CL_UNORM_INT16 && type != CL_SNORM_INT8 && type != CL_SNORM_INT16 && type != CL_HALF_FLOAT && type != CL_FLOAT)) return INTEL_UNSUPPORTED_FORMAT; #endif /* XXX it seems we have some acuracy compatible issue with snomr_int8/16, * have to disable those formats currently. 
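 * For the formats that are enabled, the switch below is a direct
 * channel-for-channel mapping onto Gen surface formats; e.g. a
 * (CL_RGBA, CL_UNORM_INT8) image maps to I965_SURFACEFORMAT_R8G8B8A8_UNORM,
 * for which cl_image_byte_per_pixel() above reports 1 byte x 4 channels = 4
 * bytes per pixel.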
*/ switch (type) { case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16_FLOAT; case CL_FLOAT: return I965_SURFACEFORMAT_R32_FLOAT; // case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16_SNORM; // case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8_SNORM; case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8_UNORM; case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16_UNORM; case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8_SINT; case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16_SINT; case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32_SINT; case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8_UINT; case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16_UINT; case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32_UINT; default: return INTEL_UNSUPPORTED_FORMAT; }; case CL_RG: switch (type) { case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8_UNORM; case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8_UINT; default: return INTEL_UNSUPPORTED_FORMAT; }; #if 0 case CL_RG: case CL_RA: switch (type) { case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16_FLOAT; case CL_FLOAT: return I965_SURFACEFORMAT_R32G32_FLOAT; case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16_SNORM; case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8_SNORM; case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8_UNORM; case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16_UNORM; case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8_SINT; case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16G16_SINT; case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32_SINT; case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8_UINT; case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT; case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32_UINT; default: return INTEL_UNSUPPORTED_FORMAT; }; case CL_RGB: case CL_RGBx: switch (type) { case CL_UNORM_INT_101010: return I965_SURFACEFORMAT_R10G10B10A2_UNORM; case CL_UNORM_SHORT_565: case CL_UNORM_SHORT_555: default: return INTEL_UNSUPPORTED_FORMAT; }; #endif case CL_RGBA: switch (type) { case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16B16A16_FLOAT; case CL_FLOAT: return I965_SURFACEFORMAT_R32G32B32A32_FLOAT; // case CL_SNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SNORM; // case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SNORM; case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UNORM; case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UNORM; case CL_SIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_SINT; case CL_SIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_SINT; case CL_SIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_SINT; case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8B8A8_UINT; case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UINT; case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_UINT; default: return INTEL_UNSUPPORTED_FORMAT; }; case CL_ARGB: return INTEL_UNSUPPORTED_FORMAT; case CL_BGRA: switch (type) { case CL_UNORM_INT8: return I965_SURFACEFORMAT_B8G8R8A8_UNORM; default: return INTEL_UNSUPPORTED_FORMAT; }; default: return INTEL_UNSUPPORTED_FORMAT; }; } static const uint32_t cl_image_order[] = { CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB, CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx }; static const uint32_t cl_image_type[] = { CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT }; static const size_t 
cl_image_order_n = SIZEOF32(cl_image_order); static const size_t cl_image_type_n = SIZEOF32(cl_image_type); cl_int cl_image_get_supported_fmt(cl_context ctx, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format *image_formats, cl_uint *num_image_formats) { size_t i, j, n = 0; for (i = 0; i < cl_image_order_n; ++i) for (j = 0; j < cl_image_type_n; ++j) { const cl_image_format fmt = { .image_channel_order = cl_image_order[i], .image_channel_data_type = cl_image_type[j] }; const uint32_t intel_fmt = cl_image_get_intel_format(&fmt); if (intel_fmt == INTEL_UNSUPPORTED_FORMAT) continue; if (n < num_entries && image_formats) image_formats[n] = fmt; n++; } if (num_image_formats) *num_image_formats = n; return CL_SUCCESS; } Beignet-1.1.1-Source/src/cl_command_queue_gen7.c000664 001750 001750 00000036543 12605356050 020645 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ #include "cl_command_queue.h" #include "cl_context.h" #include "cl_program.h" #include "cl_kernel.h" #include "cl_device_id.h" #include "cl_mem.h" #include "cl_utils.h" #include "cl_alloc.h" #include <assert.h> #include <stdio.h> #include <string.h> #define MAX_GROUP_SIZE_IN_HALFSLICE 512 static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+256; } /* "Varying" payload is the part of the curbe that changes across threads in the * same work group.
Right now, it consists of local IDs and block IPs */ static cl_int cl_set_varying_payload(const cl_kernel ker, char *data, const size_t *local_wk_sz, size_t simd_sz, size_t cst_sz, size_t thread_n) { uint32_t *ids[3] = {NULL,NULL,NULL}; uint16_t *block_ips = NULL; size_t i, j, k, curr = 0; int32_t id_offset[3], ip_offset; cl_int err = CL_SUCCESS; int32_t dw_ip_offset = -1; id_offset[0] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0); id_offset[1] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0); id_offset[2] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0); ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0); if (ip_offset < 0) dw_ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_DW_BLOCK_IP, 0); assert(ip_offset < 0 || dw_ip_offset < 0); assert(id_offset[0] >= 0 && id_offset[1] >= 0 && id_offset[2] >= 0 && (ip_offset >= 0 || dw_ip_offset >= 0)); TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz)); TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz)); /* 0xffff means that the lane is inactive */ memset(block_ips, 0xff, sizeof(int16_t)*thread_n*simd_sz); /* Compute the IDs and the block IPs */ for (k = 0; k < local_wk_sz[2]; ++k) for (j = 0; j < local_wk_sz[1]; ++j) for (i = 0; i < local_wk_sz[0]; ++i, ++curr) { ids[0][curr] = i; ids[1][curr] = j; ids[2][curr] = k; block_ips[curr] = 0; } /* Copy them to the curbe buffer */ curr = 0; for (i = 0; i < thread_n; ++i, data += cst_sz) { uint32_t *ids0 = (uint32_t *) (data + id_offset[0]); uint32_t *ids1 = (uint32_t *) (data + id_offset[1]); uint32_t *ids2 = (uint32_t *) (data + id_offset[2]); uint16_t *ips = (uint16_t *) (data + ip_offset); uint32_t *dw_ips = (uint32_t *) (data + dw_ip_offset); for (j = 0; j < simd_sz; ++j, ++curr) { ids0[j] = ids[0][curr]; ids1[j] = ids[1][curr]; ids2[j] = ids[2][curr]; if (ip_offset >= 0) ips[j] = block_ips[curr]; if (dw_ip_offset >= 0) dw_ips[j] = block_ips[curr]; } } error: return err; } static int cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker) { /* calculate constant buffer size * we need raw_size & aligned_size */ GET_QUEUE_THREAD_GPGPU(queue); int32_t arg; size_t offset = 0; uint32_t raw_size = 0, aligned_size =0; gbe_program prog = ker->program->opaque; const int32_t arg_n = interp_kernel_get_arg_num(ker->opaque); size_t global_const_size = interp_program_get_global_constant_size(prog); raw_size = global_const_size; // Surface states need 4-byte alignment, and constant arguments' buffers are // already aligned to 4 bytes at allocation, so aligning the global constant // size to 4 ensures the final aligned_size is a multiple of 4 as well.
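#if 0
/* Illustrative sketch only, not original driver code: it assumes ALIGN in
 * cl_utils.h is the usual power-of-two rounding macro,
 * ALIGN(v, a) == ((v) + (a) - 1) & ~((a) - 1). Under that assumption the
 * invariant relied on below holds, for example: */
assert(ALIGN(13, 4) == 16);   /* 13 rounds up to the next multiple of 4 */
assert(ALIGN(16, 4) == 16);   /* already-aligned sizes are unchanged */
assert(ALIGN(16, 32) == 32);  /* larger argument alignments round up too */
#endif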
aligned_size = ALIGN(raw_size, 4); /* Reserve 8 bytes to get rid of 0 address */ if(global_const_size == 0) aligned_size = 8; for (arg = 0; arg < arg_n; ++arg) { const enum gbe_arg_type type = interp_kernel_get_arg_type(ker->opaque, arg); if (type == GBE_ARG_CONSTANT_PTR && ker->args[arg].mem) { uint32_t alignment = interp_kernel_get_arg_align(ker->opaque, arg); assert(alignment != 0); cl_mem mem = ker->args[arg].mem; raw_size += mem->size; aligned_size = ALIGN(aligned_size, alignment); aligned_size += mem->size; } } if(raw_size == 0) return 0; cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, aligned_size, BTI_CONSTANT); if (bo == NULL) return -1; cl_buffer_map(bo, 1); char * cst_addr = cl_buffer_get_virtual(bo); if (cst_addr == NULL) return -1; /* upload the global constant data */ if (global_const_size > 0) { interp_program_get_global_constant_data(prog, (char*)(cst_addr+offset)); offset += global_const_size; } /* reserve 8 bytes to get rid of 0 address */ if(global_const_size == 0) { offset = 8; } /* upload constant buffer argument */ int32_t curbe_offset = 0; for (arg = 0; arg < arg_n; ++arg) { const enum gbe_arg_type type = interp_kernel_get_arg_type(ker->opaque, arg); if (type == GBE_ARG_CONSTANT_PTR && ker->args[arg].mem) { cl_mem mem = ker->args[arg].mem; uint32_t alignment = interp_kernel_get_arg_align(ker->opaque, arg); offset = ALIGN(offset, alignment); curbe_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg); assert(curbe_offset >= 0); *(uint32_t *) (ker->curbe + curbe_offset) = offset; cl_buffer_map(mem->bo, 1); void * addr = cl_buffer_get_virtual(mem->bo); memcpy(cst_addr + offset, addr, mem->size); cl_buffer_unmap(mem->bo); offset += mem->size; } } cl_buffer_unmap(bo); return 0; } /* Will return the total amount of slm used */ static int32_t cl_curbe_fill(cl_kernel ker, const uint32_t work_dim, const size_t *global_wk_off, const size_t *global_wk_sz, const size_t *local_wk_sz, size_t thread_n) { int32_t offset; #define UPLOAD(ENUM, VALUE) \ if ((offset = interp_kernel_get_curbe_offset(ker->opaque, ENUM, 0)) >= 0) \ *((uint32_t *) (ker->curbe + offset)) = VALUE; UPLOAD(GBE_CURBE_LOCAL_SIZE_X, local_wk_sz[0]); UPLOAD(GBE_CURBE_LOCAL_SIZE_Y, local_wk_sz[1]); UPLOAD(GBE_CURBE_LOCAL_SIZE_Z, local_wk_sz[2]); UPLOAD(GBE_CURBE_GLOBAL_SIZE_X, global_wk_sz[0]); UPLOAD(GBE_CURBE_GLOBAL_SIZE_Y, global_wk_sz[1]); UPLOAD(GBE_CURBE_GLOBAL_SIZE_Z, global_wk_sz[2]); UPLOAD(GBE_CURBE_GLOBAL_OFFSET_X, global_wk_off[0]); UPLOAD(GBE_CURBE_GLOBAL_OFFSET_Y, global_wk_off[1]); UPLOAD(GBE_CURBE_GLOBAL_OFFSET_Z, global_wk_off[2]); UPLOAD(GBE_CURBE_GROUP_NUM_X, global_wk_sz[0]/local_wk_sz[0]); UPLOAD(GBE_CURBE_GROUP_NUM_Y, global_wk_sz[1]/local_wk_sz[1]); UPLOAD(GBE_CURBE_GROUP_NUM_Z, global_wk_sz[2]/local_wk_sz[2]); UPLOAD(GBE_CURBE_THREAD_NUM, thread_n); UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD /* get_sub_group_id needs it */ if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LANE_ID, 0)) >= 0) { const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); uint32_t *laneid = (uint32_t *) (ker->curbe + offset); int32_t i; for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; } /* Write identity for the stack pointer. 
This is required by the stack pointer * computation in the kernel */ if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_STACK_POINTER, 0)) >= 0) { const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); uint32_t *stackptr = (uint32_t *) (ker->curbe + offset); int32_t i; for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i; } /* Handle the various offsets to SLM */ const int32_t arg_n = interp_kernel_get_arg_num(ker->opaque); int32_t arg, slm_offset = interp_kernel_get_slm_size(ker->opaque); ker->local_mem_sz = 0; for (arg = 0; arg < arg_n; ++arg) { const enum gbe_arg_type type = interp_kernel_get_arg_type(ker->opaque, arg); if (type != GBE_ARG_LOCAL_PTR) continue; uint32_t align = interp_kernel_get_arg_align(ker->opaque, arg); assert(align != 0); slm_offset = ALIGN(slm_offset, align); offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_KERNEL_ARGUMENT, arg); assert(offset >= 0); uint32_t *slmptr = (uint32_t *) (ker->curbe + offset); *slmptr = slm_offset; slm_offset += ker->args[arg].local_sz; ker->local_mem_sz += ker->args[arg].local_sz; } return slm_offset; } static void cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker) { cl_context ctx = ker->program->ctx; cl_device_id device = ctx->device; const int32_t per_lane_stack_sz = ker->stack_size; const int32_t value = GBE_CURBE_EXTRA_ARGUMENT; const int32_t sub_value = GBE_STACK_BUFFER; const int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, sub_value); int32_t stack_sz = per_lane_stack_sz; /* No stack required for this kernel */ if (per_lane_stack_sz == 0) return; /* The stack size is given for *each* SIMD lane. So, we accordingly compute * the size we need for the complete machine */ assert(offset >= 0); stack_sz *= interp_kernel_get_simd_width(ker->opaque); stack_sz *= device->max_compute_unit * ctx->device->max_thread_per_unit; /* Because HSW computes each thread's stack offset relative to its half-slice, an unbalanced thread schedule across half-slices can push the offset out of bounds. Since GT4 has at most 4 half-slices, multiply the stack size by 4 to be safe. */ if(cl_driver_get_ver(ctx->drv) == 75) stack_sz *= 4; cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE); } static int cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) { int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER; int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); size_t buf_size = global_sz * sizeof(int) * printf_num; if (offset > 0) { if (cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size*2, offset, interp_get_printf_indexbuf_bti(printf_info)) != 0) return -1; } value = GBE_CURBE_PRINTF_BUF_POINTER; offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); buf_size = interp_get_printf_sizeof_size(printf_info) * global_sz; /* Because a printf may sit inside a loop whose trip count static analysis cannot determine, we make the data buffer as big as we reasonably can; printf records that fall outside it are discarded. */ if (buf_size < 1*1024) buf_size = 1*1024*1024; else buf_size = 16*1024*1024; //at most.
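#if 0
/* Illustrative sketch only, not original driver code; the values below are
 * hypothetical. If interp_get_printf_sizeof_size() reported a 32-byte printf
 * record and the dispatch covered 2048 work items, the exact per-iteration
 * need would be 64KB; since 64KB >= 1KB, the clamp above would grow the
 * buffer to the 16MB ceiling: */
size_t example_record_sz = 32;    /* hypothetical record size */
size_t example_global_sz = 2048;  /* hypothetical global work size */
size_t example_need = example_record_sz * example_global_sz; /* 65536 bytes */
#endif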
if (offset > 0) { if (cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset, interp_get_printf_buf_bti(printf_info)) != 0) return -1; } return 0; } LOCAL cl_int cl_command_queue_ND_range_gen7(cl_command_queue queue, cl_kernel ker, const uint32_t work_dim, const size_t *global_wk_off, const size_t *global_wk_sz, const size_t *local_wk_sz) { GET_QUEUE_THREAD_GPGPU(queue); cl_context ctx = queue->ctx; char *final_curbe = NULL; /* Includes them and one sub-buffer per group */ cl_gpgpu_kernel kernel; const uint32_t simd_sz = cl_kernel_get_simd_width(ker); size_t i, batch_sz = 0u, local_sz = 0u; size_t cst_sz = ker->curbe_sz= interp_kernel_get_curbe_size(ker->opaque); int32_t scratch_sz = interp_kernel_get_scratch_size(ker->opaque); size_t thread_n = 0u; int printf_num = 0; cl_int err = CL_SUCCESS; size_t global_size = global_wk_sz[0] * global_wk_sz[1] * global_wk_sz[2]; void* printf_info = NULL; /* Setup kernel */ kernel.name = "KERNEL"; kernel.grf_blocks = 128; kernel.bo = ker->bo; kernel.barrierID = 0; kernel.slm_sz = 0; kernel.use_slm = interp_kernel_use_slm(ker->opaque); /* Compute the number of HW threads we need */ if(UNLIKELY((err = cl_kernel_work_group_sz(ker, local_wk_sz, 3, &local_sz)) != CL_SUCCESS)) { fprintf(stderr, "Beignet: Work group size exceeds the kernel's work group size.\n"); return err; } kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz; kernel.curbe_sz = cst_sz; if (scratch_sz > ker->program->ctx->device->scratch_mem_size) { fprintf(stderr, "Beignet: Out of scratch memory %d.\n", scratch_sz); return CL_OUT_OF_RESOURCES; } /* Curbe step 1: fill the constant urb buffer data shared by all threads */ if (ker->curbe) { kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n); if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) { fprintf(stderr, "Beignet: Out of shared local memory %d.\n", kernel.slm_sz); return CL_OUT_OF_RESOURCES; } } printf_info = interp_dup_printfset(ker->opaque); cl_gpgpu_set_printf_info(gpgpu, printf_info, (size_t *)global_wk_sz); /* Setup the kernel */ if (queue->props & CL_QUEUE_PROFILING_ENABLE) err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 1); else err = cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit * ctx->device->max_thread_per_unit, cst_sz / 32, 0); if (err != 0) goto error; printf_num = interp_get_printf_num(printf_info); if (printf_num) { if (cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size) != 0) goto error; } /* Bind user buffers */ cl_command_queue_bind_surface(queue, ker); /* Bind user images */ cl_command_queue_bind_image(queue, ker); /* Bind all samplers */ cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz); if (cl_gpgpu_set_scratch(gpgpu, scratch_sz) != 0) goto error; /* Bind a stack if needed */ cl_bind_stack(gpgpu, ker); if (cl_upload_constant_buffer(queue, ker) != 0) goto error; cl_gpgpu_states_setup(gpgpu, &kernel); /* Curbe step 2.
Give the local IDs and upload them to video memory */ if (ker->curbe) { assert(cst_sz > 0); TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz)); for (i = 0; i < thread_n; ++i) { memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz); } TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n); if (cl_gpgpu_upload_curbes(gpgpu, final_curbe, thread_n*cst_sz) != 0) goto error; } /* Start a new batch buffer */ batch_sz = cl_kernel_compute_batch_sz(ker); if (cl_gpgpu_batch_reset(gpgpu, batch_sz) != 0) goto error; cl_set_thread_batch_buf(queue, cl_gpgpu_ref_batch_buf(gpgpu)); cl_gpgpu_batch_start(gpgpu); /* Issue the GPGPU_WALKER command */ cl_gpgpu_walker(gpgpu, simd_sz, thread_n, global_wk_off, global_wk_sz, local_wk_sz); /* Close the batch buffer and submit it */ cl_gpgpu_batch_end(gpgpu, 0); return CL_SUCCESS; error: /* only some command/buffer internal errors reach here, so return error code OOR */ return CL_OUT_OF_RESOURCES; } Beignet-1.1.1-Source/src/x11/mesa_egl_extension.h000664 001750 001750 00000002021 12576733264 020710 0ustar00yryr000000 000000 #ifndef __MESA_EGL_EXTENSION_H__ #define __MESA_EGL_EXTENSION_H__ #include #include #include #define EGL_GL_TEXTURE_MESA 0x3300 /* eglAcquireResource target */ #define EGL_GL_BUFFER_OBJECT_MESA 0x3301 /* eglAcquireResource target */ #define EGL_GL_RENDER_BUFFER_MESA 0x3302 /* eglAcquireResource target */ #define EGL_GL_TEXTURE_ID_MESA 0x3303 /* eglAcquireResource attribute */ #define EGL_GL_TEXTURE_LEVEL_MESA 0x3304 /* eglAcquireResource attribute */ #define EGL_GL_TEXTURE_TARGET_MESA 0x3305 /* eglAcquireResource attribute */ #define EGL_GL_BUFFER_OBJECT_ID_MESA 0x3306 /* eglAcquireResource attribute */ #define EGL_GL_RENDER_BUFFER_ID_MESA 0x3307 /* eglAcquireResource attribute */ EGLBoolean eglAcquireResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list, void * user_data); EGLBoolean eglReleaseResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list); #endif Beignet-1.1.1-Source/src/x11/va_dri2str.h000664 001750 001750 00000013537 12576733264 017135 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see <http://www.gnu.org/licenses/>. * * Author: Benjamin Segovia */ /* * Copyright 2008 Red Hat, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. * * Authors: * Kristian Hgsberg (krh@redhat.com) */ #ifndef _DRI2_PROTO_H_ #define _DRI2_PROTO_H_ #define DRI2_NAME "DRI2" #define DRI2_MAJOR 1 #define DRI2_MINOR 0 #define DRI2NumberErrors 0 #define DRI2NumberEvents 0 #define DRI2NumberRequests 7 #define X_DRI2QueryVersion 0 #define X_DRI2Connect 1 #define X_DRI2Authenticate 2 #define X_DRI2CreateDrawable 3 #define X_DRI2DestroyDrawable 4 #define X_DRI2GetBuffers 5 #define X_DRI2CopyRegion 6 typedef struct { CARD32 attachment B32; CARD32 name B32; CARD32 pitch B32; CARD32 cpp B32; CARD32 flags B32; } xDRI2Buffer; typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 majorVersion B32; CARD32 minorVersion B32; } xDRI2QueryVersionReq; #define sz_xDRI2QueryVersionReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 majorVersion B32; CARD32 minorVersion B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; } xDRI2QueryVersionReply; #define sz_xDRI2QueryVersionReply 32 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 window B32; CARD32 drivertype B32; } xDRI2ConnectReq; #define sz_xDRI2ConnectReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 driverNameLength B32; CARD32 deviceNameLength B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; } xDRI2ConnectReply; #define sz_xDRI2ConnectReply 32 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 window B32; CARD32 magic B32; } xDRI2AuthenticateReq; #define sz_xDRI2AuthenticateReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 authenticated B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; CARD32 pad6 B32; } xDRI2AuthenticateReply; #define sz_xDRI2AuthenticateReply 32 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 drawable B32; } xDRI2CreateDrawableReq; #define sz_xDRI2CreateDrawableReq 8 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 drawable B32; } xDRI2DestroyDrawableReq; #define sz_xDRI2DestroyDrawableReq 8 typedef struct { CARD8 reqType; CARD8 dri2Reqtype; CARD16 length B16; CARD32 drawable B32; CARD32 count B32; } xDRI2GetBuffersReq; #define sz_xDRI2GetBuffersReq 12 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 width B32; CARD32 height B32; CARD32 count B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; } xDRI2GetBuffersReply; #define sz_xDRI2GetBuffersReply 32 typedef struct { CARD8 reqType; CARD8 
dri2Reqtype; CARD16 length B16; CARD32 drawable B32; CARD32 region B32; CARD32 dest B32; CARD32 src B32; } xDRI2CopyRegionReq; #define sz_xDRI2CopyRegionReq 20 typedef struct { BYTE type; /* X_Reply */ BYTE pad1; CARD16 sequenceNumber B16; CARD32 length B32; CARD32 pad2 B32; CARD32 pad3 B32; CARD32 pad4 B32; CARD32 pad5 B32; CARD32 pad6 B32; CARD32 pad7 B32; } xDRI2CopyRegionReply; #define sz_xDRI2CopyRegionReply 32 #endif Beignet-1.1.1-Source/src/x11/dricommon.c000664 001750 001750 00000021047 12576733264 017033 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia * Note: the code is taken from libva code base */ /* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include "x11/va_dri2.h" #include "x11/va_dri2tokens.h" #include "x11/dricommon.h" #include "cl_utils.h" #include "cl_alloc.h" #include #include #include #include #define LOCAL __attribute__ ((visibility ("internal"))) LOCAL dri_drawable_t* dri_state_do_drawable_hash(dri_state_t *state, XID drawable) { int index = drawable % DRAWABLE_HASH_SZ; struct dri_drawable *dri_drawable = state->drawable_hash[index]; while (dri_drawable) { if (dri_drawable->x_drawable == drawable) return dri_drawable; dri_drawable = dri_drawable->next; } dri_drawable = dri_state_create_drawable(state, drawable); dri_drawable->x_drawable = drawable; dri_drawable->next = state->drawable_hash[index]; state->drawable_hash[index] = dri_drawable; return dri_drawable; } LOCAL void dri_state_free_drawable_hash(dri_state_t *state) { int i; struct dri_drawable *dri_drawable, *prev; for (i = 0; i < DRAWABLE_HASH_SZ; i++) { dri_drawable = state->drawable_hash[i]; while (dri_drawable) { prev = dri_drawable; dri_drawable = prev->next; dri_state_destroy_drawable(state, prev); } } } LOCAL dri_drawable_t* dri_state_get_drawable(dri_state_t *state, XID drawable) { return dri_state_do_drawable_hash(state, drawable); } LOCAL void dri_state_init_drawable_hash_table(dri_state_t *state) { int i; for(i=0; i < DRAWABLE_HASH_SZ; i++) state->drawable_hash[i] = NULL; } LOCAL void dri_state_delete(dri_state_t *state) { if (state == NULL) return; dri_state_close(state); cl_free(state); } LOCAL dri_state_t* dri_state_new(void) { dri_state_t *state = NULL; TRY_ALLOC_NO_ERR (state, CALLOC(dri_state_t)); state->fd = -1; state->driConnectedFlag = NONE; dri_state_init_drawable_hash_table(state); exit: return state; error: dri_state_delete(state); state = NULL; goto exit; } #define __DRI_BUFFER_FRONT_LEFT 0 #define __DRI_BUFFER_BACK_LEFT 1 #define __DRI_BUFFER_FRONT_RIGHT 2 #define __DRI_BUFFER_BACK_RIGHT 3 #define __DRI_BUFFER_DEPTH 4 #define __DRI_BUFFER_STENCIL 5 #define __DRI_BUFFER_ACCUM 6 #define __DRI_BUFFER_FAKE_FRONT_LEFT 7 #define __DRI_BUFFER_FAKE_FRONT_RIGHT 8 typedef struct dri2_drawable { struct dri_drawable base; union dri_buffer buffers[5]; int width; int height; int has_backbuffer; int back_index; int front_index; } dri2_drawable_t; LOCAL dri_drawable_t* dri_state_create_drawable(dri_state_t *state, XID x_drawable) { dri2_drawable_t *dri2_drwble; dri2_drwble = (dri2_drawable_t*)calloc(1, sizeof(*dri2_drwble)); if (!dri2_drwble) return NULL; dri2_drwble->base.x_drawable = x_drawable; dri2_drwble->base.x = 0; dri2_drwble->base.y = 0; VA_DRI2CreateDrawable(state->x11_dpy, x_drawable); return &dri2_drwble->base; } LOCAL void dri_state_destroy_drawable(dri_state_t *state, dri_drawable_t *dri_drwble) { VA_DRI2DestroyDrawable(state->x11_dpy, dri_drwble->x_drawable); free(dri_drwble); } LOCAL void dri_state_swap_buffer(dri_state_t *state, dri_drawable_t *dri_drwble) { dri2_drawable_t *dri2_drwble = (dri2_drawable_t*)dri_drwble; XRectangle xrect; XserverRegion region; if (dri2_drwble->has_backbuffer) { xrect.x = 0; xrect.y = 0; xrect.width = dri2_drwble->width; xrect.height = dri2_drwble->height; region = XFixesCreateRegion(state->x11_dpy, &xrect, 1); VA_DRI2CopyRegion(state->x11_dpy, dri_drwble->x_drawable, region, DRI2BufferFrontLeft, DRI2BufferBackLeft); XFixesDestroyRegion(state->x11_dpy, region); } } LOCAL union dri_buffer* dri_state_get_rendering_buffer(dri_state_t *state, dri_drawable_t *dri_drwble) { dri2_drawable_t *dri2_drwble = (dri2_drawable_t *)dri_drwble; int i; int count; unsigned int attachments[5]; 
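/* Explanatory note added for illustration, not original code: the
 * VA_DRI2GetBuffers() round trip below returns one VA_DRI2Buffer record per
 * attachment the server honors; since only __DRI_BUFFER_BACK_LEFT and
 * __DRI_BUFFER_FRONT_LEFT are requested here, count is at most 2, and the
 * back buffer is preferred as the rendering target whenever the drawable is
 * double buffered. */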
VA_DRI2Buffer *buffers; i = 0; attachments[i++] = __DRI_BUFFER_BACK_LEFT; attachments[i++] = __DRI_BUFFER_FRONT_LEFT; buffers = VA_DRI2GetBuffers(state->x11_dpy, dri_drwble->x_drawable, &dri2_drwble->width, &dri2_drwble->height, attachments, i, &count); assert(buffers); if (buffers == NULL) return NULL; dri2_drwble->has_backbuffer = 0; for (i = 0; i < count; i++) { dri2_drwble->buffers[i].dri2.attachment = buffers[i].attachment; dri2_drwble->buffers[i].dri2.name = buffers[i].name; dri2_drwble->buffers[i].dri2.pitch = buffers[i].pitch; dri2_drwble->buffers[i].dri2.cpp = buffers[i].cpp; dri2_drwble->buffers[i].dri2.flags = buffers[i].flags; if (buffers[i].attachment == __DRI_BUFFER_BACK_LEFT) { dri2_drwble->has_backbuffer = 1; dri2_drwble->back_index = i; } if (buffers[i].attachment == __DRI_BUFFER_FRONT_LEFT) dri2_drwble->front_index = i; } dri_drwble->width = dri2_drwble->width; dri_drwble->height = dri2_drwble->height; Xfree(buffers); if (dri2_drwble->has_backbuffer) return &dri2_drwble->buffers[dri2_drwble->back_index]; return &dri2_drwble->buffers[dri2_drwble->front_index]; } LOCAL void dri_state_close(dri_state_t *state) { dri_state_free_drawable_hash(state); assert(state->fd >= 0); close(state->fd); } LOCAL void dri_state_release(dri_state_t *state) { dri_state_delete(state); } LOCAL dri_state_t* getDRI2State(Display* dpy, int screen, char **driver_name) { int major, minor; int error_base; int event_base; char *device_name = NULL; drm_magic_t magic; char * internal_driver_name = NULL; int fd = -1; dri_state_t* state = NULL; if (!VA_DRI2QueryExtension(dpy, &event_base, &error_base)) goto err_out; if (!VA_DRI2QueryVersion(dpy, &major, &minor)) goto err_out; if (!VA_DRI2Connect(dpy, RootWindow(dpy, screen), &internal_driver_name, &device_name)) goto err_out; fd = open(device_name, O_RDWR); if (fd < 0) goto err_out; if (drmGetMagic(fd, &magic)) goto err_out; if (!VA_DRI2Authenticate(dpy, RootWindow(dpy, screen), magic)) goto err_out; if(driver_name) *driver_name = internal_driver_name; else Xfree(internal_driver_name); state = dri_state_new(); state->fd = fd; state->x11_dpy = dpy; state->x11_screen = screen; state->driConnectedFlag = DRI2; if (device_name) Xfree(device_name); return state; err_out: if (device_name) Xfree(device_name); if (internal_driver_name) Xfree(internal_driver_name); if(driver_name) *driver_name = NULL; if (fd >= 0) close(fd); if (driver_name) *driver_name = NULL; return state; } Beignet-1.1.1-Source/src/x11/mesa_egl_res_share.c000664 001750 001750 00000007133 12576733264 020653 0ustar00yryr000000 000000 /************************************************************************** * * Copyright 2013-2014 Zhigang Gong * Copyright 2013-2014 Intel, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * **************************************************************************/ #include #include #include "mesa_egl_extension.h" #include "mesa_egl_res_share.h" /** * Parse the list of shared texture attributes and return the proper error code. */ EGLint _eglParseTextureAttribList(unsigned int *texture, EGLenum *gl_target, EGLint *level, const EGLint *attrib_list) { EGLint i, err = EGL_SUCCESS; *texture = 0; *gl_target = 0; *level = 0; if (!attrib_list) return EGL_BAD_ATTRIBUTE; for (i = 0; attrib_list[i] != EGL_NONE; i++) { EGLint attr = attrib_list[i++]; EGLint val = attrib_list[i]; switch (attr) { case EGL_GL_TEXTURE_LEVEL_MESA: *level = val; break; case EGL_GL_TEXTURE_ID_MESA: *texture = val; break; case EGL_GL_TEXTURE_TARGET_MESA: *gl_target = val; break; default: /* unknown attrs are ignored */ break; } } return err; } /** * Parse the list of shared buffer object attributes and return the proper error code. */ EGLint _eglParseBufferObjAttribList(unsigned int *bufobj, const EGLint *attrib_list) { EGLint i, err = EGL_SUCCESS; *bufobj = 0; if (!attrib_list) return EGL_BAD_ATTRIBUTE; for (i = 0; attrib_list[i] != EGL_NONE; i++) { EGLint attr = attrib_list[i++]; EGLint val = attrib_list[i]; switch (attr) { case EGL_GL_BUFFER_OBJECT_ID_MESA: *bufobj = val; break; default: /* unknown attrs are ignored */ break; } } if (*bufobj == 0) err = EGL_BAD_ATTRIBUTE; return err; } /** * Parse the list of shared render buffer attributes and return the proper error code. */ EGLint _eglParseRenderBufferAttribList(unsigned int *rb, const EGLint *attrib_list) { EGLint i, err = EGL_SUCCESS; *rb = 0; if (!attrib_list) return EGL_BAD_ATTRIBUTE; for (i = 0; attrib_list[i] != EGL_NONE; i++) { EGLint attr = attrib_list[i++]; EGLint val = attrib_list[i]; switch (attr) { case EGL_GL_RENDER_BUFFER_ID_MESA: *rb = val; break; default: /* unknown attrs are ignored */ break; } } if (*rb == 0) err = EGL_BAD_ATTRIBUTE; return err; } Beignet-1.1.1-Source/src/x11/mesa_egl_res_share.h000664 001750 001750 00000003455 12576733264 020663 0ustar00yryr000000 000000 /************************************************************************** * * Copyright 2013-2014 Zhigang Gong * Copyright 2013-2014 Intel, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * **************************************************************************/ #ifndef EGLRESSHARE_INCLUDED #define EGLRESSHARE_INCLUDED #include EGLint _eglParseTextureAttribList(unsigned int *texture, EGLenum *gl_target, EGLint *level, const EGLint *attrib_list); EGLint _eglParseBufferObjAttribList(unsigned int *bufobj, const EGLint *attrib_list); EGLint _eglParseRenderBufferAttribList(unsigned int *rb, const EGLint *attrib_list); #endif Beignet-1.1.1-Source/src/x11/dricommon.h000664 001750 001750 00000006013 12576733264 017034 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia * Note: the code is taken from libva code base */ /* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef _VA_DRICOMMON_H_ #define _VA_DRICOMMON_H_ #include #include #include #include union dri_buffer { struct { unsigned int attachment; unsigned int name; unsigned int pitch; unsigned int cpp; unsigned int flags; } dri2; }; typedef struct dri_drawable { XID x_drawable; int x; int y; unsigned int width; unsigned int height; struct dri_drawable *next; } dri_drawable_t; #define DRAWABLE_HASH_SZ 32 enum DRI_VER { NONE = 0, // NOT supported VA_DRI1 = 1, DRI2 = 2 }; typedef struct dri_state { Display *x11_dpy; int x11_screen; int fd; enum DRI_VER driConnectedFlag; /* 0: disconnected, 2: DRI2 */ dri_drawable_t *drawable_hash[DRAWABLE_HASH_SZ]; } dri_state_t; dri_drawable_t *dri_state_create_drawable(dri_state_t*, XID x_drawable); void dri_state_destroy_drawable(dri_state_t*, dri_drawable_t*); void dri_state_close(dri_state_t*); void dri_state_release(dri_state_t*); // Create a dri2 state from dpy and screen dri_state_t *getDRI2State(Display* dpy, int screen, char **driver_name); #endif /* _VA_DRICOMMON_H_ */ Beignet-1.1.1-Source/src/x11/va_dri2.c000664 001750 001750 00000022452 12576733264 016373 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2008 Red Hat, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. 
* * Authors: * Kristian Hgsberg (krh@redhat.com) */ #define NEED_REPLIES #include #include #include #include "xf86drm.h" #include "x11/va_dri2.h" #include "x11/va_dri2str.h" #include "x11/va_dri2tokens.h" #ifndef DRI2DriverDRI #define DRI2DriverDRI 0 #endif #define LOCAL __attribute__ ((visibility ("internal"))) static char va_dri2ExtensionName[] = DRI2_NAME; static XExtensionInfo _va_dri2_info_data; static XExtensionInfo *va_dri2Info = &_va_dri2_info_data; static XEXT_GENERATE_CLOSE_DISPLAY (VA_DRI2CloseDisplay, va_dri2Info) static /* const */ XExtensionHooks va_dri2ExtensionHooks = { NULL, /* create_gc */ NULL, /* copy_gc */ NULL, /* flush_gc */ NULL, /* free_gc */ NULL, /* create_font */ NULL, /* free_font */ VA_DRI2CloseDisplay, /* close_display */ NULL, /* wire_to_event */ NULL, /* event_to_wire */ NULL, /* error */ NULL, /* error_string */ }; static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay, va_dri2Info, va_dri2ExtensionName, &va_dri2ExtensionHooks, 0, NULL) LOCAL Bool VA_DRI2QueryExtension(Display *dpy, int *eventBase, int *errorBase) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); if (XextHasExtension(info)) { *eventBase = info->codes->first_event; *errorBase = info->codes->first_error; return True; } return False; } LOCAL Bool VA_DRI2QueryVersion(Display *dpy, int *major, int *minor) { XExtDisplayInfo *info = DRI2FindDisplay (dpy); xDRI2QueryVersionReply rep; xDRI2QueryVersionReq *req; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReq(DRI2QueryVersion, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2QueryVersion; req->majorVersion = DRI2_MAJOR; req->minorVersion = DRI2_MINOR; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return False; } *major = rep.majorVersion; *minor = rep.minorVersion; UnlockDisplay(dpy); SyncHandle(); return True; } LOCAL Bool VA_DRI2Connect(Display *dpy, XID window, char **driverName, char **deviceName) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2ConnectReply rep; xDRI2ConnectReq *req; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReq(DRI2Connect, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2Connect; req->window = window; req->drivertype = DRI2DriverDRI; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return False; } if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) { UnlockDisplay(dpy); SyncHandle(); return False; } *driverName = Xmalloc(rep.driverNameLength + 1); if (*driverName == NULL) { _XEatData(dpy, ((rep.driverNameLength + 3) & ~3) + ((rep.deviceNameLength + 3) & ~3)); UnlockDisplay(dpy); SyncHandle(); return False; } _XReadPad(dpy, *driverName, rep.driverNameLength); (*driverName)[rep.driverNameLength] = '\0'; *deviceName = Xmalloc(rep.deviceNameLength + 1); if (*deviceName == NULL) { Xfree(*driverName); _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3)); UnlockDisplay(dpy); SyncHandle(); return False; } _XReadPad(dpy, *deviceName, rep.deviceNameLength); (*deviceName)[rep.deviceNameLength] = '\0'; UnlockDisplay(dpy); SyncHandle(); return True; } LOCAL Bool VA_DRI2Authenticate(Display *dpy, XID window, drm_magic_t magic) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2AuthenticateReq *req; xDRI2AuthenticateReply rep; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReq(DRI2Authenticate, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2Authenticate; req->window = window; 
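/* Explanatory note added for illustration, not original code: the magic
 * cookie sent with this request is obtained by the caller with drmGetMagic()
 * on its DRM fd (see getDRI2State() in dricommon.c); the X server validates
 * it and, on success, grants that fd rendering access to the device. */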
req->magic = magic; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return False; } UnlockDisplay(dpy); SyncHandle(); return rep.authenticated; } LOCAL void VA_DRI2CreateDrawable(Display *dpy, XID drawable) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2CreateDrawableReq *req; XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName); LockDisplay(dpy); GetReq(DRI2CreateDrawable, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2CreateDrawable; req->drawable = drawable; UnlockDisplay(dpy); SyncHandle(); } LOCAL void VA_DRI2DestroyDrawable(Display *dpy, XID drawable) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2DestroyDrawableReq *req; XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName); XSync(dpy, False); LockDisplay(dpy); GetReq(DRI2DestroyDrawable, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2DestroyDrawable; req->drawable = drawable; UnlockDisplay(dpy); SyncHandle(); } LOCAL VA_DRI2Buffer *VA_DRI2GetBuffers(Display *dpy, XID drawable, int *width, int *height, unsigned int *attachments, int count, int *outcount) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2GetBuffersReply rep; xDRI2GetBuffersReq *req; VA_DRI2Buffer *buffers; xDRI2Buffer repBuffer; CARD32 *p; int i; XextCheckExtension (dpy, info, va_dri2ExtensionName, False); LockDisplay(dpy); GetReqExtra(DRI2GetBuffers, count * 4, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2GetBuffers; req->drawable = drawable; req->count = count; p = (CARD32 *) &req[1]; for (i = 0; i < count; i++) p[i] = attachments[i]; if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { UnlockDisplay(dpy); SyncHandle(); return NULL; } *width = rep.width; *height = rep.height; *outcount = rep.count; buffers = Xmalloc(rep.count * sizeof buffers[0]); if (buffers == NULL) { _XEatData(dpy, rep.count * sizeof repBuffer); UnlockDisplay(dpy); SyncHandle(); return NULL; } for (i = 0; i < (int) rep.count; i++) { _XReadPad(dpy, (char *) &repBuffer, sizeof repBuffer); buffers[i].attachment = repBuffer.attachment; buffers[i].name = repBuffer.name; buffers[i].pitch = repBuffer.pitch; buffers[i].cpp = repBuffer.cpp; buffers[i].flags = repBuffer.flags; } UnlockDisplay(dpy); SyncHandle(); return buffers; } LOCAL void VA_DRI2CopyRegion(Display *dpy, XID drawable, XserverRegion region, CARD32 dest, CARD32 src) { XExtDisplayInfo *info = DRI2FindDisplay(dpy); xDRI2CopyRegionReq *req; xDRI2CopyRegionReply rep; XextSimpleCheckExtension (dpy, info, va_dri2ExtensionName); LockDisplay(dpy); GetReq(DRI2CopyRegion, req); req->reqType = info->codes->major_opcode; req->dri2Reqtype = X_DRI2CopyRegion; req->drawable = drawable; req->region = region; req->dest = dest; req->src = src; _XReply(dpy, (xReply *)&rep, 0, xFalse); UnlockDisplay(dpy); SyncHandle(); } Beignet-1.1.1-Source/src/x11/mesa_egl_extension.c000664 001750 001750 00000021070 12576733264 020710 0ustar00yryr000000 000000 #include #include "mesa_egl_extension.h" #include "mesa_egl_res_share.h" #include "src/cl_driver.h" struct _egl_display; struct _egl_resource; struct _egl_thread_info; struct _egl_config; struct _egl_surface; struct _egl_driver; typedef struct _egl_display _EGLDisplay; typedef struct _egl_resource _EGLResource; typedef struct _egl_thread_info _EGLThreadInfo; typedef struct _egl_config _EGLConfig; typedef struct _egl_surface _EGLSurface; typedef struct _egl_driver _EGLDriver; /** * A resource of a display. 
*/ struct _egl_resource { /* which display the resource belongs to */ _EGLDisplay *Display; EGLBoolean IsLinked; EGLint RefCount; /* used to link resources of the same type */ _EGLResource *Next; }; /** * "Base" class for device driver contexts. */ struct _egl_context { /* A context is a display resource */ _EGLResource Resource; /* The bound status of the context */ _EGLThreadInfo *Binding; _EGLSurface *DrawSurface; _EGLSurface *ReadSurface; _EGLConfig *Config; EGLint ClientAPI; /**< EGL_OPENGL_ES_API, EGL_OPENGL_API, EGL_OPENVG_API */ EGLint ClientMajorVersion; EGLint ClientMinorVersion; EGLint Flags; EGLint Profile; EGLint ResetNotificationStrategy; /* The real render buffer when a window surface is bound */ EGLint WindowRenderBuffer; }; typedef struct _egl_context _EGLContext; struct dri2_egl_display { int dri2_major; int dri2_minor; __DRIscreen *dri_screen; int own_dri_screen; const __DRIconfig **driver_configs; void *driver; }; enum _egl_platform_type { _EGL_PLATFORM_WINDOWS, _EGL_PLATFORM_X11, _EGL_PLATFORM_WAYLAND, _EGL_PLATFORM_DRM, _EGL_PLATFORM_FBDEV, _EGL_PLATFORM_NULL, _EGL_PLATFORM_ANDROID, _EGL_NUM_PLATFORMS, _EGL_INVALID_PLATFORM = -1 }; typedef enum _egl_platform_type _EGLPlatformType; typedef pthread_mutex_t _EGLMutex; struct _egl_display { /* used to link displays */ _EGLDisplay *Next; _EGLMutex Mutex; _EGLPlatformType Platform; /**< The type of the platform display */ void *PlatformDisplay; /**< A pointer to the platform display */ _EGLDriver *Driver; /**< Matched driver of the display */ EGLBoolean Initialized; /**< True if the display is initialized */ /* options that affect how the driver initializes the display */ struct { EGLBoolean TestOnly; /**< Driver should not set fields when true */ EGLBoolean UseFallback; /**< Use fallback driver (sw or less features) */ } Options; /* these fields are set by the driver during init */ void *DriverData; /**< Driver private data */ }; static struct dri2_egl_display * dri2_egl_display(_EGLDisplay *dpy) { return (struct dri2_egl_display *)dpy->DriverData; } static _EGLDisplay * _eglLockDisplay(EGLDisplay dpy) { return (_EGLDisplay *)dpy; } static _EGLContext * _eglLookupContext(EGLContext ctx, EGLDisplay disp) { return (_EGLContext *) ctx; } struct dri2_egl_context { _EGLContext base; __DRIcontext *dri_context; }; static struct dri2_egl_context * dri2_egl_context(_EGLContext *ctx) { return (struct dri2_egl_context *)ctx; } static EGLBoolean dri2_acquire_texture(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list, void *user_data) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint texture = 0; GLenum gl_target = 0; GLint level = 0; GLboolean ret; if (_eglParseTextureAttribList(&texture, &gl_target, &level, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_acquire_texture(dri2_dpy->driver, dri2_ctx->dri_context, gl_target, level, texture, user_data); return ret; } static EGLBoolean dri2_release_texture(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint texture = 0; GLenum gl_target = 0; GLint level = 0; GLboolean ret; if (_eglParseTextureAttribList(&texture, &gl_target, &level, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_release_texture(dri2_dpy->driver, dri2_ctx->dri_context, gl_target, level, texture); return ret; } static EGLBoolean dri2_acquire_buffer_object(_EGLDisplay *disp, 
_EGLContext *ctx, const EGLint *attr_list, void *user_data) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint bufobj = 0; GLboolean ret; if (_eglParseBufferObjAttribList(&bufobj, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_acquire_buffer_object(dri2_dpy->driver, dri2_ctx->dri_context, bufobj, user_data); return ret; } static EGLBoolean dri2_release_buffer_object(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint bufobj = 0; GLboolean ret; if (_eglParseBufferObjAttribList(&bufobj, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_release_buffer_object(dri2_dpy->driver, dri2_ctx->dri_context, bufobj); return ret; } static EGLBoolean dri2_acquire_render_buffer(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list, void *user_data) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint rb = 0; GLboolean ret; if (_eglParseRenderBufferAttribList(&rb, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_acquire_render_buffer(dri2_dpy->driver, dri2_ctx->dri_context, rb, user_data); return ret; } static EGLBoolean dri2_release_render_buffer(_EGLDisplay *disp, _EGLContext *ctx, const EGLint *attr_list) { struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); GLuint rb = 0; GLboolean ret; if (_eglParseRenderBufferAttribList(&rb, attr_list) != EGL_SUCCESS) return EGL_FALSE; ret = cl_gl_release_render_buffer(dri2_dpy->driver, dri2_ctx->dri_context, rb); return ret; } static EGLBoolean dri2_acquire_resource_mesa(_EGLDisplay *disp, _EGLContext *ctx, const EGLenum target, const EGLint *attrib_list, void *user_data) { switch (target) { case EGL_GL_TEXTURE_MESA: return dri2_acquire_texture(disp, ctx, attrib_list, user_data); case EGL_GL_BUFFER_OBJECT_MESA: return dri2_acquire_buffer_object(disp, ctx, attrib_list, user_data); case EGL_GL_RENDER_BUFFER_MESA: return dri2_acquire_render_buffer(disp, ctx, attrib_list, user_data); default: fprintf(stderr, "bad resource target value 0x%04x\n", target); } return EGL_FALSE; } static EGLBoolean dri2_release_resource_mesa(_EGLDisplay *disp, _EGLContext *ctx, const EGLenum target, const EGLint *attrib_list) { switch (target) { case EGL_GL_TEXTURE_MESA: return dri2_release_texture(disp, ctx, attrib_list); case EGL_GL_BUFFER_OBJECT_MESA: return dri2_release_buffer_object(disp, ctx, attrib_list); case EGL_GL_RENDER_BUFFER_MESA: return dri2_release_render_buffer(disp, ctx, attrib_list); default: fprintf(stderr, "bad resource target value 0x%04x\n", target); } return EGL_FALSE; } EGLBoolean eglAcquireResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list, void *user) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLContext *context = _eglLookupContext(ctx, disp); return dri2_acquire_resource_mesa(disp, context, target, attrib_list, user); } EGLBoolean eglReleaseResourceMESA(EGLDisplay dpy, EGLContext ctx, EGLenum target, const EGLint *attrib_list) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLContext *context = _eglLookupContext(ctx, disp); return dri2_release_resource_mesa(disp, context, target, attrib_list); } Beignet-1.1.1-Source/src/x11/va_dri2tokens.h000664 001750 001750 00000005275 12576733264 017630 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation *
* This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2008 Red Hat, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. * * Authors: * Kristian Hgsberg (krh@redhat.com) */ #ifndef _DRI2_TOKENS_H_ #define _DRI2_TOKENS_H_ #define DRI2BufferFrontLeft 0 #define DRI2BufferBackLeft 1 #define DRI2BufferFrontRight 2 #define DRI2BufferBackRight 3 #define DRI2BufferDepth 4 #define DRI2BufferStencil 5 #define DRI2BufferAccum 6 #define DRI2BufferFakeFrontLeft 7 #define DRI2BufferFakeFrontRight 8 #define DRI2DriverDRI 0 #endif Beignet-1.1.1-Source/src/x11/va_dri2.h000664 001750 001750 00000006600 12576733264 016375 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /* * Copyright 2007,2008 Red Hat, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Soft- * ware"), to deal in the Software without restriction, including without * limitation the rights to use, copy, modify, merge, publish, distribute, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, provided that the above copyright * notice(s) and this permission notice appear in all copies of the Soft- * ware and that both the above copyright notice(s) and this permission * notice appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE- * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR- * MANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or * other dealings in this Software without prior written authorization of * the copyright holder. * * Authors: * Kristian Hgsberg (krh@redhat.com) */ #ifndef _VA_DRI2_H_ #define _VA_DRI2_H_ #include #include #include typedef struct { unsigned int attachment; unsigned int name; unsigned int pitch; unsigned int cpp; unsigned int flags; } VA_DRI2Buffer; extern Bool VA_DRI2QueryExtension(Display *display, int *eventBase, int *errorBase); extern Bool VA_DRI2QueryVersion(Display *display, int *major, int *minor); extern Bool VA_DRI2Connect(Display *display, XID window, char **driverName, char **deviceName); extern Bool VA_DRI2Authenticate(Display *display, XID window, drm_magic_t magic); extern void VA_DRI2CreateDrawable(Display *display, XID drawable); extern void VA_DRI2DestroyDrawable(Display *display, XID handle); extern VA_DRI2Buffer * VA_DRI2GetBuffers(Display *dpy, XID drawable, int *width, int *height, unsigned int *attachments, int count, int *outcount); #if 1 extern void VA_DRI2CopyRegion(Display *dpy, XID drawable, XserverRegion region, CARD32 dest, CARD32 src); #endif #endif Beignet-1.1.1-Source/src/git_sha1.sh000775 001750 001750 00000001073 12600661541 016304 0ustar00yryr000000 000000 #!/bin/bash SOURCE_DIR=$1 FILE=$2 touch ${SOURCE_DIR}/${FILE}_tmp if test -d ${SOURCE_DIR}/../.git; then if which git > /dev/null; then git --git-dir=${SOURCE_DIR}/../.git log -n 1 --oneline | \ sed 's/^\([^ ]*\) .*/#define BEIGNET_GIT_SHA1 "git-\1"/' \ > ${SOURCE_DIR}/${FILE}_tmp fi fi #updating ${SOURCE_DIR}/${FILE} if ! 
cmp -s ${SOURCE_DIR}/${FILE}_tmp ${SOURCE_DIR}/${FILE}; then mv ${SOURCE_DIR}/${FILE}_tmp ${SOURCE_DIR}/${FILE} else rm ${SOURCE_DIR}/${FILE}_tmp fi Beignet-1.1.1-Source/src/.gitignore000664 001750 001750 00000000025 12576733264 016250 0ustar00yryr000000 000000 OCLConfig.h libcl.so Beignet-1.1.1-Source/src/CMakeLists.txt000664 001750 001750 00000013336 12576733264 017031 0ustar00yryr000000 000000 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${DRM_INCLUDE_DIRS} ${DRM_INCLUDE_DIRS}/../ ${CMAKE_CURRENT_SOURCE_DIR}/../backend/src/backend/ ${CMAKE_CURRENT_SOURCE_DIR}/../include ${MESA_SOURCE_INCLUDES} ${LLVM_INCLUDE_DIR}) macro (MakeKernelBinStr KERNEL_PATH KERNEL_FILES) foreach (KF ${KERNEL_FILES}) set (input_file ${KERNEL_PATH}/${KF}.cl) set (output_file ${KERNEL_PATH}/${KF}_str.c) list (APPEND KERNEL_STR_FILES ${output_file}) list (GET GBE_BIN_GENERATER -1 GBE_BIN_FILE) if(GEN_PCI_ID) add_custom_command( OUTPUT ${output_file} COMMAND rm -rf ${output_file} COMMAND ${GBE_BIN_GENERATER} -s -o${output_file} -t${GEN_PCI_ID} ${input_file} DEPENDS ${input_file} ${GBE_BIN_FILE}) else(GEN_PCI_ID) add_custom_command( OUTPUT ${output_file} COMMAND rm -rf ${output_file} COMMAND ${GBE_BIN_GENERATER} -s -o${output_file} ${input_file} DEPENDS ${input_file} ${GBE_BIN_FILE}) endif(GEN_PCI_ID) endforeach (KF) endmacro (MakeKernelBinStr) macro (MakeBuiltInKernelStr KERNEL_PATH KERNEL_FILES) set (output_file ${KERNEL_PATH}/${BUILT_IN_NAME}.cl) set (file_content) file (REMOVE ${output_file}) foreach (KF ${KERNEL_NAMES}) set (input_file ${KERNEL_PATH}/${KF}.cl) file(READ ${input_file} file_content ) STRING(REGEX REPLACE ";" "\\\\;" file_content "${file_content}") file(APPEND ${output_file} ${file_content}) endforeach (KF) endmacro (MakeBuiltInKernelStr) set (KERNEL_STR_FILES) set (KERNEL_NAMES cl_internal_copy_buf_align4 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset cl_internal_copy_buf_rect cl_internal_copy_buf_rect_align4 cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d cl_internal_copy_image_2d_to_2d_array cl_internal_copy_image_1d_array_to_1d_array cl_internal_copy_image_2d_array_to_2d_array cl_internal_copy_image_2d_array_to_2d cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_2d_to_buffer_align16 cl_internal_copy_image_3d_to_buffer cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_2d_align16 cl_internal_copy_buffer_to_image_3d cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign cl_internal_fill_buf_align128 cl_internal_fill_image_1d cl_internal_fill_image_1d_array cl_internal_fill_image_2d cl_internal_fill_image_2d_array cl_internal_fill_image_3d) set (BUILT_IN_NAME cl_internal_built_in_kernel) MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${BUILT_IN_NAME}") set(OPENCL_SRC ${KERNEL_STR_FILES} cl_api.c cl_alloc.c cl_kernel.c cl_program.c cl_gbe_loader.cpp cl_sampler.c cl_event.c cl_enqueue.c cl_image.c cl_mem.c cl_platform_id.c cl_extensions.c cl_device_id.c cl_context.c cl_command_queue.c cl_command_queue.h cl_command_queue_gen7.c cl_thread.c cl_driver.h 
cl_driver.cpp cl_driver_defs.c intel/intel_gpgpu.c intel/intel_batchbuffer.c intel/intel_driver.c performance.c) if (X11_FOUND) set(CMAKE_CXX_FLAGS "-DHAS_X11 ${CMAKE_CXX_FLAGS}") set(CMAKE_C_FLAGS "-DHAS_X11 ${CMAKE_C_FLAGS}") set(OPENCL_SRC ${OPENCL_SRC} x11/dricommon.c x11/va_dri2.c) endif (X11_FOUND) if (EGL_FOUND AND MESA_SOURCE_FOUND) set (OPENCL_SRC ${OPENCL_SRC} cl_mem_gl.c cl_gl_api.c x11/mesa_egl_extension.c x11/mesa_egl_res_share.c intel/intel_dri_resource_sharing.c) SET(CMAKE_CXX_FLAGS "-DHAS_EGL ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_EGL ${CMAKE_C_FLAGS}") SET(OPTIONAL_EGL_LIBRARY "${EGL_LIBRARY}") else(EGL_FOUND AND MESA_SOURCE_FOUND) SET(OPTIONAL_EGL_LIBRARY "") endif (EGL_FOUND AND MESA_SOURCE_FOUND) if (OCLIcd_FOUND) set (OPENCL_SRC ${OPENCL_SRC} cl_khr_icd.c) SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}") endif (OCLIcd_FOUND) if (DRM_INTEL_USERPTR) SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}") endif (DRM_INTEL_USERPTR) if (DRM_INTEL_EU_TOTAL) SET(CMAKE_CXX_FLAGS "-DHAS_EU_TOTAL ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_EU_TOTAL ${CMAKE_C_FLAGS}") endif (DRM_INTEL_EU_TOTAL) if (DRM_INTEL_SUBSLICE_TOTAL) SET(CMAKE_CXX_FLAGS "-DHAS_SUBSLICE_TOTAL ${CMAKE_CXX_FLAGS}") SET(CMAKE_C_FLAGS "-DHAS_SUBSLICE_TOTAL ${CMAKE_C_FLAGS}") endif (DRM_INTEL_SUBSLICE_TOTAL) set(GIT_SHA1 "git_sha1.h") add_custom_target(${GIT_SHA1} ALL COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh ${CMAKE_CURRENT_SOURCE_DIR} ${GIT_SHA1} ) SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-Bsymbolic,--allow-shlib-undefined") link_directories (${LLVM_LIBRARY_DIR} ${DRM_LIBDIR}) add_library(cl SHARED ${OPENCL_SRC}) ADD_DEPENDENCIES(cl ${GIT_SHA1}) target_link_libraries( cl ${X11_LIBRARIES} ${XEXT_LIBRARIES} ${XFIXES_LIBRARIES} ${DRM_INTEL_LIBRARIES} ${DRM_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} ${OPENGL_LIBRARIES} ${OPTIONAL_EGL_LIBRARY}) install (TARGETS cl LIBRARY DESTINATION ${BEIGNET_INSTALL_DIR}) Beignet-1.1.1-Source/src/cl_khr_icd.h000664 001750 001750 00000002151 12576733264 016514 0ustar00yryr000000 000000 /* * Copyright © 2013 Simon Richter * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
*/ #ifndef __CL_KHR_ICD_H__ #define __CL_KHR_ICD_H__ #ifdef HAS_OCLIcd #define SET_ICD(dispatch) \ dispatch = &cl_khr_icd_dispatch; #define INIT_ICD(member) .member = &cl_khr_icd_dispatch, #define DEFINE_ICD(member) struct _cl_icd_dispatch const *member; extern struct _cl_icd_dispatch const cl_khr_icd_dispatch; #else #define SET_ICD(dispatch) #define INIT_ICD(member) #define DEFINE_ICD(member) #endif #endif Beignet-1.1.1-Source/src/cl_mem_gl.c000664 001750 001750 00000005220 12576733264 016344 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Zhigang Gong */ #include #include #include #include #include #include #include "cl_mem.h" #include "cl_image.h" #include "cl_context.h" #include "cl_utils.h" #include "cl_alloc.h" #include "cl_device_id.h" #include "cl_driver.h" #include "cl_platform_id.h" #include "cl_mem_gl.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include "CL/cl_gl.h" LOCAL cl_mem cl_mem_new_gl_buffer(cl_context ctx, cl_mem_flags flags, GLuint buf_obj, cl_int *errcode_ret) { NOT_IMPLEMENTED; } LOCAL cl_mem cl_mem_new_gl_texture(cl_context ctx, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, cl_int *errcode_ret) { cl_int err = CL_SUCCESS; cl_mem mem = NULL; /* Check flags consistency */ if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR)) { err = CL_INVALID_ARG_VALUE; goto error; } mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, NULL, &err); if (mem == NULL || err != CL_SUCCESS) goto error; mem->bo = cl_buffer_alloc_from_texture(ctx, texture_target, miplevel, texture, cl_mem_image(mem)); if (UNLIKELY(mem->bo == NULL)) { err = CL_MEM_OBJECT_ALLOCATION_FAILURE; goto error; } cl_mem_gl_image(mem)->target = texture_target; cl_mem_gl_image(mem)->miplevel = miplevel; cl_mem_gl_image(mem)->texture = texture; exit: if (errcode_ret) *errcode_ret = err; return mem; error: cl_mem_delete(mem); mem = NULL; goto exit; } LOCAL void cl_mem_gl_delete(struct _cl_mem_gl_image *gl_image) { if (gl_image->base.base.bo != NULL) cl_buffer_release_from_texture(gl_image->base.base.ctx, gl_image->target, gl_image->miplevel, gl_image->texture); } Beignet-1.1.1-Source/src/cl_khr_icd.c000664 001750 001750 00000011504 12576733264 016511 0ustar00yryr000000 000000 /* * Copyright © 2013 Simon Richter * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
*/ #include #include "cl_platform_id.h" /* The interop functions are not implemented in Beignet */ #define CL_GL_INTEROP(x) NULL /* OpenCL 1.2 is not implemented in Beignet */ #define CL_1_2_NOTYET(x) NULL /** Return platform list through ICD interface * This code is used only if a client is linked directly against the library * instead of using the ICD loader. In this case, no other implementations * should exist in the process address space, so the call is equivalent to * clGetPlatformIDs(). * * @param[in] num_entries Number of entries allocated in return buffer * @param[out] platforms Platform identifiers supported by this implementation * @param[out] num_platforms Number of platform identifiers returned * @return OpenCL error code * @retval CL_SUCCESS Successful execution * @retval CL_PLATFORM_NOT_FOUND_KHR No platforms provided * @retval CL_INVALID_VALUE Invalid parameters */ cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms) { return clGetPlatformIDs(num_entries, platforms, num_platforms); } struct _cl_icd_dispatch const cl_khr_icd_dispatch = { clGetPlatformIDs, clGetPlatformInfo, clGetDeviceIDs, clGetDeviceInfo, clCreateContext, clCreateContextFromType, clRetainContext, clReleaseContext, clGetContextInfo, clCreateCommandQueue, clRetainCommandQueue, clReleaseCommandQueue, clGetCommandQueueInfo, (void *) NULL, /* clSetCommandQueueProperty */ clCreateBuffer, clCreateImage2D, clCreateImage3D, clRetainMemObject, clReleaseMemObject, clGetSupportedImageFormats, clGetMemObjectInfo, clGetImageInfo, clCreateSampler, clRetainSampler, clReleaseSampler, clGetSamplerInfo, clCreateProgramWithSource, clCreateProgramWithBinary, clRetainProgram, clReleaseProgram, clBuildProgram, clUnloadCompiler, clGetProgramInfo, clGetProgramBuildInfo, clCreateKernel, clCreateKernelsInProgram, clRetainKernel, clReleaseKernel, clSetKernelArg, clGetKernelInfo, clGetKernelWorkGroupInfo, clWaitForEvents, clGetEventInfo, clRetainEvent, clReleaseEvent, clGetEventProfilingInfo, clFlush, clFinish, clEnqueueReadBuffer, clEnqueueWriteBuffer, clEnqueueCopyBuffer, clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, clEnqueueMapBuffer, clEnqueueMapImage, clEnqueueUnmapMemObject, clEnqueueNDRangeKernel, clEnqueueTask, clEnqueueNativeKernel, clEnqueueMarker, clEnqueueWaitForEvents, clEnqueueBarrier, clGetExtensionFunctionAddress, CL_GL_INTEROP(clCreateFromGLBuffer), CL_GL_INTEROP(clCreateFromGLTexture2D), CL_GL_INTEROP(clCreateFromGLTexture3D), CL_GL_INTEROP(clCreateFromGLRenderbuffer), CL_GL_INTEROP(clGetGLObjectInfo), CL_GL_INTEROP(clGetGLTextureInfo), CL_GL_INTEROP(clEnqueueAcquireGLObjects), CL_GL_INTEROP(clEnqueueReleaseGLObjects), CL_GL_INTEROP(clGetGLContextInfoKHR), (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, clSetEventCallback, clCreateSubBuffer, clSetMemObjectDestructorCallback, clCreateUserEvent, clSetUserEventStatus, clEnqueueReadBufferRect, clEnqueueWriteBufferRect, clEnqueueCopyBufferRect, CL_1_2_NOTYET(clCreateSubDevicesEXT), CL_1_2_NOTYET(clRetainDeviceEXT), CL_1_2_NOTYET(clReleaseDeviceEXT), #ifdef CL_VERSION_1_2 (void *) NULL, clCreateSubDevices, clRetainDevice, clReleaseDevice, clCreateImage, clCreateProgramWithBuiltInKernels, clCompileProgram, clLinkProgram, clUnloadPlatformCompiler, clGetKernelArgInfo, clEnqueueFillBuffer, clEnqueueFillImage, clEnqueueMigrateMemObjects, clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, 
clGetExtensionFunctionAddressForPlatform, CL_GL_INTEROP(clCreateFromGLTexture), (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL, (void *) NULL #endif }; Beignet-1.1.1-Source/src/cl_utils.h000664 001750 001750 00000032700 12576733264 016254 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __CL_UTILS_H__ #define __CL_UTILS_H__ /* INLINE is forceinline */ #define INLINE __attribute__((always_inline)) inline /* Branch hint */ #define LIKELY(x) __builtin_expect((x),1) #define UNLIKELY(x) __builtin_expect((x),0) /* Stringify macros */ #define JOIN(X, Y) _DO_JOIN(X, Y) #define _DO_JOIN(X, Y) _DO_JOIN2(X, Y) #define _DO_JOIN2(X, Y) X##Y /* Check compile time errors */ #define STATIC_ASSERT(value) \ struct JOIN(__,JOIN(__,__LINE__)) { \ int x[(value) ? 1 : -1]; \ } /* Throw errors */ #ifdef NDEBUG #define ERR(ERROR, ...) \ do { \ err = ERROR; \ goto error; \ } while (0) #else #define ERR(ERROR, ...) \ do { \ fprintf(stderr, "error in %s line %i\n", __FILE__, __LINE__); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ err = ERROR; \ goto error; \ } while (0) #endif #define DO_ALLOC_ERR \ do { \ ERR(CL_OUT_OF_HOST_MEMORY, "Out of memory"); \ } while (0) #define ERR_IF(COND, ERROR, ...) \ do { \ if (UNLIKELY(COND)) ERR (ERROR, __VA_ARGS__); \ } while (0) #define INVALID_VALUE_IF(COND) \ do { \ ERR_IF(COND, CL_INVALID_VALUE, "Invalid value"); \ } while (0) #define INVALID_DEVICE_IF(COND) \ do { \ ERR_IF(COND, CL_INVALID_DEVICE, "Invalid device"); \ } while (0) #define MAX(x0, x1) ((x0) > (x1) ? (x0) : (x1)) #define MIN(x0, x1) ((x0) < (x1) ? (x0) : (x1)) #define ALIGN(A, B) (((A) % (B)) ? (A) + (B) - ((A) % (B)) : (A)) #define DO_ALLOC_ERROR \ do { \ err = CL_OUT_OF_HOST_MEMORY; \ goto error; \ } while (0) #define FATAL(...) \ do { \ fprintf(stderr, "error: "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ assert(0); \ exit(-1); \ } while (0) #define FATAL_IF(COND, ...) 
\ do { \ if (UNLIKELY(COND)) FATAL(__VA_ARGS__); \ } while (0) #define NOT_IMPLEMENTED FATAL ("Not implemented") #define CHECK_CONTEXT(CTX) \ do { \ if (UNLIKELY(CTX == NULL)) { \ err = CL_INVALID_CONTEXT; \ goto error; \ } \ if (UNLIKELY(CTX->magic != CL_MAGIC_CONTEXT_HEADER)) { \ err = CL_INVALID_CONTEXT; \ goto error; \ } \ } while (0) #define CHECK_QUEUE(QUEUE) \ do { \ if (UNLIKELY(QUEUE == NULL)) { \ err = CL_INVALID_COMMAND_QUEUE; \ goto error; \ } \ if (UNLIKELY(QUEUE->magic != CL_MAGIC_QUEUE_HEADER)) { \ err = CL_INVALID_COMMAND_QUEUE; \ goto error; \ } \ } while (0) #define CHECK_MEM(MEM) \ do { \ if (UNLIKELY(MEM == NULL)) { \ err = CL_INVALID_MEM_OBJECT; \ goto error; \ } \ if (UNLIKELY(MEM->magic != CL_MAGIC_MEM_HEADER)) { \ err = CL_INVALID_MEM_OBJECT; \ goto error; \ } \ } while (0) #define CHECK_IMAGE(MEM, IMAGE) \ CHECK_MEM(MEM); \ do { \ if (UNLIKELY(!IS_IMAGE(MEM))) { \ err = CL_INVALID_MEM_OBJECT; \ goto error; \ } \ } while (0); \ struct _cl_mem_image *IMAGE; \ IMAGE = cl_mem_image(MEM); \ #define FIXUP_IMAGE_REGION(IMAGE, PREGION, REGION) \ const size_t *REGION; \ size_t REGION ##_REC[3]; \ do { \ if (IMAGE->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { \ REGION ##_REC[0] = PREGION[0]; \ REGION ##_REC[1] = 1; \ REGION ##_REC[2] = PREGION[1]; \ REGION = REGION ##_REC; \ } else { \ REGION = PREGION; \ } \ } while(0) #define FIXUP_IMAGE_ORIGIN(IMAGE, PREGION, REGION) \ const size_t *REGION; \ size_t REGION ##_REC[3]; \ do { \ if (IMAGE->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { \ REGION ##_REC[0] = PREGION[0]; \ REGION ##_REC[1] = 0; \ REGION ##_REC[2] = PREGION[1]; \ REGION = REGION ##_REC; \ } else { \ REGION = PREGION; \ } \ } while(0) #define CHECK_EVENT(EVENT) \ do { \ if (UNLIKELY(EVENT == NULL)) { \ err = CL_INVALID_EVENT; \ goto error; \ } \ if (UNLIKELY(EVENT->magic != CL_MAGIC_EVENT_HEADER)) { \ err = CL_INVALID_EVENT; \ goto error; \ } \ } while (0) #define CHECK_SAMPLER(SAMPLER) \ do { \ if (UNLIKELY(SAMPLER == NULL)) { \ err = CL_INVALID_SAMPLER; \ goto error; \ } \ if (UNLIKELY(SAMPLER->magic != CL_MAGIC_SAMPLER_HEADER)) {\ err = CL_INVALID_SAMPLER; \ goto error; \ } \ } while (0) #define CHECK_KERNEL(KERNEL) \ do { \ if (UNLIKELY(KERNEL == NULL)) { \ err = CL_INVALID_KERNEL; \ goto error; \ } \ if (UNLIKELY(KERNEL->magic != CL_MAGIC_KERNEL_HEADER)) { \ err = CL_INVALID_KERNEL; \ goto error; \ } \ } while (0) #define CHECK_PROGRAM(PROGRAM) \ do { \ if (UNLIKELY(PROGRAM == NULL)) { \ err = CL_INVALID_PROGRAM; \ goto error; \ } \ if (UNLIKELY(PROGRAM->magic != CL_MAGIC_PROGRAM_HEADER)) {\ err = CL_INVALID_PROGRAM; \ goto error; \ } \ } while (0) #define ELEMENTS(x) (sizeof(x)/sizeof(*(x))) #define CALLOC_STRUCT(T) (struct T*) cl_calloc(1, sizeof(struct T)) #define CALLOC(T) (T*) cl_calloc(1, sizeof(T)) #define CALLOC_ARRAY(T, N) (T*) cl_calloc(N, sizeof(T)) #define MEMZERO(x) do { memset((x),0,sizeof(*(x))); } while (0) /* Run some code and catch errors */ #define TRY(fn,...) \ do { \ if (UNLIKELY((err = fn(__VA_ARGS__)) != CL_SUCCESS)) \ goto error; \ } while (0) #define TRY_NO_ERR(fn,...) 
\ do { \ if (UNLIKELY(fn(__VA_ARGS__) != CL_SUCCESS)) \ goto error; \ } while (0) #define TRY_ALLOC(dst, EXPR) \ do { \ if (UNLIKELY((dst = EXPR) == NULL)) \ DO_ALLOC_ERROR; \ } while (0) #define TRY_ALLOC_NO_ERR(dst, EXPR) \ do { \ if (UNLIKELY((dst = EXPR) == NULL)) \ goto error; \ } while (0) #define TRY_ALLOC_NO_RET(EXPR) \ do { \ if (UNLIKELY((EXPR) == NULL)) \ DO_ALLOC_ERROR; \ } while (0) /* Break Point Definitions */ #if !defined(NDEBUG) #define BREAK \ do { \ __asm__("int3"); \ } while(0) #define BREAK_IF(value) \ do { \ if (UNLIKELY(!(value))) BREAKPOINT(); \ } while(0) #else #define BREAKPOINT() do { } while(0) #define ASSERT(value) do { } while(0) #endif /* For all internal functions */ #define LOCAL __attribute__ ((visibility ("internal"))) /* Align a structure or a variable */ #define ALIGNED(X) __attribute__ ((aligned (X))) /* Number of DWORDS */ #define SIZEOF32(X) (sizeof(X) / sizeof(uint32_t)) /* Memory quantity */ #define KB 1024 #define MB (KB*KB) /* To help bitfield definitions */ #define BITFIELD_BIT(X) 1 #define BITFIELD_RANGE(X,Y) ((Y) - (X) + 1) /* 32 bits atomic variable */ typedef volatile int atomic_t; static INLINE int atomic_add(atomic_t *v, const int c) { register int i = c; __asm__ __volatile__("lock ; xaddl %0, %1;" : "+r"(i), "+m"(*v) : "m"(*v), "r"(i)); return i; } static INLINE int atomic_inc(atomic_t *v) { return atomic_add(v, 1); } static INLINE int atomic_dec(atomic_t *v) { return atomic_add(v, -1); } #endif /* __CL_UTILS_H__ */ Beignet-1.1.1-Source/src/cl_device_data.h000664 001750 001750 00000034663 12576736025 017354 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
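/*
 * A short illustration (a sketch, not taken from the tree) of the atomic
 * helpers defined at the end of cl_utils.h above: atomic_add() is a
 * "lock xadd", so it returns the value *before* the addition. A release
 * path therefore frees only when the old count was exactly 1, which is the
 * idiom the runtime's reference-counted objects rely on.
 */
#include <stdlib.h>
#include "cl_utils.h" /* assumed visible here, for atomic_t / atomic_inc / atomic_dec */

struct example_obj { atomic_t ref_n; /* ... payload ... */ };

static void example_retain(struct example_obj *o)
{
  atomic_inc(&o->ref_n);
}

static void example_release(struct example_obj *o)
{
  if (atomic_dec(&o->ref_n) == 1) /* old value was 1: we dropped the last reference */
    free(o);
}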
* * Author: Benjamin Segovia */ #ifndef __CL_DEVICE_DATA_H__ #define __CL_DEVICE_DATA_H__ #define INVALID_CHIP_ID -1 //returned by intel_get_device_id if no device found #define INTEL_VENDOR_ID 0x8086 // Vendor ID for Intel #define PCI_CHIP_GM45_GM 0x2A42 #define PCI_CHIP_IGD_E_G 0x2E02 #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 #define PCI_CHIP_IGDNG_D_G 0x0042 #define PCI_CHIP_IGDNG_M_G 0x0046 #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ devid == PCI_CHIP_G41_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) #define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G) #define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G) #define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid)) #ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE #define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */ #define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 #define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 #define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 #define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */ #define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 #define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 #define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 #define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */ #define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A #endif #define IS_GEN6(devid) \ (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ devid == PCI_CHIP_SANDYBRIDGE_S_GT) #define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */ #define PCI_CHIP_IVYBRIDGE_GT2 0x0162 #define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */ #define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 #define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */ #define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a #define PCI_CHIP_BAYTRAIL_T 0x0F31 #define IS_IVB_GT1(devid) \ (devid == PCI_CHIP_IVYBRIDGE_GT1 || \ devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \ devid == PCI_CHIP_IVYBRIDGE_S_GT1) #define IS_IVB_GT2(devid) \ (devid == PCI_CHIP_IVYBRIDGE_GT2 || \ devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \ devid == PCI_CHIP_IVYBRIDGE_S_GT2) #define IS_BAYTRAIL_T(devid) \ (devid == PCI_CHIP_BAYTRAIL_T) #define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid) || IS_BAYTRAIL_T(devid)) #define IS_GEN7(devid) IS_IVYBRIDGE(devid) #define PCI_CHIP_HASWELL_D1 0x0402 /* GT1 desktop */ #define PCI_CHIP_HASWELL_D2 0x0412 /* GT2 desktop */ #define PCI_CHIP_HASWELL_D3 0x0422 /* GT3 desktop */ #define PCI_CHIP_HASWELL_S1 0x040a /* GT1 server */ #define PCI_CHIP_HASWELL_S2 0x041a /* GT2 server */ #define PCI_CHIP_HASWELL_S3 0x042a /* GT3 server */ #define PCI_CHIP_HASWELL_M1 0x0406 /* GT1 mobile */ #define PCI_CHIP_HASWELL_M2 0x0416 /* GT2 mobile */ #define PCI_CHIP_HASWELL_M3 0x0426 /* GT3 mobile */ #define PCI_CHIP_HASWELL_B1 0x040B /* Haswell GT1 */ #define PCI_CHIP_HASWELL_B2 0x041B /* Haswell GT2 */ #define PCI_CHIP_HASWELL_B3 0x042B /* Haswell GT3 */ #define PCI_CHIP_HASWELL_E1 0x040E /* Haswell GT1 */ #define PCI_CHIP_HASWELL_E2 0x041E /* Haswell GT2 */ #define PCI_CHIP_HASWELL_E3 0x042E /* Haswell GT3 */ /* Software Development Vehicle devices. 
*/ #define PCI_CHIP_HASWELL_SDV_D1 0x0C02 /* SDV GT1 desktop */ #define PCI_CHIP_HASWELL_SDV_D2 0x0C12 /* SDV GT2 desktop */ #define PCI_CHIP_HASWELL_SDV_D3 0x0C22 /* SDV GT3 desktop */ #define PCI_CHIP_HASWELL_SDV_S1 0x0C0A /* SDV GT1 server */ #define PCI_CHIP_HASWELL_SDV_S2 0x0C1A /* SDV GT2 server */ #define PCI_CHIP_HASWELL_SDV_S3 0x0C2A /* SDV GT3 server */ #define PCI_CHIP_HASWELL_SDV_M1 0x0C06 /* SDV GT1 mobile */ #define PCI_CHIP_HASWELL_SDV_M2 0x0C16 /* SDV GT2 mobile */ #define PCI_CHIP_HASWELL_SDV_M3 0x0C26 /* SDV GT3 mobile */ #define PCI_CHIP_HASWELL_SDV_B1 0x0C0B /* SDV GT1 */ #define PCI_CHIP_HASWELL_SDV_B2 0x0C1B /* SDV GT2 */ #define PCI_CHIP_HASWELL_SDV_B3 0x0C2B /* SDV GT3 */ #define PCI_CHIP_HASWELL_SDV_E1 0x0C0E /* SDV GT1 */ #define PCI_CHIP_HASWELL_SDV_E2 0x0C1E /* SDV GT2 */ #define PCI_CHIP_HASWELL_SDV_E3 0x0C2E /* SDV GT3 */ /* Ultrabooks */ #define PCI_CHIP_HASWELL_ULT_D1 0x0A02 /* ULT GT1 desktop */ #define PCI_CHIP_HASWELL_ULT_D2 0x0A12 /* ULT GT2 desktop */ #define PCI_CHIP_HASWELL_ULT_D3 0x0A22 /* ULT GT3 desktop */ #define PCI_CHIP_HASWELL_ULT_S1 0x0A0A /* ULT GT1 server */ #define PCI_CHIP_HASWELL_ULT_S2 0x0A1A /* ULT GT2 server */ #define PCI_CHIP_HASWELL_ULT_S3 0x0A2A /* ULT GT3 server */ #define PCI_CHIP_HASWELL_ULT_M1 0x0A06 /* ULT GT1 mobile */ #define PCI_CHIP_HASWELL_ULT_M2 0x0A16 /* ULT GT2 mobile */ #define PCI_CHIP_HASWELL_ULT_M3 0x0A26 /* ULT GT3 mobile */ #define PCI_CHIP_HASWELL_ULT_B1 0x0A0B /* ULT GT1 */ #define PCI_CHIP_HASWELL_ULT_B2 0x0A1B /* ULT GT2 */ #define PCI_CHIP_HASWELL_ULT_B3 0x0A2B /* ULT GT3 */ #define PCI_CHIP_HASWELL_ULT_E1 0x0A0E /* ULT GT1 */ #define PCI_CHIP_HASWELL_ULT_E2 0x0A1E /* ULT GT2 */ #define PCI_CHIP_HASWELL_ULT_E3 0x0A2E /* ULT GT3 */ /* CRW */ #define PCI_CHIP_HASWELL_CRW_D1 0x0D02 /* CRW GT1 desktop */ #define PCI_CHIP_HASWELL_CRW_D2 0x0D12 /* CRW GT2 desktop */ #define PCI_CHIP_HASWELL_CRW_D3 0x0D22 /* CRW GT3 desktop */ #define PCI_CHIP_HASWELL_CRW_S1 0x0D0A /* CRW GT1 server */ #define PCI_CHIP_HASWELL_CRW_S2 0x0D1A /* CRW GT2 server */ #define PCI_CHIP_HASWELL_CRW_S3 0x0D2A /* CRW GT3 server */ #define PCI_CHIP_HASWELL_CRW_M1 0x0D06 /* CRW GT1 mobile */ #define PCI_CHIP_HASWELL_CRW_M2 0x0D16 /* CRW GT2 mobile */ #define PCI_CHIP_HASWELL_CRW_M3 0x0D26 /* CRW GT3 mobile */ #define PCI_CHIP_HASWELL_CRW_B1 0x0D0B /* CRW GT1 */ #define PCI_CHIP_HASWELL_CRW_B2 0x0D1B /* CRW GT2 */ #define PCI_CHIP_HASWELL_CRW_B3 0x0D2B /* CRW GT3 */ #define PCI_CHIP_HASWELL_CRW_E1 0x0D0E /* CRW GT1 */ #define PCI_CHIP_HASWELL_CRW_E2 0x0D1E /* CRW GT2 */ #define PCI_CHIP_HASWELL_CRW_E3 0x0D2E /* CRW GT3 */ #define IS_HASWELL(devid) ( \ (devid) == PCI_CHIP_HASWELL_D1 || (devid) == PCI_CHIP_HASWELL_D2 || \ (devid) == PCI_CHIP_HASWELL_D3 || (devid) == PCI_CHIP_HASWELL_S1 || \ (devid) == PCI_CHIP_HASWELL_S2 || (devid) == PCI_CHIP_HASWELL_S3 || \ (devid) == PCI_CHIP_HASWELL_M1 || (devid) == PCI_CHIP_HASWELL_M2 || \ (devid) == PCI_CHIP_HASWELL_M3 || (devid) == PCI_CHIP_HASWELL_B1 || \ (devid) == PCI_CHIP_HASWELL_B2 || (devid) == PCI_CHIP_HASWELL_B3 || \ (devid) == PCI_CHIP_HASWELL_E1 || (devid) == PCI_CHIP_HASWELL_E2 || \ (devid) == PCI_CHIP_HASWELL_E3 || (devid) == PCI_CHIP_HASWELL_SDV_D1 || \ (devid) == PCI_CHIP_HASWELL_SDV_D2 || (devid) == PCI_CHIP_HASWELL_SDV_D3 || \ (devid) == PCI_CHIP_HASWELL_SDV_S1 || (devid) == PCI_CHIP_HASWELL_SDV_S2 || \ (devid) == PCI_CHIP_HASWELL_SDV_S3 || (devid) == PCI_CHIP_HASWELL_SDV_M1 || \ (devid) == PCI_CHIP_HASWELL_SDV_M2 || (devid) == PCI_CHIP_HASWELL_SDV_M3 || \ (devid) == PCI_CHIP_HASWELL_SDV_B1 
|| (devid) == PCI_CHIP_HASWELL_SDV_B2 || \ (devid) == PCI_CHIP_HASWELL_SDV_B3 || (devid) == PCI_CHIP_HASWELL_SDV_E1 || \ (devid) == PCI_CHIP_HASWELL_SDV_E2 || (devid) == PCI_CHIP_HASWELL_SDV_E3 || \ (devid) == PCI_CHIP_HASWELL_ULT_D1 || (devid) == PCI_CHIP_HASWELL_ULT_D2 || \ (devid) == PCI_CHIP_HASWELL_ULT_D3 || (devid) == PCI_CHIP_HASWELL_ULT_S1 || \ (devid) == PCI_CHIP_HASWELL_ULT_S2 || (devid) == PCI_CHIP_HASWELL_ULT_S3 || \ (devid) == PCI_CHIP_HASWELL_ULT_M1 || (devid) == PCI_CHIP_HASWELL_ULT_M2 || \ (devid) == PCI_CHIP_HASWELL_ULT_M3 || (devid) == PCI_CHIP_HASWELL_ULT_B1 || \ (devid) == PCI_CHIP_HASWELL_ULT_B2 || (devid) == PCI_CHIP_HASWELL_ULT_B3 || \ (devid) == PCI_CHIP_HASWELL_ULT_E1 || (devid) == PCI_CHIP_HASWELL_ULT_E2 || \ (devid) == PCI_CHIP_HASWELL_ULT_E3 || (devid) == PCI_CHIP_HASWELL_CRW_D1 || \ (devid) == PCI_CHIP_HASWELL_CRW_D2 || (devid) == PCI_CHIP_HASWELL_CRW_D3 || \ (devid) == PCI_CHIP_HASWELL_CRW_S1 || (devid) == PCI_CHIP_HASWELL_CRW_S2 || \ (devid) == PCI_CHIP_HASWELL_CRW_S3 || (devid) == PCI_CHIP_HASWELL_CRW_M1 || \ (devid) == PCI_CHIP_HASWELL_CRW_M2 || (devid) == PCI_CHIP_HASWELL_CRW_M3 || \ (devid) == PCI_CHIP_HASWELL_CRW_B1 || (devid) == PCI_CHIP_HASWELL_CRW_B2 || \ (devid) == PCI_CHIP_HASWELL_CRW_B3 || (devid) == PCI_CHIP_HASWELL_CRW_E1 || \ (devid) == PCI_CHIP_HASWELL_CRW_E2 || (devid) == PCI_CHIP_HASWELL_CRW_E3) #define IS_GEN75(devid) IS_HASWELL(devid) /* BRW */ #define PCI_CHIP_BROADWLL_M_GT1 0x1602 /* Intel(R) Broadwell Mobile - Halo (EDRAM) - GT1 */ #define PCI_CHIP_BROADWLL_D_GT1 0x1606 /* Intel(R) Broadwell U-Processor - GT1 */ #define PCI_CHIP_BROADWLL_S_GT1 0x160A /* Intel(R) Broadwell Server - GT1 */ #define PCI_CHIP_BROADWLL_W_GT1 0x160D /* Intel(R) Broadwell Workstation - GT1 */ #define PCI_CHIP_BROADWLL_U_GT1 0x160E /* Intel(R) Broadwell ULX - GT1 */ #define PCI_CHIP_BROADWLL_M_GT2 0x1612 /* Intel(R) Broadwell Mobile - Halo (EDRAM) - GT2 */ #define PCI_CHIP_BROADWLL_D_GT2 0x1616 /* Intel(R) Broadwell U-Processor - GT2 */ #define PCI_CHIP_BROADWLL_S_GT2 0x161A /* Intel(R) Broadwell Server - GT2 */ #define PCI_CHIP_BROADWLL_W_GT2 0x161D /* Intel(R) Broadwell Workstation - GT2 */ #define PCI_CHIP_BROADWLL_U_GT2 0x161E /* Intel(R) Broadwell ULX - GT2 */ #define PCI_CHIP_BROADWLL_M_GT3 0x1622 /* Intel(R) Broadwell Mobile - Halo (EDRAM) - GT3 */ #define PCI_CHIP_BROADWLL_D_GT3 0x1626 /* Intel(R) Broadwell U-Processor HD 6000 - GT3 */ #define PCI_CHIP_BROADWLL_UI_GT3 0x162B /* Intel(R) Broadwell U-Process Iris 6100 - GT3 */ #define PCI_CHIP_BROADWLL_S_GT3 0x162A /* Intel(R) Broadwell Server - GT3 */ #define PCI_CHIP_BROADWLL_W_GT3 0x162D /* Intel(R) Broadwell Workstation - GT3 */ #define PCI_CHIP_BROADWLL_U_GT3 0x162E /* Intel(R) Broadwell ULX - GT3 */ #define IS_BRW_GT1(devid) \ (devid == PCI_CHIP_BROADWLL_M_GT1 || \ devid == PCI_CHIP_BROADWLL_D_GT1 || \ devid == PCI_CHIP_BROADWLL_S_GT1 || \ devid == PCI_CHIP_BROADWLL_W_GT1 || \ devid == PCI_CHIP_BROADWLL_U_GT1) #define IS_BRW_GT2(devid) \ (devid == PCI_CHIP_BROADWLL_M_GT2 || \ devid == PCI_CHIP_BROADWLL_D_GT2 || \ devid == PCI_CHIP_BROADWLL_S_GT2 || \ devid == PCI_CHIP_BROADWLL_W_GT2 || \ devid == PCI_CHIP_BROADWLL_U_GT2) #define IS_BRW_GT3(devid) \ (devid == PCI_CHIP_BROADWLL_M_GT3 || \ devid == PCI_CHIP_BROADWLL_D_GT3 || \ devid == PCI_CHIP_BROADWLL_S_GT3 || \ devid == PCI_CHIP_BROADWLL_W_GT3 || \ devid == PCI_CHIP_BROADWLL_UI_GT3 || \ devid == PCI_CHIP_BROADWLL_U_GT3) #define IS_BROADWELL(devid) (IS_BRW_GT1(devid) || IS_BRW_GT2(devid) || IS_BRW_GT3(devid)) #define PCI_CHIP_CHV_0 0x22B0 #define 
PCI_CHIP_CHV_1 0x22B1 #define PCI_CHIP_CHV_2 0x22B2 #define PCI_CHIP_CHV_3 0x22B3 #define IS_CHERRYVIEW(devid) \ (devid == PCI_CHIP_CHV_0 || \ devid == PCI_CHIP_CHV_1 || \ devid == PCI_CHIP_CHV_2 || \ devid == PCI_CHIP_CHV_3) #define IS_GEN8(devid) (IS_BROADWELL(devid) || IS_CHERRYVIEW(devid)) /* SKL */ #define PCI_CHIP_SKYLAKE_ULT_GT1 0x1906 /* Intel(R) Skylake ULT - GT1 */ #define PCI_CHIP_SKYLAKE_ULT_GT2 0x1916 /* Intel(R) Skylake ULT - GT2 */ #define PCI_CHIP_SKYLAKE_ULT_GT3 0x1926 /* Intel(R) Skylake ULT - GT3 */ #define PCI_CHIP_SKYLAKE_ULT_GT2F 0x1921 /* Intel(R) Skylake ULT - GT2F */ #define PCI_CHIP_SKYLAKE_ULX_GT1 0x190E /* Intel(R) Skylake ULX - GT1 */ #define PCI_CHIP_SKYLAKE_ULX_GT2 0x191E /* Intel(R) Skylake ULX - GT2 */ #define PCI_CHIP_SKYLAKE_DT_GT1 0x1902 /* Intel(R) Skylake Desktop - GT1 */ #define PCI_CHIP_SKYLAKE_DT_GT2 0x1912 /* Intel(R) Skylake Desktop - GT2 */ #define PCI_CHIP_SKYLAKE_HALO_GT1 0x190B /* Intel(R) Skylake HALO - GT1 */ #define PCI_CHIP_SKYLAKE_HALO_GT2 0x191B /* Intel(R) Skylake HALO - GT2 */ #define PCI_CHIP_SKYLAKE_HALO_GT3 0x192B /* Intel(R) Skylake HALO - GT3 */ #define PCI_CHIP_SKYLAKE_HALO_GT4 0x193B /* Intel(R) Skylake HALO - GT4 */ #define PCI_CHIP_SKYLAKE_SRV_GT1 0x190A /* Intel(R) Skylake Server - GT1 */ #define PCI_CHIP_SKYLAKE_SRV_GT2 0x191A /* Intel(R) Skylake Server - GT2 */ #define PCI_CHIP_SKYLAKE_SRV_GT3 0x192A /* Intel(R) Skylake Server - GT3 */ #define PCI_CHIP_SKYLAKE_SRV_GT4 0x193A /* Intel(R) Skylake Server - GT4 */ #define IS_SKL_GT1(devid) \ (devid == PCI_CHIP_SKYLAKE_ULT_GT1 || \ devid == PCI_CHIP_SKYLAKE_ULX_GT1 || \ devid == PCI_CHIP_SKYLAKE_DT_GT1 || \ devid == PCI_CHIP_SKYLAKE_HALO_GT1 || \ devid == PCI_CHIP_SKYLAKE_SRV_GT1) #define IS_SKL_GT2(devid) \ (devid == PCI_CHIP_SKYLAKE_ULT_GT2 || \ devid == PCI_CHIP_SKYLAKE_ULT_GT2F || \ devid == PCI_CHIP_SKYLAKE_ULX_GT2 || \ devid == PCI_CHIP_SKYLAKE_DT_GT2 || \ devid == PCI_CHIP_SKYLAKE_HALO_GT2 || \ devid == PCI_CHIP_SKYLAKE_SRV_GT2) #define IS_SKL_GT3(devid) \ (devid == PCI_CHIP_SKYLAKE_ULT_GT3 || \ devid == PCI_CHIP_SKYLAKE_HALO_GT3 || \ devid == PCI_CHIP_SKYLAKE_SRV_GT3) #define IS_SKL_GT4(devid) \ (devid == PCI_CHIP_SKYLAKE_HALO_GT4 || \ devid == PCI_CHIP_SKYLAKE_SRV_GT4) #define IS_SKYLAKE(devid) (IS_SKL_GT1(devid) || IS_SKL_GT2(devid) || IS_SKL_GT3(devid) || IS_SKL_GT4(devid)) #define IS_GEN9(devid) IS_SKYLAKE(devid) #endif /* __CL_DEVICE_DATA_H__ */ Beignet-1.1.1-Source/src/cl_event.h000664 001750 001750 00000013152 12600662242 016216 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
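/*
 * A small sketch (not from the tree) of how the IS_GEN* predicates defined
 * above collapse a raw PCI device id into a generation number, assuming
 * cl_device_data.h is included. INVALID_CHIP_ID is the -1 sentinel defined
 * at the top of that header.
 */
static int example_device_gen(int devid)
{
  if (IS_GEN9(devid))  return 9;   /* Skylake */
  if (IS_GEN8(devid))  return 8;   /* Broadwell, Cherryview */
  if (IS_GEN75(devid)) return 75;  /* Haswell ("gen 7.5") */
  if (IS_GEN7(devid))  return 7;   /* Ivybridge, Baytrail */
  if (IS_GEN6(devid))  return 6;   /* Sandybridge */
  return INVALID_CHIP_ID;
}
/* e.g. example_device_gen(PCI_CHIP_SKYLAKE_DT_GT2) yields 9 */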
 *
 * Author: Benjamin Segovia
 */

#ifndef __CL_EVENT_H__
#define __CL_EVENT_H__

#include
#include "cl_internals.h"
#include "cl_driver.h"
#include "cl_enqueue.h"
#include "CL/cl.h"

#define CL_ENQUEUE_EXECUTE_IMM   0
#define CL_ENQUEUE_EXECUTE_DEFER 1

typedef struct _user_event {
  cl_event            event;  /* The user event */
  struct _user_event* next;   /* Next user event in list */
} user_event;

typedef struct _enqueue_callback {
  cl_event          event;            /* The event this enqueue callback belongs to */
  enqueue_data      data;             /* Holds all of this enqueue callback's information */
  cl_uint           num_events;       /* Number of events in the wait list */
  cl_event*         wait_list;        /* The event wait list this callback waits on */
  user_event*       wait_user_events; /* The head of the user-event list the callback waits on */
  struct _enqueue_callback* next;     /* The next enqueue callback in the wait list */
} enqueue_callback;

typedef void (CL_CALLBACK *EVENT_NOTIFY)(cl_event event, cl_int event_command_exec_status, void *user_data);

typedef struct _user_callback {
  cl_int            status;     /* The execution status */
  cl_bool           executed;   /* Indicates whether the callback has already been called */
  EVENT_NOTIFY      pfn_notify; /* Callback function */
  void*             user_data;  /* Callback user data */
  struct _user_callback* next;  /* Next event callback in list */
} user_callback;

struct _cl_event {
  DEFINE_ICD(dispatch)
  uint64_t          magic;        /* To identify it as an event object */
  volatile int      ref_n;        /* We reference count this object */
  cl_context        ctx;          /* The context associated with event */
  cl_event          prev, next;   /* We chain the memory buffers together */
  cl_command_queue  queue;        /* The command queue associated with event */
  cl_command_type   type;         /* The command type associated with event */
  cl_int            status;       /* The execution status */
  cl_gpgpu          gpgpu;        /* Current gpgpu, owned by this structure. */
  cl_gpgpu_event    gpgpu_event;  /* The event object that communicates with the hardware */
  user_callback*    user_cb;      /* The event callback functions */
  enqueue_callback* enqueue_cb;   /* This event's enqueue */
  enqueue_callback* waits_head;   /* The head of the enqueue list waiting on this event */
  cl_bool           emplict;      /* Whether this event was created explicitly by an API call */
  cl_ulong          timestamp[4]; /* The time stamps for profiling. */
  cl_ulong          queued_timestamp;
};

/* Create a new event object */
cl_event cl_event_new(cl_context, cl_command_queue, cl_command_type, cl_bool);
/* Unref the object and delete it if no more reference on it */
void cl_event_delete(cl_event);
/* Add one more reference to this object */
void cl_event_add_ref(cl_event);
/* Register a user callback function for a specific command execution status */
cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *);
/* Execute the event's callback if the event's status supersedes the callback's status.
Free the callback if specified */ void cl_event_call_callback(cl_event event, cl_int status, cl_bool free_cb); /* Check events wait list for enqueue commonds */ cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context); /* Wait the all events in wait list complete */ cl_int cl_event_wait_events(cl_uint, const cl_event *, cl_command_queue); /* New a enqueue suspend task */ void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *); /* Set the event status and call all callbacks */ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event, cl_int); /* Create the marker event */ cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Create the barrier event */ cl_int cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*); /* Get the cpu time */ cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time); /*Get the cpu time for queued*/ cl_int cl_event_get_queued_cpu_timestamp(cl_event event); /*get timestamp delate between end and start*/ cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp); /*Get start time stamp*/ cl_ulong cl_event_get_start_timestamp(cl_event event); /*Get end time stamp*/ cl_ulong cl_event_get_end_timestamp(cl_event event); /* Do the event profiling */ cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name); /* insert the user event */ cl_int cl_event_insert_user_event(user_event** p_u_ev, cl_event event); /* remove the user event */ cl_int cl_event_remove_user_event(user_event** p_u_ev, cl_event event); /* flush the event's pending gpgpu batch buffer and notify driver this gpgpu event has been flushed. */ cl_int cl_event_flush(cl_event event); #endif /* __CL_EVENT_H__ */ Beignet-1.1.1-Source/src/OCLConfig.h.in000664 001750 001750 00000000535 12605356050 016572 0ustar00yryr000000 000000 // the configured options and settings for LIBCL #define LIBCL_DRIVER_VERSION_MAJOR @LIBCL_DRIVER_VERSION_MAJOR@ #define LIBCL_DRIVER_VERSION_MINOR @LIBCL_DRIVER_VERSION_MINOR@ #define LIBCL_DRIVER_VERSION_PATCH @LIBCL_DRIVER_VERSION_PATCH@ #define LIBCL_C_VERSION_MAJOR @LIBCL_C_VERSION_MAJOR@ #define LIBCL_C_VERSION_MINOR @LIBCL_C_VERSION_MINOR@ Beignet-1.1.1-Source/src/cl_alloc.c000664 001750 001750 00000003310 12576733264 016174 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
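/*
 * cl_event_set_callback(), declared above, presumably backs the standard
 * OpenCL 1.1 clSetEventCallback() entry point. A small client-side sketch
 * of the pattern (illustrative; only standard CL API calls are used):
 */
#include <stdio.h>
#include <CL/cl.h>

static void CL_CALLBACK example_notify(cl_event ev, cl_int exec_status, void *user_data)
{
  /* Invoked through the event's user_callback list once the status is reached. */
  printf("%s: event done, status %d\n", (const char *) user_data, exec_status);
}

static cl_int example_watch_event(cl_event ev)
{
  /* Fire example_notify when the command behind `ev` completes. */
  return clSetEventCallback(ev, CL_COMPLETE, example_notify, (void *) "copy");
}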
* * Author: Benjamin Segovia */ #include "cl_alloc.h" #include "cl_utils.h" #include #include #include static volatile int32_t cl_alloc_n = 0; LOCAL void* cl_malloc(size_t sz) { void * p = NULL; atomic_inc(&cl_alloc_n); p = malloc(sz); assert(p); return p; } LOCAL void* cl_aligned_malloc(size_t sz, size_t align) { void * p = NULL; atomic_inc(&cl_alloc_n); p = memalign(align, sz); assert(p); return p; } LOCAL void* cl_calloc(size_t n, size_t elem_size) { void *p = NULL; atomic_inc(&cl_alloc_n); p = calloc(n, elem_size); assert(p); return p; } LOCAL void* cl_realloc(void *ptr, size_t sz) { if (ptr == NULL) atomic_inc(&cl_alloc_n); return realloc(ptr, sz); } LOCAL void cl_free(void *ptr) { if (ptr == NULL) return; atomic_dec(&cl_alloc_n); free(ptr); ptr = NULL; } LOCAL size_t cl_report_unfreed(void) { return cl_alloc_n; } LOCAL void cl_report_set_all_freed(void) { cl_alloc_n = 0; } Beignet-1.1.1-Source/utests/runtime_alloc_host_ptr_buffer.cpp000664 001750 001750 00000001345 12576733264 023642 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void runtime_alloc_host_ptr_buffer(void) { const size_t n = 4096*100; // Setup kernel and buffers OCL_CREATE_KERNEL("runtime_alloc_host_ptr_buffer"); OCL_CREATE_BUFFER(buf[0], CL_MEM_ALLOC_HOST_PTR, n * sizeof(uint32_t), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 256; OCL_NDRANGE(1); // Check result uint32_t* mapptr = (uint32_t*)clEnqueueMapBuffer(queue, buf[0], CL_TRUE, CL_MAP_READ, 0, n*sizeof(uint32_t), 0, NULL, NULL, NULL); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(mapptr[i] == i / 2); clEnqueueUnmapMemObject(queue, buf[0], mapptr, 0, NULL, NULL); } MAKE_UTEST_FROM_FUNCTION(runtime_alloc_host_ptr_buffer); Beignet-1.1.1-Source/utests/compiler_mandelbrot_alternate.cpp000664 001750 001750 00000003646 12576733264 023620 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
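/*
 * The allocation counters in cl_alloc.c above make leak checking cheap:
 * every cl_malloc/cl_aligned_malloc/cl_calloc (and first-time cl_realloc)
 * increments cl_alloc_n and every cl_free decrements it. A sketch of a
 * check a test harness could run, assuming it is built inside the library
 * so cl_alloc.h is available:
 */
#include <assert.h>
#include "cl_alloc.h"

static void example_leak_check(void)
{
  size_t live_before = cl_report_unfreed();
  void *p = cl_malloc(64);
  cl_free(p);
  /* balanced alloc/free: the live-allocation count is unchanged */
  assert(cl_report_unfreed() == live_before);
}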
* * Author: Benjamin Segovia */ #include "utest_helper.hpp" static int *dst = NULL; static const size_t w = 256; static const size_t h = 256; static const float criterium = 4.f; static void compiler_mandelbrot_alternate(void) { const size_t global[2] = {w, h}; const size_t local[2] = {16, 1}; const size_t sz = w * h * sizeof(char[4]); const float rcpWidth = 1.f / float(w); const float rcpHeight = 1.f / float(h); OCL_CREATE_KERNEL("compiler_mandelbrot_alternate"); OCL_CREATE_BUFFER(buf[0], 0, sz, NULL); OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]); OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpWidth); OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpHeight); OCL_CALL (clSetKernelArg, kernel, 3, sizeof(float), &criterium); OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL); OCL_MAP_BUFFER(0); dst = (int *) buf_data[0]; /* Save the image (for debug purpose) */ cl_write_bmp(dst, w, h, "compiler_mandelbrot_alternate.bmp"); /* Compare with the golden image */ OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_alternate_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot_alternate); Beignet-1.1.1-Source/utests/compiler_math_builtin.cpp000664 001750 001750 00000000243 12576733264 022077 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_math_builtin(void) { OCL_CREATE_KERNEL("compiler_math_builtin"); } MAKE_UTEST_FROM_FUNCTION(compiler_math_builtin); Beignet-1.1.1-Source/utests/utest.cpp000664 001750 001750 00000012663 12600456606 016671 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
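/*
 * The utest harness whose code follows registers tests through static
 * construction: MAKE_UTEST_FROM_FUNCTION (from utest.hpp) defines a
 * file-scope UTest object, and the UTest constructor appends it to
 * UTest::utestList, so merely linking a test file registers its cases.
 * A rough plain-C analogue of that self-registration trick, using GCC's
 * constructor attribute (illustrative only, not the real macro):
 */
#include <stdio.h>

typedef void (*example_test_fn)(void);
static example_test_fn example_tests[64];
static int example_test_count = 0;

static void example_case(void) { printf("example_case ran\n"); }

__attribute__((constructor))
static void example_register(void) /* runs before main(), like a C++ static ctor */
{
  if (example_test_count < 64)
    example_tests[example_test_count++] = example_case;
}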
 *
 * Author: Benjamin Segovia
 */

/**
 * \file utest.cpp
 * \author Benjamin Segovia
 */

#include "utest.hpp"
#include "utest_helper.hpp"
#include #include #include #include #include #include #include #include

struct signalMap
{
  const char* signalName;
  int signalNum;
};

using namespace std;
vector<UTest> *UTest::utestList = NULL;
// Initialize and declare statistics
struct RStatistics UTest::retStatistics;

void releaseUTestList(void) {
  delete UTest::utestList;
}

void runSummaryAtExit(void) {
  // If case crashes, count it as fail, and accumulate finishrun
  if(UTest::retStatistics.finishrun != UTest::utestList->size()) {
    UTest::retStatistics.finishrun++;
    // UTest::retStatistics.failCount++;
  }
  printf("\nsummary:\n----------\n");
  printf("  total: %zu\n",UTest::utestList->size());
  printf("  run: %zu\n",UTest::retStatistics.actualrun);
  printf("  pass: %zu\n",UTest::retStatistics.passCount);
  printf("  fail: %zu\n",UTest::retStatistics.failCount);
  printf("  pass rate: %f\n", (UTest::retStatistics.actualrun)?((float)UTest::retStatistics.passCount/(float)UTest::retStatistics.actualrun):(float)0);
  releaseUTestList();
}

void signalHandler( int signum )
{
  const char* name = NULL;
  signalMap arr[] = {
    {"SIGILL", SIGILL},
    {"SIGFPE", SIGFPE},
    {"SIGABRT", SIGABRT},
    {"SIGBUS", SIGBUS},
    {"SIGSEGV", SIGSEGV},
    {"SIGHUP", SIGHUP},
    {"SIGINT", SIGINT},
    {"SIGQUIT", SIGQUIT},
    {"SIGTERM", SIGTERM},
    {NULL, -1}
  };

  for(int i=0; arr[i].signalNum != -1 && arr[i].signalName != NULL; i++) {
    if(arr[i].signalNum == signum)
      name = arr[i].signalName;
  }
  printf("  Interrupt signal (%s) received.", name);
  UTest::retStatistics.failCount++;

  exit(signum);
}

void catch_signal(void){
  struct sigaction sa;
  int sigs[] = {
    SIGILL, SIGFPE, SIGABRT, SIGBUS,
    SIGSEGV, SIGHUP, SIGINT, SIGQUIT,
    SIGTERM
  };

  sa.sa_handler = signalHandler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_RESETHAND;

  for(unsigned int i = 0; i < sizeof(sigs)/sizeof(sigs[0]); ++i) {
    if (sigaction(sigs[i], &sa, NULL) == -1)
      perror("Could not set signal handler");
  }
}

UTest::UTest(Function fn, const char *name, bool isBenchMark, bool haveIssue, bool needDestroyProgram)
       : fn(fn), name(name), isBenchMark(isBenchMark), haveIssue(haveIssue), needDestroyProgram(needDestroyProgram) {
  if (utestList == NULL) {
    utestList = new vector<UTest>;

    catch_signal();
    atexit(runSummaryAtExit);
  }
  utestList->push_back(*this);
}

static bool strequal(const char *s1, const char *s2) {
  if (strcmp(s1, s2) == 0)
    return true;
  return false;
}

void UTest::do_run(struct UTest utest){
  // Print function name
  printf("%s()", utest.name);
  fflush(stdout);
  retStatistics.actualrun++;
  // Run one case in utestList, print result [SUCCESS] or [FAILED]
  (utest.fn)();
}

void UTest::run(const char *name) {
  if (name == NULL)
    return;
  if (utestList == NULL)
    return;

  for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) {
    const UTest &utest = (*utestList)[retStatistics.finishrun];
    if (utest.name == NULL || utest.fn == NULL )
      continue;
    if (strequal(utest.name, name)) {
      do_run(utest);
      cl_kernel_destroy(true);
      cl_buffer_destroy();
    }
  }
}

void UTest::runAll(void) {
  if (utestList == NULL)
    return;

  for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) {
    const UTest &utest = (*utestList)[retStatistics.finishrun];
    if (utest.fn == NULL)
      continue;
    do_run(utest);
    cl_kernel_destroy(utest.needDestroyProgram);
    cl_buffer_destroy();
  }
}

void UTest::runAllNoIssue(void) {
  if (utestList == NULL)
    return;

  for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) {
    const UTest &utest =
(*utestList)[retStatistics.finishrun]; if (utest.fn == NULL || utest.haveIssue || utest.isBenchMark) continue; do_run(utest); cl_kernel_destroy(utest.needDestroyProgram); cl_buffer_destroy(); } } void UTest::runAllBenchMark(void) { if (utestList == NULL) return; for (; retStatistics.finishrun < utestList->size(); ++retStatistics.finishrun) { const UTest &utest = (*utestList)[retStatistics.finishrun]; if (utest.fn == NULL || utest.haveIssue || !utest.isBenchMark) continue; do_run(utest); cl_kernel_destroy(utest.needDestroyProgram); cl_buffer_destroy(); } } void UTest::listAllCases() { if (utestList == NULL) return; for (size_t i = 0; i < utestList->size(); ++i) { const UTest &utest = (*utestList)[i]; if (utest.fn == NULL) continue; std::cout << utest.name << std::endl; } } Beignet-1.1.1-Source/utests/compiler_shift_right.cpp000664 001750 001750 00000002201 12576733264 021726 0ustar00yryr000000 000000 #include "utest_helper.hpp" typedef unsigned int uint; static void cpu(int global_id, uint *src, int *dst) { dst[global_id] = src[global_id] >> 24; } void compiler_shift_right(void) { const size_t n = 16; uint cpu_src[16]; int cpu_dst[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_shift_right"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((uint*)buf_data[0])[i] = 0x80000000 | rand(); OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_shift_right); Beignet-1.1.1-Source/utests/compiler_insn_selection_masked_min_max.cpp000664 001750 001750 00000002143 12576733264 025471 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include static void compiler_insn_selection_masked_min_max(void) { const size_t n = 256; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_insn_selection_masked_min_max"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((float*)buf_data[0])[i] = float(i); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; float *src = (float*)buf_data[0]; for (uint32_t i = 0; i < n; ++i) { float cpu_dst; if (i % 16 > 5) cpu_dst = std::max(src[i], src[7]); else cpu_dst = std::min(src[i], src[10]); OCL_ASSERT(dst[i] == cpu_dst); } } MAKE_UTEST_FROM_FUNCTION(compiler_insn_selection_masked_min_max) Beignet-1.1.1-Source/utests/compiler_saturate.cpp000664 001750 001750 00000010627 12576733264 021257 0ustar00yryr000000 000000 #include "utest_helper.hpp" namespace { const int n = 16; // declaration only, we should create each template specification for each type. 
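/*
 * compiler_saturate.cpp (continuing below) appears to exercise OpenCL's
 * saturating addition against table-driven expected values. For reference,
 * a host-side sketch of how the expected column can be computed: widen,
 * add, clamp. (Sketch only; the test itself compares against the literal
 * tables that follow.)
 */
#include <stdint.h>

static int8_t example_add_sat_s8(int8_t a, int8_t b)
{
  int16_t sum = (int16_t) a + (int16_t) b;  /* widened: the sum cannot wrap */
  if (sum > INT8_MAX) return INT8_MAX;      /* clamp on positive overflow */
  if (sum < INT8_MIN) return INT8_MIN;      /* clamp on negative overflow */
  return (int8_t) sum;
}
/* matches the table rows below, e.g. 63 + 63 -> 126 == INT8_MAX - 1 */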
template T get_data(int idx, int part); /* the format of test data is as follows: * the first column is A * the second column is B * the third column is the expected result. */ #define DEF_TEMPLATE(TYPE, NAME) \ template <> \ TYPE get_data(int idx, int part) \ { \ static TYPE test_data[n][3] = { \ { 0, 0, 0 }, \ { 0, 1, 1 }, \ { 0, 2, 2 }, \ { -1, 1, 0 }, \ { 1, -2, -1 }, \ { 0, 110, 110 }, \ { -10, -10, -20 }, \ { CL_##NAME##_MIN, CL_##NAME##_MIN, CL_##NAME##_MIN }, \ { CL_##NAME##_MIN, CL_##NAME##_MAX, -1 }, \ { CL_##NAME##_MAX, 0, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX, 1, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX, 2, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX, CL_##NAME##_MAX, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX/2, CL_##NAME##_MAX/2, CL_##NAME##_MAX-1 }, \ { CL_##NAME##_MAX/2, CL_##NAME##_MAX/2+1, CL_##NAME##_MAX }, \ { CL_##NAME##_MAX/2+1, CL_##NAME##_MAX/2+1, CL_##NAME##_MAX } \ }; \ return test_data[idx][part]; \ } \ \ template <> \ u##TYPE get_data(int idx, int part) \ { \ static u##TYPE test_data[n][3] = { \ { 0, 0, 0 }, \ { CL_U##NAME##_MAX, 0, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX, 1, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX, 2, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX, CL_U##NAME##_MAX, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX/2, CL_U##NAME##_MAX/2, CL_U##NAME##_MAX-1 }, \ { CL_U##NAME##_MAX/2, CL_U##NAME##_MAX/2+1, CL_U##NAME##_MAX }, \ { CL_U##NAME##_MAX/2+1, CL_U##NAME##_MAX/2+1, CL_U##NAME##_MAX }\ }; \ return test_data[idx][part]; \ } DEF_TEMPLATE(int8_t, CHAR) DEF_TEMPLATE(int16_t, SHRT) DEF_TEMPLATE(int32_t, INT) //DEF_TEMPLATE(int64_t, LONG) template void test(const char *kernel_name) { T C[n] = { 0 }; T A[n] = { 0 }; T B[n] = { 0 }; for (int i = 0; i < n; i++) { A[i] = get_data(i, 0); B[i] = get_data(i, 1); } OCL_CREATE_KERNEL_FROM_FILE("compiler_saturate", kernel_name); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &C[0]); OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &A[0]); OCL_CREATE_BUFFER(buf[2], CL_MEM_COPY_HOST_PTR, n * sizeof(T), &B[0]); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = n; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); for (int i = 0; i < n; i++) { OCL_ASSERT(((T*)buf_data[0])[i] == get_data(i, 2)); } OCL_UNMAP_BUFFER(0); } } #define compiler_saturate(type, kernel) \ static void compiler_saturate_ ##type(void)\ {\ test(# kernel);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_saturate_ ## type); compiler_saturate(int8_t, test_char) compiler_saturate(uint8_t, test_uchar) compiler_saturate(int16_t, test_short) compiler_saturate(uint16_t, test_ushort) compiler_saturate(int32_t, test_int) compiler_saturate(uint32_t, test_uint) //compiler_saturate(int64_t, test_long) //compiler_saturate(uint64_t, test_ulong) Beignet-1.1.1-Source/utests/compiler_function_qualifiers.cpp000664 001750 001750 00000001024 12576733264 023467 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_function_qualifiers(void) { OCL_CREATE_KERNEL("compiler_function_qualifiers"); size_t param_value_size; void* param_value; cl_int err; err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, 0, NULL, ¶m_value_size); OCL_ASSERT(err == CL_SUCCESS); param_value = malloc(param_value_size); err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, param_value_size, param_value, NULL); OCL_ASSERT(err == CL_SUCCESS); } MAKE_UTEST_FROM_FUNCTION(compiler_function_qualifiers); Beignet-1.1.1-Source/utests/compiler_popcount.cpp000664 001750 001750 00000004556 12576733264 021302 
0ustar00yryr000000 000000 #include "utest_helper.hpp" namespace { template T get_max(); #define DEF_TEMPLATE(TYPE, NAME) \ template <> \ TYPE get_max() \ { \ static TYPE max = CL_##NAME##_MAX; \ return max; \ } \ \ template <> \ u##TYPE get_max() \ { \ static u##TYPE max = CL_U##NAME##_MAX; \ return max; \ } DEF_TEMPLATE(int8_t, CHAR) DEF_TEMPLATE(int16_t, SHRT) DEF_TEMPLATE(int32_t, INT) DEF_TEMPLATE(int64_t, LONG) template void test(const char *kernel_name, int s_type) { const int n = sizeof(T) * 8; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_popcount", kernel_name); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = n; OCL_MAP_BUFFER(0); ((T*)buf_data[0])[0] = 0; for (int32_t i = 1; i < (int32_t) n; ++i){ ((T*)buf_data[0])[i] = get_max() >> i; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); OCL_ASSERT(((T*)buf_data[1])[0] == 0); for (int i = 1; i < n; ++i){ OCL_ASSERT(((T*)buf_data[1])[i] == n-i-s_type); } OCL_UNMAP_BUFFER(1); } } #define compiler_popcount(type, kernel, s_type) \ static void compiler_popcount_ ##type(void)\ {\ test(# kernel, s_type);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_popcount_ ## type); compiler_popcount(int8_t, test_char, 1) compiler_popcount(uint8_t, test_uchar, 0) compiler_popcount(int16_t, test_short, 1) compiler_popcount(uint16_t, test_ushort, 0) compiler_popcount(int32_t, test_int, 1) compiler_popcount(uint32_t, test_uint, 0) compiler_popcount(int64_t, test_long, 1) compiler_popcount(uint64_t, test_ulong, 0) Beignet-1.1.1-Source/utests/compiler_local_slm.cpp000664 001750 001750 00000001716 12576733264 021373 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_local_slm(void) { const size_t n = 32; OCL_CREATE_KERNEL_FROM_FILE("compiler_local_slm", "compiler_local_slm"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == (i%16 + 2 + 1+ i/16)); OCL_UNMAP_BUFFER(0); } void compiler_local_slm1(void) { const size_t n = 2; OCL_CREATE_KERNEL_FROM_FILE("compiler_local_slm", "compiler_local_slm1"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = 1; locals[0] = 1; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); uint64_t * ptr = (uint64_t*)buf_data[0]; OCL_ASSERT((ptr[1] -ptr[0]) == 4); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_local_slm); MAKE_UTEST_FROM_FUNCTION(compiler_local_slm1); Beignet-1.1.1-Source/utests/compiler_long_shl.cpp000664 001750 001750 00000001741 12576733264 021231 0ustar00yryr000000 000000 #include #include #include #include "utest_helper.hpp" void compiler_long_shl(void) { const size_t n = 64; int64_t src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_shl"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) src[i] = 1; OCL_MAP_BUFFER(0); memcpy(buf_data[0], src, sizeof(src)); OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); int64_t *dest = ((int64_t *)buf_data[1]); for (int32_t i = 0; i < (int32_t) 
n; ++i)
    if (i > 7)
      OCL_ASSERT(dest[i] == ((int64_t)1) << i);
    else
      OCL_ASSERT(dest[i] == src[i] + 1);
  OCL_UNMAP_BUFFER(1);
}

MAKE_UTEST_FROM_FUNCTION(compiler_long_shl);

Beignet-1.1.1-Source/utests/compiler_mul_hi.cpp000664 001750 001750 00000001721 12576733264 020677 0ustar00yryr000000 000000 #include "utest_helper.hpp"

void compiler_mul_hi(void)
{
  const int n = 32;
  int src1[n], src2[n];

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("compiler_mul_hi");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL);
  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL);
  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
  globals[0] = n;
  locals[0] = 16;

  OCL_MAP_BUFFER(0);
  OCL_MAP_BUFFER(1);
  for (int i = 0; i < n; ++i) {
    src1[i] = ((int*)buf_data[0])[i] = rand();
    src2[i] = ((int*)buf_data[1])[i] = rand();
  }
  OCL_UNMAP_BUFFER(0);
  OCL_UNMAP_BUFFER(1);

  OCL_NDRANGE(1);

  OCL_MAP_BUFFER(2);
  for (int i = 0; i < n; ++i) {
    long long a = src1[i];
    a *= src2[i];
    a >>= 32;
    OCL_ASSERT(((int*)buf_data[2])[i] == (int)a);
  }
  OCL_UNMAP_BUFFER(2);
}

MAKE_UTEST_FROM_FUNCTION(compiler_mul_hi);

Beignet-1.1.1-Source/utests/compiler_local_memory_barrier.cpp000664 001750 001750 00000002635 12576733264 023617 0ustar00yryr000000 000000 /*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
* * Author: Benjamin Segovia */ #include "utest_helper.hpp" static void compiler_local_memory_barrier(void) { const size_t n = 1024; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_local_memory_barrier"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, 64, NULL); // 16 x int // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results uint32_t *dst = (uint32_t*)buf_data[0]; for (uint32_t i = 0; i < n; i+=16) for (uint32_t j = 0; j < 16; ++j) OCL_ASSERT(dst[i+j] == 15-j); } MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_barrier); Beignet-1.1.1-Source/utests/runtime_event.cpp000664 001750 001750 00000003346 12576733264 020421 0ustar00yryr000000 000000 #include "utest_helper.hpp" #define BUFFERSIZE 32*1024 void runtime_event(void) { const size_t n = BUFFERSIZE; cl_int cpu_src[BUFFERSIZE]; cl_event ev[3]; cl_int status = 0; cl_int value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_event"); OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL); for(cl_uint i=0; i= CL_SUBMITTED); } buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status == CL_COMPLETE); OCL_FINISH(); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status <= CL_COMPLETE); } for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); } clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clReleaseEvent(ev[i]); } } MAKE_UTEST_FROM_FUNCTION(runtime_event); Beignet-1.1.1-Source/utests/compiler_data_types.cpp000664 001750 001750 00000000235 12576733264 021556 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_data_types(void) { OCL_CREATE_KERNEL("compiler_data_types"); } MAKE_UTEST_FROM_FUNCTION(compiler_data_types); Beignet-1.1.1-Source/utests/get_arg_info.cpp000664 001750 001750 00000006404 12576743747 020165 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" void test_get_arg_info(void) { int ret; uint32_t ret_val; cl_kernel_arg_type_qualifier type_qual; size_t ret_sz; char name[64]; // Setup kernel and buffers OCL_CALL (cl_kernel_init, "test_get_arg_info.cl", "test_get_arg_info", SOURCE, "-cl-kernel-arg-info"); //Arg 0 ret = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(ret_val), &ret_val, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == sizeof(cl_kernel_arg_address_qualifier)); OCL_ASSERT(ret_val == CL_KERNEL_ARG_ADDRESS_GLOBAL); ret = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof(ret_val), &ret_val, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == sizeof(cl_kernel_arg_access_qualifier)); OCL_ASSERT(ret_val == CL_KERNEL_ARG_ACCESS_NONE); ret = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_NAME, sizeof(name), name, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == strlen("float*") + 1); OCL_ASSERT(!strcmp(name, "float*")); ret = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_NAME, sizeof(name), name, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == strlen("src") + 1); OCL_ASSERT(!strcmp(name, "src")); ret = clGetKernelArgInfo(kernel, 0, 
CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(type_qual), &type_qual, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == sizeof(cl_kernel_arg_type_qualifier)); OCL_ASSERT(type_qual == (CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE)); //Arg 1 ret = clGetKernelArgInfo(kernel, 1, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(ret_val), &ret_val, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == sizeof(cl_kernel_arg_address_qualifier)); OCL_ASSERT(ret_val == CL_KERNEL_ARG_ADDRESS_LOCAL); ret = clGetKernelArgInfo(kernel, 1, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof(ret_val), &ret_val, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == sizeof(cl_kernel_arg_access_qualifier)); OCL_ASSERT(ret_val == CL_KERNEL_ARG_ACCESS_NONE); ret = clGetKernelArgInfo(kernel, 1, CL_KERNEL_ARG_TYPE_NAME, sizeof(name), name, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == strlen("int*") + 1); OCL_ASSERT(!strcmp(name, "int*")); ret = clGetKernelArgInfo(kernel, 1, CL_KERNEL_ARG_NAME, sizeof(name), name, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == strlen("dst") + 1); OCL_ASSERT(!strcmp(name, "dst")); ret = clGetKernelArgInfo(kernel, 1, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(type_qual), &type_qual, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == sizeof(cl_kernel_arg_type_qualifier)); OCL_ASSERT(type_qual == CL_KERNEL_ARG_TYPE_NONE); //Arg 2 ret = clGetKernelArgInfo(kernel, 2, CL_KERNEL_ARG_TYPE_NAME, sizeof(name), name, &ret_sz); OCL_ASSERT(ret == CL_SUCCESS); OCL_ASSERT(ret_sz == strlen("test_arg_struct") + 1); OCL_ASSERT(!strcmp(name, "test_arg_struct")); } MAKE_UTEST_FROM_FUNCTION(test_get_arg_info); Beignet-1.1.1-Source/utests/compiler_function_argument0.cpp000664 001750 001750 00000001137 12576733264 023232 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_function_argument0(void) { const size_t n = 2048; const short value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_argument0"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(short), &value); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[0])[i] == value); } MAKE_UTEST_FROM_FUNCTION(compiler_function_argument0); Beignet-1.1.1-Source/utests/compiler_switch.cpp000664 001750 001750 00000002767 12576733264 020736 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void cpu_compiler_switch(int *dst, int *src, int get_global_id0) { switch (get_global_id0) { case 0: dst[get_global_id0] = src[get_global_id0 + 4]; break; case 1: dst[get_global_id0] = src[get_global_id0 + 14]; break; case 2: dst[get_global_id0] = src[get_global_id0 + 13]; break; case 6: dst[get_global_id0] = src[get_global_id0 + 11]; break; case 7: dst[get_global_id0] = src[get_global_id0 + 10]; break; case 10: dst[get_global_id0] = src[get_global_id0 + 9]; break; case 12: dst[get_global_id0] = src[get_global_id0 + 6]; break; default: dst[get_global_id0] = src[get_global_id0 + 8]; break; } } static void compiler_switch(void) { const size_t n = 32; int cpu_dst[32], cpu_src[32]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_switch"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_MAP_BUFFER(1); for (uint32_t i = 
0; i < 32; ++i) cpu_src[i] = ((int32_t*)buf_data[1])[i] = i; OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < 16; ++i) cpu_compiler_switch(cpu_dst, cpu_src, i); for (int i = 0; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[0])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_switch) Beignet-1.1.1-Source/utests/compiler_fill_image_3d.cpp000664 001750 001750 00000002401 12576733264 022074 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" static void compiler_fill_image_3d(void) { const size_t w = 512; const size_t h = 512; const size_t depth = 5; uint32_t color = 0x12345678; cl_image_format format; cl_image_desc desc; memset(&desc, 0x0, sizeof(cl_image_desc)); memset(&format, 0x0, sizeof(cl_image_format)); format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = w; desc.image_height = h; desc.image_depth = depth; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image_3d"); OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(color), &color); globals[0] = w; globals[1] = h; globals[2] = depth; locals[0] = 16; locals[1] = 16; locals[2] = 1; OCL_NDRANGE(3); // Check result OCL_MAP_BUFFER(0); for (uint32_t k = 0; k < depth; k++) for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d); Beignet-1.1.1-Source/utests/compiler_basic_arithmetic.cpp000664 001750 001750 00000007365 12576733264 022726 0ustar00yryr000000 000000 #include "utest_helper.hpp" enum eTestOP { TEST_OP_ADD =0, TEST_OP_SUB, TEST_OP_MUL, TEST_OP_DIV, TEST_OP_REM }; template static void test_exec(const char* kernel_name) { const size_t n = 160; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_basic_arithmetic", kernel_name); buf_data[0] = (T*) malloc(sizeof(T) * n); buf_data[1] = (T*) malloc(sizeof(T) * n); for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[0])[i] = (T) rand(); for (uint32_t i = 0; i < n; ++i) ((T*)buf_data[1])[i] = (T) rand(); if(op == TEST_OP_DIV || op == TEST_OP_REM) { for (uint32_t i = 0; i < n; ++i) { if(((T*)buf_data[1])[i] == 0) ((T*)buf_data[1])[i] = (T) 1; } } OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[0]); OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, n * sizeof(T), buf_data[1]); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(2); if(op == TEST_OP_SUB) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] - ((T*)buf_data[1])[i])); } else if(op == TEST_OP_ADD) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] + ((T*)buf_data[1])[i])); } else if(op == TEST_OP_MUL) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] * ((T*)buf_data[1])[i])); } else if(op == TEST_OP_DIV) { for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] / ((T*)buf_data[1])[i])); } else { for (uint32_t i = 0; i < n; ++i) 
OCL_ASSERT(((T*)buf_data[2])[i] == (T)(((T*)buf_data[0])[i] % ((T*)buf_data[1])[i])); } free(buf_data[0]); free(buf_data[1]); buf_data[0] = buf_data[1] = NULL; } #define DECL_TEST_SUB(type, alias, keep_program) \ static void compiler_sub_ ##alias(void)\ {\ test_exec("compiler_sub_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_sub_ ## alias, keep_program) #define DECL_TEST_ADD(type, alias, keep_program) \ static void compiler_add_ ##alias(void)\ {\ test_exec("compiler_add_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_add_ ## alias, keep_program) #define DECL_TEST_MUL(type, alias, keep_program) \ static void compiler_mul_ ##alias(void)\ {\ test_exec("compiler_mul_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_mul_ ## alias, keep_program) #define DECL_TEST_DIV(type, alias, keep_program) \ static void compiler_div_ ##alias(void)\ {\ test_exec("compiler_div_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_div_ ## alias, keep_program) #define DECL_TEST_REM(type, alias, keep_program) \ static void compiler_rem_ ##alias(void)\ {\ test_exec("compiler_rem_" # alias);\ }\ MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_rem_ ## alias, keep_program) #define _DECL_TEST_FOR_ALL_TYPE(op, keep_program) \ DECL_TEST_##op(int8_t, char, true) \ DECL_TEST_##op(uint8_t, uchar, true) \ DECL_TEST_##op(int16_t, short, true) \ DECL_TEST_##op(uint16_t, ushort, true) \ DECL_TEST_##op(int32_t, int, true) \ DECL_TEST_##op(uint32_t, uint, keep_program) #define DECL_TEST_FOR_ALL_TYPE(op) _DECL_TEST_FOR_ALL_TYPE(op, true) #define DECL_TEST_FOR_ALL_TYPE_END(op) _DECL_TEST_FOR_ALL_TYPE(op, false) DECL_TEST_FOR_ALL_TYPE(SUB) DECL_TEST_FOR_ALL_TYPE(ADD) DECL_TEST_FOR_ALL_TYPE(MUL) DECL_TEST_FOR_ALL_TYPE(DIV) DECL_TEST_FOR_ALL_TYPE_END(REM) #undef DECL_TEST_FOR_ALL_TYPE Beignet-1.1.1-Source/utests/compiler_abs.cpp000664 001750 001750 00000015305 12576733264 020172 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include "string.h" template struct cl_vec { T ptr[((N+1)/2)*2]; //align to 2 elements. typedef cl_vec vec_type; cl_vec(void) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); } cl_vec(vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); } vec_type& operator= (vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } template vec_type& operator= (cl_vec & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } bool operator== (vec_type & other) { return !memcmp (this->ptr, other.ptr, sizeof(T) * N); } void abs(void) { int i = 0; for (; i < N; i++) { T f = ptr[i]; f = f < 0 ? -f : f; ptr[i] = f; } } }; template static void cpu (int global_id, cl_vec *src, cl_vec *dst) { cl_vec v = src[global_id]; v.abs(); dst[global_id] = v; } template static void cpu(int global_id, T *src, U *dst) { T f = src[global_id]; f = f < 0 ? 
-f : f; dst[global_id] = (U)f; } template static void gen_rand_val (cl_vec& vect) { int i = 0; memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2); for (; i < N; i++) { vect.ptr[i] = static_cast((rand() & 63) - 32); } } template static void gen_rand_val (T & val) { val = static_cast((rand() & 63) - 32); } template inline static void print_data (T& val) { if (std::is_unsigned::value) printf(" %u", val); else printf(" %d", val); } template static void dump_data (cl_vec* src, cl_vec* dst, int n) { U* val = reinterpret_cast(dst); n = n*((N+1)/2)*2; printf("\nRaw: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(val[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void dump_data (T* src, U* dst, int n) { printf("\nRaw: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(dst[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void compiler_abs_with_type(void) { const size_t n = 16; U cpu_dst[16]; T cpu_src[16]; // Setup buffers OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); /* Clear the dst buffer to avoid random data. */ OCL_MAP_BUFFER(1); memset(buf_data[1], 0, sizeof(U) * n); OCL_UNMAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(cpu_src[i]); } memcpy(buf_data[0], cpu_src, sizeof(T) * n); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); // dump_data(cpu_src, cpu_dst, n); OCL_ASSERT(!memcmp(buf_data[1], cpu_dst, sizeof(T) * n)); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(0); } } #define ABS_TEST_TYPE_1(TYPE, UTYPE, KEEP_PROGRAM) \ static void compiler_abs_##TYPE (void) \ { \ OCL_CALL (cl_kernel_init, "compiler_abs.cl", "compiler_abs_"#TYPE, SOURCE, NULL); \ compiler_abs_with_type(); \ } \ MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_abs_##TYPE, KEEP_PROGRAM); #define ABS_TEST_TYPE(TYPE, UTYPE) ABS_TEST_TYPE_1(TYPE, UTYPE, true) #define ABS_TEST_TYPE_END(TYPE, UTYPE) ABS_TEST_TYPE_1(TYPE, UTYPE, false) typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; ABS_TEST_TYPE(int, uint) ABS_TEST_TYPE(short, ushort) ABS_TEST_TYPE(char, uchar) ABS_TEST_TYPE(uint, uint) ABS_TEST_TYPE(ushort, ushort) ABS_TEST_TYPE(uchar, uchar) typedef cl_vec int2; typedef cl_vec int3; typedef cl_vec int4; typedef cl_vec int8; typedef cl_vec int16; typedef cl_vec uint2; typedef cl_vec uint3; typedef cl_vec uint4; typedef cl_vec uint8; typedef cl_vec uint16; ABS_TEST_TYPE(int2, uint2) ABS_TEST_TYPE(int3, uint3) ABS_TEST_TYPE(int4, uint4) ABS_TEST_TYPE(int8, uint8) ABS_TEST_TYPE(int16, uint16) ABS_TEST_TYPE(uint2, uint2) ABS_TEST_TYPE(uint3, uint3) ABS_TEST_TYPE(uint4, uint4) ABS_TEST_TYPE(uint8, uint8) ABS_TEST_TYPE(uint16, uint16) typedef cl_vec char2; typedef cl_vec char3; typedef cl_vec char4; typedef cl_vec char8; typedef cl_vec char16; typedef cl_vec uchar2; typedef cl_vec uchar3; typedef cl_vec uchar4; typedef cl_vec uchar8; typedef cl_vec uchar16; 
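/* Editor's note on the cl_vec layout shared by all of these typedefs: storage
 * is padded to an even element count -- ((N+1)/2)*2 slots -- so every
 * 3-element vector occupies four slots, mirroring OpenCL's rule that a vec3
 * has the size of a vec4. For example (template arguments spelled out):
 *
 *   cl_vec<int32_t, 3> v;   // sizeof(v) == 4 * sizeof(int32_t)
 *   cl_vec<int32_t, 4> w;   // sizeof(w) == 4 * sizeof(int32_t)
 *
 * Because operator== compares only sizeof(T) * N element bytes, and both the
 * CPU-side vectors and the GPU result buffer are zero-initialized before use,
 * the padding slot can never flip a test verdict. */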
ABS_TEST_TYPE(char2, uchar2) ABS_TEST_TYPE(char3, uchar3) ABS_TEST_TYPE(char4, uchar4) ABS_TEST_TYPE(char8, uchar8) ABS_TEST_TYPE(char16, uchar16) ABS_TEST_TYPE(uchar2, uchar2) ABS_TEST_TYPE(uchar3, uchar3) ABS_TEST_TYPE(uchar4, uchar4) ABS_TEST_TYPE(uchar8, uchar8) ABS_TEST_TYPE(uchar16, uchar16) typedef cl_vec short2; typedef cl_vec short3; typedef cl_vec short4; typedef cl_vec short8; typedef cl_vec short16; typedef cl_vec ushort2; typedef cl_vec ushort3; typedef cl_vec ushort4; typedef cl_vec ushort8; typedef cl_vec ushort16; ABS_TEST_TYPE(short2, ushort2) ABS_TEST_TYPE(short3, ushort3) ABS_TEST_TYPE(short4, ushort4) ABS_TEST_TYPE(short8, ushort8) ABS_TEST_TYPE(short16, ushort16) ABS_TEST_TYPE(ushort2, ushort2) ABS_TEST_TYPE(ushort3, ushort3) ABS_TEST_TYPE(ushort4, ushort4) ABS_TEST_TYPE(ushort8, ushort8) ABS_TEST_TYPE_END(ushort16, ushort16) Beignet-1.1.1-Source/utests/compiler_byte_scatter.cpp000664 001750 001750 00000001036 12576733264 022111 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_byte_scatter(void) { const size_t n = 128; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_byte_scatter"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int8_t), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int8_t*)buf_data[0])[i] == (int8_t) i); } MAKE_UTEST_FROM_FUNCTION(compiler_byte_scatter); Beignet-1.1.1-Source/utests/compiler_box_blur.cpp000664 001750 001750 00000002105 12576733264 021233 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include static int w = 0; static int h = 0; static int sz = 0; static const size_t chunk = 64; static int *src = NULL, *dst = NULL; static void compiler_box_blur() { OCL_CREATE_KERNEL("compiler_box_blur"); /* Load the picture */ src = cl_read_bmp("sample.bmp", &w, &h); sz = w * h * sizeof(int); /* Run the kernel */ OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, sz, src); OCL_CREATE_BUFFER(buf[1], 0, sz, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(int), &w); OCL_SET_ARG(3, sizeof(int), &h); OCL_SET_ARG(4, sizeof(int), &chunk); globals[0] = size_t(w/4); globals[1] = h/chunk + ((h%chunk)?1:0); locals[0] = 16; locals[1] = 1; free(src); OCL_NDRANGE(2); OCL_MAP_BUFFER(1); dst = (int*) buf_data[1]; /* Save the image (for debug purpose) */ cl_write_bmp(dst, w, h, "compiler_box_blur.bmp"); /* Compare with the golden image */ OCL_CHECK_IMAGE(dst, w, h, "compiler_box_blur_ref.bmp"); } MAKE_UTEST_FROM_FUNCTION(compiler_box_blur); Beignet-1.1.1-Source/utests/enqueue_built_in_kernels.cpp000664 001750 001750 00000001331 12576733264 022604 0ustar00yryr000000 000000 #include "utest_helper.hpp" void enqueue_built_in_kernels(void) { char* built_in_kernel_names; size_t built_in_kernels_size; cl_int err = CL_SUCCESS; size_t ret_sz; OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, &built_in_kernels_size); built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) ); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz); OCL_ASSERT(ret_sz == built_in_kernels_size); cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, &device, built_in_kernel_names, &err); OCL_ASSERT(built_in_prog != NULL); } MAKE_UTEST_FROM_FUNCTION(enqueue_built_in_kernels); 
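Editor's note: per the OpenCL 1.2 specification, the CL_DEVICE_BUILT_IN_KERNELS string queried above is a semicolon-separated list of names, and clCreateProgramWithBuiltInKernels accepts the same format, which is why the raw string can be passed straight through (the short-lived test never frees built_in_kernel_names or releases built_in_prog, which a harness would normally do). A hedged sketch of splitting that list into individual names -- the helper below is illustrative, not part of the suite:

#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> split_builtin_kernel_names(const char *list) {
  std::vector<std::string> names;
  std::stringstream ss(list ? list : "");
  std::string item;
  while (std::getline(ss, item, ';'))   // spec: names are ';'-separated
    if (!item.empty())
      names.push_back(item);
  return names;
}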
Beignet-1.1.1-Source/utests/enqueue_copy_buf_unaligned.cpp000664 001750 001750 00000005656 12576733264 023126 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb) { unsigned int i; OCL_MAP_BUFFER(0); for (i=0; i < sz; i++) { ((char*)buf_data[0])[i] = (rand() & 31); } OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); for (i=0; i < sz; i++) { ((char*)buf_data[1])[i] = 64; } OCL_UNMAP_BUFFER(1); if (src_off + cb > sz || dst_off + cb > sz) { /* Expect Error. */ OCL_ASSERT(clEnqueueCopyBuffer(queue, buf[0], buf[1], src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); return; } OCL_ASSERT(!clEnqueueCopyBuffer(queue, buf[0], buf[1], src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); #if 0 printf ("@@@@@@@@@ cb is %d\n", cb); printf ("@@@@@@@@@ src_off is %d\n", src_off); printf ("@@@@@@@@@ dst_off is %d\n", dst_off); printf("\n########### Src buffer: \n"); for (i = 0; i < sz; ++i) printf(" %2.2u", ((unsigned char*)buf_data[0])[i]); printf("\n########### dst buffer: \n"); for (i = 0; i < sz; ++i) printf(" %2.2u", ((unsigned char*)buf_data[1])[i]); #endif // Check results for (i = 0; i < cb; ++i) { if (((char*)buf_data[0])[i +src_off] != ((char*)buf_data[1])[i + dst_off]) { printf ("different index is %d\n", i); OCL_ASSERT(0); } } for (i = 0; i < dst_off; ++i) { if (((char*)buf_data[1])[i] != 64) { printf ("wrong write, different index is %d\n", i); OCL_ASSERT(0); } } for (i = dst_off + cb; i < sz; ++i) { if (((char*)buf_data[1])[i] != 64) { printf ("wrong write, different index is %d\n", i); OCL_ASSERT(0); } } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } void enqueue_copy_buf_unaligned(void) { size_t i; size_t j; const size_t sz = 1024; int offset = 0; OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL); OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(char), NULL); #if 1 /* Test the same offset cases. 
*/ for (i=0; i #include "utest_helper.hpp" static void compiler_fill_image_1d(void) { const size_t w = 2048; cl_image_format format; cl_image_desc desc; memset(&desc, 0x0, sizeof(cl_image_desc)); memset(&format, 0x0, sizeof(cl_image_format)); format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = w; desc.image_row_pitch = 0; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image_1d"); OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL); OCL_MAP_BUFFER_GTT(0); for (uint32_t i = 0; i < w; i++) { ((uint32_t*)buf_data[0])[i] = 0; } OCL_UNMAP_BUFFER_GTT(0); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = w/2; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER_GTT(0); //printf("------ The image result is: -------\n"); for (uint32_t i = 0; i < w/2; i++) { //printf(" %2x", ((uint32_t *)buf_data[0])[i]); OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 0x03020100); } for (uint32_t i = w/2; i < w; i++) { //printf(" %2x", ((uint32_t *)buf_data[0])[i]); OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 0); } OCL_UNMAP_BUFFER_GTT(0); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_1d); Beignet-1.1.1-Source/utests/compiler_program_objects.cpp000664 001750 001750 00000004255 12576733264 022607 0ustar00yryr000000 000000 /* test OpenCL 1.1 Program Objects (section 5.6) * test creating program objects, * build program executable, * build options * query program objects */ #include "utest_helper.hpp" void compiler_program_objects(void) { OCL_CREATE_KERNEL("empty"); // set up global vars OCL_CALL(clRetainProgram, program); OCL_CALL(clReleaseProgram, program); OCL_CALL(clBuildProgram, program, 1, &device, "-Dname -Dname2=def -ldir " "-cl-opt-disable -cl-strict-aliasing -cl-mad-enable -cl-no-signed-zeros " "-cl-finite-math-only -cl-fast-relaxed-math -cl-unsafe-math-optimizations " "-cl-single-precision-constant -cl-denorms-are-zero " "-w -Werror -cl-std=CL1.1", NULL, NULL); const int pi[] = {CL_PROGRAM_REFERENCE_COUNT, CL_PROGRAM_CONTEXT, CL_PROGRAM_NUM_DEVICES, CL_PROGRAM_DEVICES, CL_PROGRAM_SOURCE, CL_PROGRAM_BINARY_SIZES, CL_PROGRAM_BINARIES,}; const int pbi[] = {CL_PROGRAM_BUILD_STATUS, CL_PROGRAM_BUILD_OPTIONS, CL_PROGRAM_BUILD_LOG,}; char param_value[1024]; size_t pv_size; int i; for(i=0; i #include "utest_helper.hpp" static void cpu(int global_id, double *src, double *dst) { double f = src[global_id]; double d = 1.234567890123456789; dst[global_id] = global_id < 14 ? 
(d * (f + d)) : 14; } void compiler_double(void) { const size_t n = 16; double cpu_dst[n], cpu_src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_double"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 1; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((double*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_double); Beignet-1.1.1-Source/utests/utest_error.h000664 001750 001750 00000001660 12576733264 017554 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #ifndef __UTEST_ERROR_H__ #define __UTEST_ERROR_H__ #include extern const char *err_msg[]; extern const size_t err_msg_n; #endif /* __UTEST_ERROR_H__ */ Beignet-1.1.1-Source/utests/enqueue_copy_buf.cpp000664 001750 001750 00000003214 12576733264 021064 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb) { unsigned int i; OCL_MAP_BUFFER(0); for (i=0; i < sz; i++) { ((char*)buf_data[0])[i] = (rand() & 63); } OCL_UNMAP_BUFFER(0); if (src_off + cb > sz || dst_off + cb > sz) { /* Expect Error. 
*/ OCL_ASSERT(clEnqueueCopyBuffer(queue, buf[0], buf[1], src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); return; } OCL_ASSERT(!clEnqueueCopyBuffer(queue, buf[0], buf[1], src_off, dst_off, cb*sizeof(char), 0, NULL, NULL)); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); #if 0 printf("\n########### Src buffer: \n"); for (i = 0; i < cb; ++i) printf(" %2.2u", ((unsigned char*)buf_data[0])[i + src_off]); printf("\n########### dst buffer: \n"); for (i = 0; i < cb; ++i) printf(" %2.2u", ((unsigned char*)buf_data[1])[i + dst_off]); #endif // Check results for (i = 0; i < cb; ++i) { if (((char*)buf_data[0])[i + src_off] != ((char*)buf_data[1])[i + dst_off]) { printf ("different index is %d\n", i); OCL_ASSERT(0); } } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); } void enqueue_copy_buf(void) { size_t i; size_t j; const size_t sz = 1024; OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL); OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(char), NULL); for (i=0; i static void double_precision_check(void) { const size_t n = 16; //8192 * 4; double d0 = 0.12345678912345678; double d1 = 0.12355678922345678; float cpu_result = d1 - d0; // Setup kernel and buffers OCL_CREATE_KERNEL("double_precision_check"); //OCL_CREATE_KERNEL("compiler_array"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((float*)buf_data[0])[i] = 0; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); bool precisionOK = true; for (uint32_t i = 0; i < n; ++i) { float error = ((float*)buf_data[1])[i] - cpu_result; if (error != 0) precisionOK = false; OCL_ASSERT((fabs(error) < 1e-4)); } if (!precisionOK) printf("\n - WARN: GPU doesn't have correct double precision. Got %.7G, expected %.7G\n", ((float*)buf_data[1])[0], cpu_result); } MAKE_UTEST_FROM_FUNCTION(double_precision_check); Beignet-1.1.1-Source/utests/profiling_exec.cpp000664 001750 001750 00000007363 12576733264 020535 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include "string.h" static void cpu_exec (int n, float* src, float* dst) { int i = 0; for (; i < n; i++) { float f = src[i]; f = f < 0 ? -f : f; dst[i] = f; } } #define QUEUE_SECONDS_LIMIT 10 #define SUBMIT_SECONDS_LIMIT 20 #define COMMAND_SECONDS_LIMIT 10 static void check_profiling_time(cl_ulong queued, cl_ulong submit, cl_ulong start, cl_ulong end) { size_t profiling_resolution = 0; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(profiling_resolution), &profiling_resolution, NULL); /* Convert the time to second. 
*/ double queue_to_submit = (double)(submit - queued)*1e-9; double submit_to_start = (double)(start - submit)*1e-9; double start_to_end = (double)(end - start)*1e-9; //printf("Profiling info:\n"); //printf("Time from queue to submit : %fms\n", (double)(queue_to_submit) * 1000.f ); //printf( "Time from submit to start : %fms\n", (double)(submit_to_start) * 1000.f ); //printf( "Time from start to end: %fms\n", (double)(start_to_end) * 1000.f ); OCL_ASSERTM(queued <= submit, "Enqueue time is later than submit time, invalid\n"); OCL_ASSERTM(submit <= start, "Submit time is later than start time, invalid\n"); OCL_ASSERTM(start <= end, "Start time is later than end time, invalid\n"); OCL_ASSERTM(queue_to_submit <= QUEUE_SECONDS_LIMIT, "Too large time from queue to submit\n"); OCL_ASSERTM(submit_to_start <= QUEUE_SECONDS_LIMIT, "Too large time from submit to start\n"); OCL_ASSERTM(start_to_end <= QUEUE_SECONDS_LIMIT, "Too large time from start to end\n"); } static void profiling_exec(void) { const size_t n = 512; cl_int status = CL_SUCCESS; cl_command_queue profiling_queue = NULL; cl_command_queue tmp_queue = NULL; float* cpu_src = (float *)malloc(n*sizeof(float)); float* cpu_dst = (float *)malloc(n*sizeof(float)); cl_event exec_event; cl_ulong time_queue, time_submit, time_start, time_end; /* Because the profiling prop, we can not use default queue. */ profiling_queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &status); OCL_ASSERT(status == CL_SUCCESS); /* save the default queue. */ tmp_queue = queue; queue = profiling_queue; OCL_CREATE_KERNEL("compiler_fabs"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 256; OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); cpu_exec(n, cpu_src, cpu_dst); // Run the kernel on GPU OCL_CALL(clEnqueueNDRangeKernel, queue, kernel, 1, NULL, globals, locals, 0, NULL, &exec_event); OCL_CALL(clWaitForEvents, 1, &exec_event); OCL_CALL(clGetEventProfilingInfo, exec_event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &time_queue, NULL); OCL_CALL(clGetEventProfilingInfo, exec_event, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &time_submit, NULL); OCL_CALL(clGetEventProfilingInfo, exec_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &time_start, NULL); OCL_CALL(clGetEventProfilingInfo, exec_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &time_end, NULL); check_profiling_time(time_queue, time_submit, time_start, time_end); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); queue = tmp_queue; clReleaseCommandQueue(profiling_queue); free(cpu_dst); free(cpu_src); } MAKE_UTEST_FROM_FUNCTION(profiling_exec); Beignet-1.1.1-Source/utests/compiler_displacement_map_element.cpp000664 001750 001750 00000003322 12576733264 024437 0ustar00yryr000000 000000 #include "utest_helper.hpp" typedef unsigned int uint; const int W = 16, H = 16; const int SIZE = W * H; uint in_1[SIZE]; uint disp_map[SIZE]; uint out_1[SIZE]; uint cpu(const int cx, const int cy, const uint *in, const uint *disp_map, int w, int h) { uint c = disp_map[cy * w + cx]; int x_pos = cx + c; int y_pos = cy + c; if(0 <= x_pos && x_pos < w && 0 <= y_pos && y_pos < h) return in[y_pos * w + x_pos]; else return 0; } void test() { 
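  /* Editor's worked example of the cpu() reference above, for the fixed
   * W = H = 16 grid: at pixel (cx=2, cy=3), a displacement value c = 4
   * samples in[(3+4)*16 + (2+4)] = in[118]; a value c = 14 pushes x_pos to
   * 16, fails the 0 <= x_pos < w test, and yields 0 instead. The GPU kernel
   * must reproduce this clamp-to-zero behaviour pixel for pixel, which is
   * what the comparison below verifies. */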
OCL_MAP_BUFFER(2); for(int y=0; y #include "utest_helper.hpp" float cpu(float e0, float e1, float x) { x = (x - e0) / (e1 - e0); if (x >= 1) x = 1.f; if (x <= 0) x = 0.f; return x * x * (3 - 2 * x); } void compiler_smoothstep(void) { const int n = 32; float src1[n], src2[n], src3[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_smoothstep"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { float a = 0.1f * (rand() & 15) - 0.75f; float b = a + 0.1f * (rand() & 15) + 0.1f; float c = 0.1f * (rand() & 15) - 0.75f; src1[i] = ((float*)buf_data[0])[i] = a; src2[i] = ((float*)buf_data[1])[i] = b; src3[i] = ((float*)buf_data[2])[i] = c; } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); OCL_MAP_BUFFER(3); for (int i = 0; i < n; ++i) { float a = ((float*)buf_data[3])[i]; float b = cpu(src1[i], src2[i], src3[i]); OCL_ASSERT(fabsf(a - b) < 1e-4f); } OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(compiler_smoothstep); Beignet-1.1.1-Source/utests/compiler_function_argument3.cpp000664 001750 001750 00000001461 12576733264 023235 0ustar00yryr000000 000000 #include "utest_helper.hpp" struct sfloat8 { float a; float b; float c; float d; float e; float f; float g; float h; }; void compiler_function_argument3(void) { sfloat8 arg6; arg6.a = 3.0f; arg6.h = 4.0f; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_argument3"); OCL_CREATE_BUFFER(buf[0], 0, sizeof(struct sfloat8) * 8, NULL); OCL_SET_ARG(0, sizeof(arg6), &arg6); OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = 1; locals[0] = 1; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); /* Check results */ sfloat8 *dst = (sfloat8*)buf_data[0]; OCL_ASSERT(dst[0].a == 3.0f); OCL_ASSERT(dst[0].b == 12.0f); OCL_ASSERT(dst[0].h == 7.0f); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_function_argument3); Beignet-1.1.1-Source/utests/utest_file_map.cpp000664 001750 001750 00000004740 12576733264 020534 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . 
* * Author: Benjamin Segovia */ #include "utest_file_map.hpp" #include "CL/cl.h" #include #include #include #include #include #include #include #include int cl_file_map_init(cl_file_map_t *fm) { assert(fm); memset(fm,0,sizeof(*fm)); return CL_SUCCESS; } void cl_file_map_destroy(cl_file_map_t *fm) { if (fm->mapped) { munmap(fm->start, fm->size); fm->start = fm->stop = 0; fm->size = 0; fm->mapped = CL_FALSE; } if(fm->fd) { close(fm->fd); fm->fd = 0; } free(fm->name); memset(fm,0,sizeof(*fm)); } void cl_file_map_delete(cl_file_map_t *fm) { if (fm == NULL) return; cl_file_map_destroy(fm); free(fm); } cl_file_map_t* cl_file_map_new(void) { cl_file_map_t *fm = NULL; if ((fm = (cl_file_map_t *) calloc(1, sizeof(cl_file_map_t))) == NULL) goto error; if (cl_file_map_init(fm) != CL_SUCCESS) goto error; exit: return fm; error: cl_file_map_delete(fm); fm = NULL; goto exit; } int cl_file_map_open(cl_file_map_t *fm, const char *name) { int err = CL_FILE_MAP_SUCCESS; /* Open the file */ fm->fd = open(name, O_RDONLY); if(fm->fd < 0) { err = CL_FILE_MAP_FILE_NOT_FOUND; goto error; } if ((fm->name = (char*) calloc(strlen(name) + 1, sizeof(char))) == NULL) goto error; sprintf(fm->name, "%s", name); /* Map it */ fm->size = lseek(fm->fd, 0, SEEK_END); lseek(fm->fd, 0, SEEK_SET); fm->start = mmap(0, fm->size, PROT_READ, MAP_SHARED, fm->fd, 0); if(fm->start == NULL) { err = CL_FILE_MAP_FAILED_TO_MMAP; goto error; } fm->stop = ((char *) fm->start) + fm->size; fm->mapped = CL_TRUE; exit: return err; error: cl_file_map_destroy(fm); goto exit; } Beignet-1.1.1-Source/utests/builtin_tgamma.cpp000664 001750 001750 00000003313 12576733264 020523 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" #include void builtin_tgamma(void) { const int n = 1024; float src[n]; float ULPSIZE_NO_FAST_MATH = 16.0; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_tgamma"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; float ULPSIZE_FACTOR = select_ulpsize(ULPSIZE_FAST_MATH,ULPSIZE_NO_FAST_MATH); cl_device_fp_config fp_config; clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &fp_config, 0); bool denormals_supported = fp_config & CL_FP_DENORM; float max_ulp = 0, max_ulp_at = 0; for (int j = 0; j < 128; j ++) { OCL_MAP_BUFFER(0); for (int i = 0; i < n; ++i) { src[i] = ((float*)buf_data[0])[i] = j - 64 + i*0.001f; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; for (int i = 0; i < n; ++i) { float cpu = tgamma(src[i]); if (!denormals_supported && std::fpclassify(cpu)==FP_SUBNORMAL && dst[i]==0) { cpu = 0; } if (fabsf(cpu - dst[i]) > cl_FLT_ULP(cpu) * max_ulp) { max_ulp = fabsf(cpu - dst[i]) / cl_FLT_ULP(cpu); max_ulp_at = src[i]; } if (isinf(cpu)) { OCL_ASSERT(isinf(dst[i])); } else if (fabsf(cpu - dst[i]) >= cl_FLT_ULP(cpu) * ULPSIZE_FACTOR) { printf("%f %f %f\n", src[i], cpu, dst[i]); OCL_ASSERT(0); } } OCL_UNMAP_BUFFER(1); } printf("max error=%f ulp at x=%f ", max_ulp, max_ulp_at); } MAKE_UTEST_FROM_FUNCTION(builtin_tgamma); Beignet-1.1.1-Source/utests/compiler_atomic_functions.cpp000664 001750 001750 00000005707 12576733264 022776 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include #include #include #define GROUP_NUM 16 #define LOCAL_SIZE 256 static void cpu_compiler_atomic(int *dst, int *src) { dst[4] = 0xffffffff; int tmp[16] = { 0 }; tmp[4] = -1; for(int 
j=0; j>4)); break; case 5: tmp[i] |= src[j]<<(j>>4); break; case 6: tmp[i] ^= src[j]; break; case 7: tmp[i] = tmp[i] < -src[j] ? tmp[i] : -src[j]; break; case 8: tmp[i] = tmp[i] > src[j] ? tmp[i] : src[j]; break; case 9: tmp[i] = (unsigned int)tmp[i] < (unsigned int)(-src[j]) ? tmp[i] : -src[j]; break; case 10: tmp[i] = (unsigned int)tmp[i] > (unsigned int)(src[j]) ? tmp[i] : src[j]; break; case 11: tmp[i] = src[10]; break; default: break; } } for(int k=0; k>4)); break; case 5: dst[i] |= src[j]<<(j>>4); break; case 6: dst[i] ^= src[j]; break; case 7: dst[i] = dst[i] < -src[j] ? dst[i] : -src[j]; break; case 8: dst[i] = dst[i] > src[j] ? dst[i] : src[j]; break; case 9: dst[i] = (unsigned int)dst[i] < (unsigned int)(-src[j]) ? dst[i] : -src[j]; break; case 10: dst[i] = (unsigned int)dst[i] > (unsigned int)(src[j]) ? dst[i] : src[j]; break; case 11: dst[i] = src[10]; break; default: break; } } } for(int i=0; i<12; i++) dst[i+12] = tmp[i]; } static void compiler_atomic_functions(void) { const size_t n = GROUP_NUM * LOCAL_SIZE; int cpu_dst[24] = {0}, cpu_src[256]; globals[0] = n; locals[0] = LOCAL_SIZE; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_atomic_functions"); OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, 16 * sizeof(int), NULL); OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); OCL_MAP_BUFFER(0); memset(buf_data[0], 0, 24 * sizeof(int)); ((int *)buf_data[0])[4] = -1; OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < locals[0]; ++i) cpu_src[i] = ((int*)buf_data[1])[i] = rand() & 0xff; cpu_compiler_atomic(cpu_dst, cpu_src); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for(int i=0; i<24; i++) { //printf("The dst(%d) gpu(0x%x) cpu(0x%x)\n", i, ((uint32_t *)buf_data[0])[i], cpu_dst[i]); OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]); } OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions) Beignet-1.1.1-Source/utests/compiler_fill_image_2d_array.cpp000664 001750 001750 00000004030 12576733264 023271 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" static void compiler_fill_image_2d_array(void) { const size_t w = 64; const size_t h = 16; const size_t array = 8; cl_image_format format; cl_image_desc desc; size_t origin[3] = { }; size_t region[3]; uint32_t* dst; memset(&desc, 0x0, sizeof(cl_image_desc)); memset(&format, 0x0, sizeof(cl_image_format)); format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; desc.image_width = w; desc.image_height = h; desc.image_row_pitch = 0;//w * sizeof(uint32_t); desc.image_array_size = array; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image_2d_array"); OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL); OCL_MAP_BUFFER_GTT(0); memset(buf_data[0], 0, sizeof(uint32_t) * w * h * array); OCL_UNMAP_BUFFER_GTT(0); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = w/2; locals[0] = 16; globals[1] = h; locals[1] = 4; globals[2] = array; locals[2] = 4; OCL_NDRANGE(3); // Check result region[0] = w; region[1] = h; region[2] = array; dst = (uint32_t*)malloc(w*h*array*sizeof(uint32_t)); OCL_READ_IMAGE(buf[0], origin, region, dst); #if 0 printf("------ The image result is: -------\n"); for (uint32_t k = 0; k < array; k++) { for (uint32_t j = 0; j < h; j++) { for (uint32_t i = 0; i < w; i++) { printf(" %2x", dst[k*h*w + j*w + i]); } printf("\n"); } 
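      /* Editor's note on this dump: dst is indexed as dst[k*h*w + j*w + i] --
       * slice k, row j, column i -- the same linearization the assertions
       * below use. Each 32-bit texel packs the four CL_UNSIGNED_INT8
       * channels, so a written pixel prints as 0x03020100 (bytes 0x00..0x03,
       * little endian) and the final slice is expected to print all zeros. */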
printf("\n"); } #endif for (uint32_t k = 0; k < array - 1; k++) { for (uint32_t j = 0; j < h; j++) { for (uint32_t i = 0; i < w/2; i++) { OCL_ASSERT(dst[k*w*h + j*w + i] == 0x03020100); } for (uint32_t i = w/2; i < w; i++) { OCL_ASSERT(dst[k*w*h + j*w + i] == 0); } } } for (uint32_t j = 0; j < h; j++) { for (uint32_t i = 0; i < w; i++) { OCL_ASSERT(dst[(array - 1)*w*h + j*w + i] == 0x0); } } free(dst); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_2d_array); Beignet-1.1.1-Source/utests/compiler_ceil.cpp000664 001750 001750 00000002155 12576733264 020340 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" static void cpu(int global_id, float *src, float *dst) { dst[global_id] = ceilf(src[global_id]); } void compiler_ceil(void) { const size_t n = 16; float cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_ceil"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_ceil); Beignet-1.1.1-Source/utests/compiler_load_bool_imm.cpp000664 001750 001750 00000001506 12576733264 022217 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_load_bool_imm(void) { const size_t n = 1024; const size_t local_size = 16; const int copiesPerWorkItem = 5; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_load_bool_imm"); OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, local_size*copiesPerWorkItem*sizeof(int), NULL); // 16 x int OCL_SET_ARG(2, sizeof(int), &copiesPerWorkItem); // 16 x int // Run the kernel globals[0] = n; locals[0] = local_size; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results int *dst = (int*)buf_data[0]; for (uint32_t i = 0; i < n * copiesPerWorkItem; i++) OCL_ASSERT(dst[i] == copiesPerWorkItem); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_load_bool_imm); Beignet-1.1.1-Source/utests/compiler_function_argument.cpp000664 001750 001750 00000001131 12576733264 023144 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_function_argument(void) { const size_t n = 2048; const int value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_argument"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(int), &value); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[0])[i] == value); } MAKE_UTEST_FROM_FUNCTION(compiler_function_argument); Beignet-1.1.1-Source/utests/compiler_unstructured_branch1.cpp000664 001750 001750 00000003073 12576733264 023571 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_unstructured_branch1(void) { const size_t n = 16; // Setup kernel and buffers 
OCL_CREATE_KERNEL("compiler_unstructured_branch1"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[1])[i] == 3); // Third control flow for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; for (uint32_t i = 8; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); for (uint32_t i = 8; i < n; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 3); } MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch1); Beignet-1.1.1-Source/utests/compiler_get_sub_group_id.cpp000664 001750 001750 00000001375 12576733264 022747 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_get_sub_group_id(void) { const size_t n = 256; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_get_sub_group_id"); OCL_CREATE_BUFFER(buf[0], 0, (n+1) * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) (n+1); ++i) ((int*)buf_data[0])[i] = -1; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(0); int* dst = (int *)buf_data[0]; OCL_ASSERT(8 == dst[0] || 16 == dst[0]); for (int32_t i = 1; i < (int32_t) n; ++i){ OCL_ASSERT((i-1) % dst[0] == dst[i]); } OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_id); Beignet-1.1.1-Source/utests/compiler_vect_compare.cpp000664 001750 001750 00000002214 12576733264 022067 0ustar00yryr000000 000000 #include "utest_helper.hpp" typedef struct { int x; int y; int z; int w; } int4; void compiler_vect_compare(void) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_vect_compare"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int4), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int4), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) { ((int4*)buf_data[0])[i].x = i & 0x1; ((int4*)buf_data[0])[i].y = i & 0x2; ((int4*)buf_data[0])[i].z = i & 0x4; ((int4*)buf_data[0])[i].w = i & 0x8; } OCL_UNMAP_BUFFER(0); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) { OCL_ASSERT(((int4*)buf_data[1])[i].x == (int)((i&0x1)?0xffffffff:0)); OCL_ASSERT(((int4*)buf_data[1])[i].y == (int)((i&0x2)?0xffffffff:0)); OCL_ASSERT(((int4*)buf_data[1])[i].z == (int)((i&0x4)?0xffffffff:0)); OCL_ASSERT(((int4*)buf_data[1])[i].w == (int)((i&0x8)?0xffffffff:0)); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_vect_compare); Beignet-1.1.1-Source/utests/compiler_upsample_long.cpp000664 001750 001750 00000002002 12576733264 022260 0ustar00yryr000000 
000000 #include <stdint.h> #include "utest_helper.hpp" void compiler_upsample_long(void) { const int n = 32; int src1[n]; unsigned int src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_upsample_long"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(unsigned int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((unsigned int*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) OCL_ASSERT(((int64_t*)buf_data[2])[i] == (((int64_t)(src1[i]) << 32) | src2[i])); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_upsample_long); Beignet-1.1.1-Source/utests/my_test.cpp000664 001750 001750 00000004476 12576733264 017222 0ustar00yryr000000 000000 #include "utest_helper.hpp" struct seg { unsigned int end, color, offset; seg(int e, int c):end(e), color(c) {} }; typedef struct seg seg; typedef struct { std::vector<seg> segs; } rle_data; struct rle_image { int width, height; std::vector<rle_data> data; rle_image(int w, int h):width(w), height(h) {} }; typedef struct rle_image rle_image; static void read_data(const char *filename, rle_image &image) { FILE *fp; char line[4096]; int i; fp = fopen(filename, "r"); for (i = 0; i < image.height; i++) { char *nptr = line, *endptr; rle_data d; int start = 0; if (fgets(line, sizeof(line), fp) == NULL) break; for (;;) { int len = strtol(nptr, &endptr, 10); nptr = endptr; int color = strtol(nptr, &endptr, 10); nptr = endptr; seg s(start + len, color); d.segs.push_back(s); if (*endptr == '\n' || *endptr == 0) break; start += len; } image.data.push_back(d); } fclose(fp); } static void prepare_rle_buffer(rle_image &image, std::vector<int> &rle_buffer, int *offsets) { int offset = 0; for (int i = 0; i < image.height; i++) { unsigned int j; rle_data d = image.data[i]; for (j = 0; j < d.segs.size(); j++) { rle_buffer.push_back(d.segs[j].end); rle_buffer.push_back(d.segs[j].color); } offsets[i] = offset; offset += j; } } static void expand_rle(rle_image &image) { std::vector<int> rle_buffer; int offsets[image.height]; int w = image.width/16; prepare_rle_buffer(image, rle_buffer, offsets); OCL_CREATE_KERNEL("my_test"); OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, 2*sizeof(int)*rle_buffer.size(), &rle_buffer[0]); OCL_CREATE_BUFFER(buf[1], CL_MEM_COPY_HOST_PTR, sizeof(int)*image.height, offsets); OCL_CREATE_BUFFER(buf[2], 0, image.width*image.height, NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(w), &w); globals[0] = image.height; locals[0] = 16; OCL_NDRANGE(1); #if 1 OCL_MAP_BUFFER(2); for (int i = 0; i < image.height; i++) { for (int j = 0; j < image.width; j++) printf("%d ", ((unsigned char*)buf_data[2])[i*image.width+j]); printf("\n****\n"); } OCL_UNMAP_BUFFER(2); #endif } static void my_test(void) { rle_image image(256, 256); read_data("new_data.txt", image); expand_rle(image); } MAKE_UTEST_FROM_FUNCTION(my_test); Beignet-1.1.1-Source/utests/runtime_compile_link.cpp000664 001750 001750 00000012337 12576733264 021745 0ustar00yryr000000 000000 #include <stdio.h> #include <stdlib.h> #include <string.h> #include "utest_helper.hpp" #include "utest_file_map.hpp" #define BUFFERSIZE 32*1024 int
init_program(const char* name, cl_context ctx, cl_program *pg ) { cl_int err; char* ker_path = cl_do_kiss_path(name, device); cl_file_map_t *fm = cl_file_map_new(); err = cl_file_map_open(fm, ker_path); if(err != CL_FILE_MAP_SUCCESS) OCL_ASSERT(0); const char *src = cl_file_map_begin(fm); *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err); free(ker_path); cl_file_map_delete(fm); return 0; } void runtime_compile_link(void) { cl_int err; const char* header_file_name="runtime_compile_link.h"; cl_program foo_pg; init_program(header_file_name, ctx, &foo_pg); const char* myinc_file_name="include/runtime_compile_link_inc.h"; cl_program myinc_pg; init_program(myinc_file_name, ctx, &myinc_pg); const char* file_name_A="runtime_compile_link_a.cl"; cl_program program_A; init_program(file_name_A, ctx, &program_A); cl_program input_headers[2] = { foo_pg, myinc_pg}; const char * input_header_names[2] = {header_file_name, myinc_file_name}; err = clCompileProgram(program_A, 0, NULL, // num_devices & device_list NULL, // compile_options 2, // num_input_headers input_headers, input_header_names, NULL, NULL); OCL_ASSERT(err==CL_SUCCESS); const char* file_name_B="runtime_compile_link_b.cl"; cl_program program_B; init_program(file_name_B, ctx, &program_B); err = clCompileProgram(program_B, 0, NULL, // num_devices & device_list NULL, // compile_options 2, // num_input_headers input_headers, input_header_names, NULL, NULL); OCL_ASSERT(err==CL_SUCCESS); cl_program input_programs[2] = { program_A, program_B}; cl_program linked_program = clLinkProgram(ctx, 0, NULL, "-create-library", 2, input_programs, NULL, NULL, &err); OCL_ASSERT(linked_program != NULL); OCL_ASSERT(err == CL_SUCCESS); size_t binarySize; unsigned char *binary; // Get the size of the resulting binary (only one device) err= clGetProgramInfo( linked_program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); OCL_ASSERT(err==CL_SUCCESS); // Create a buffer and get the actual binary binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); if (binary == NULL) { OCL_ASSERT(0); return ; } unsigned char *buffers[ 1 ] = { binary }; // Do another sanity check here first size_t size; cl_int loadErrors[ 1 ]; err = clGetProgramInfo( linked_program, CL_PROGRAM_BINARIES, 0, NULL, &size ); OCL_ASSERT(err==CL_SUCCESS); if( size != sizeof( buffers ) ){ free(binary); return ; } err = clGetProgramInfo( linked_program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); OCL_ASSERT(err==CL_SUCCESS); cl_device_id deviceID; err = clGetProgramInfo( linked_program, CL_PROGRAM_DEVICES, sizeof( deviceID), &deviceID, NULL ); OCL_ASSERT(err==CL_SUCCESS); cl_program program_with_binary = clCreateProgramWithBinary(ctx, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &err); OCL_ASSERT(err==CL_SUCCESS); cl_program new_linked_program = clLinkProgram(ctx, 1, &deviceID, NULL, 1, &program_with_binary, NULL, NULL, &err); OCL_ASSERT(err==CL_SUCCESS); // link success, run this kernel. 
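// The round trip exercised above, in brief: clCompileProgram() turns each .cl
// source into an object, resolving its #include directives against the embedded
// header programs; clLinkProgram() with "-create-library" combines the two
// objects; the library binary is then read back and reloaded, proving it
// survives a serialize/deserialize cycle. A minimal sketch of that tail end,
// using the same API calls as the code above (variable names are ours):
//
//   size_t sz;
//   clGetProgramInfo(linked_program, CL_PROGRAM_BINARY_SIZES, sizeof(sz), &sz, NULL);
//   unsigned char *bin = (unsigned char *)malloc(sz);
//   unsigned char *bins[1] = { bin };
//   clGetProgramInfo(linked_program, CL_PROGRAM_BINARIES, sizeof(bins), &bins, NULL);
//   cl_program reloaded = clCreateProgramWithBinary(ctx, 1, &deviceID, &sz,
//                             (const unsigned char **)bins, NULL, &err);
//   cl_program exe = clLinkProgram(ctx, 1, &deviceID, NULL, 1, &reloaded,
//                                  NULL, NULL, &err);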
const size_t n = 16; int64_t src1[n], src2[n]; src1[0] = (int64_t)1 << 63, src2[0] = 0x7FFFFFFFFFFFFFFFll; src1[1] = (int64_t)1 << 63, src2[1] = ((int64_t)1 << 63) | 1; src1[2] = -1ll, src2[2] = 0; src1[3] = ((int64_t)123 << 32) | 0x7FFFFFFF, src2[3] = ((int64_t)123 << 32) | 0x80000000; src1[4] = 0x7FFFFFFFFFFFFFFFll, src2[4] = (int64_t)1 << 63; src1[5] = ((int64_t)1 << 63) | 1, src2[5] = (int64_t)1 << 63; src1[6] = 0, src2[6] = -1ll; src1[7] = ((int64_t)123 << 32) | 0x80000000, src2[7] = ((int64_t)123 << 32) | 0x7FFFFFFF; for(size_t i=8; i= CL_SUBMITTED); } buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); clEnqueueMarkerWithWaitList(queue, 0, NULL, &ev[3]); clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), (void *)cpu_src_2, 0, NULL, &ev[4]); OCL_FINISH(); clGetEventInfo(ev[4], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status == CL_COMPLETE); OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status == CL_COMPLETE); OCL_FINISH(); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status <= CL_COMPLETE); } for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); } clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clReleaseEvent(ev[i]); } } MAKE_UTEST_FROM_FUNCTION(runtime_marker_list); Beignet-1.1.1-Source/utests/compiler_volatile.cpp000664 001750 001750 00000000264 12576733264 021242 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_volatile(void) { // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_volatile"); } MAKE_UTEST_FROM_FUNCTION(compiler_volatile); Beignet-1.1.1-Source/utests/compiler_async_copy.cpp000664 001750 001750 00000003242 12576733264 021571 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include typedef unsigned char uchar; typedef unsigned short ushort; #define DEF(TYPE, KER_TYPE, VEC_SIZE) \ static void compiler_async_copy_##KER_TYPE##VEC_SIZE(void) \ { \ const size_t n = 1024; \ const size_t local_size = 32; \ const int copiesPerWorkItem = 5; \ \ /* Setup kernel and buffers */\ OCL_CREATE_KERNEL_FROM_FILE("compiler_async_copy", "compiler_async_copy_" # KER_TYPE # VEC_SIZE); \ OCL_CREATE_BUFFER(buf[0], 0, n * copiesPerWorkItem * sizeof(TYPE) * VEC_SIZE, NULL); \ OCL_CREATE_BUFFER(buf[1], 0, n * copiesPerWorkItem * sizeof(TYPE) * VEC_SIZE, NULL); \ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ OCL_SET_ARG(2, local_size*copiesPerWorkItem*sizeof(TYPE)*VEC_SIZE, NULL); \ OCL_SET_ARG(3, sizeof(int), &copiesPerWorkItem); \ \ OCL_MAP_BUFFER(1); \ for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; ++i) \ ((TYPE*)buf_data[1])[i] = rand(); \ OCL_UNMAP_BUFFER(1); \ \ /* Run the kernel */\ globals[0] = n; \ locals[0] = local_size; \ OCL_NDRANGE(1); \ OCL_MAP_BUFFER(0); \ OCL_MAP_BUFFER(1); \ \ /* Check results */\ TYPE *dst = (TYPE*)buf_data[0]; \ TYPE *src = (TYPE*)buf_data[1]; \ for (uint32_t i = 0; i < n * copiesPerWorkItem * VEC_SIZE; i++) \ OCL_ASSERT(dst[i] == src[i]); \ OCL_UNMAP_BUFFER(0); \ OCL_UNMAP_BUFFER(1); \ } \ \ MAKE_UTEST_FROM_FUNCTION(compiler_async_copy_##KER_TYPE##VEC_SIZE); DEF(char, char, 2); DEF(uchar, uchar, 2); DEF(short, short, 2); DEF(ushort, 
ushort, 2); DEF(int, int, 2); DEF(uint, uint, 2); DEF(int64_t, long, 2); DEF(uint64_t, ulong, 2); DEF(float, float, 2); //DEF(double, double, 2); Beignet-1.1.1-Source/utests/compiler_bool_cross_basic_block.cpp000664 001750 001750 00000002525 12576733264 024104 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst, int scale) { bool isRedRow = false; bool isRed; int val = src[global_id]; for (int i=0; i #include <cstdint> #include <cstring> #include <cstdio> #include <cmath> #include <string> #include <vector> #include "utest_helper.hpp" static uint32_t __half_to_float(uint16_t h, bool* isInf = NULL, bool* infSign = NULL) { struct __FP32 { uint32_t mantissa:23; uint32_t exponent:8; uint32_t sign:1; }; struct __FP16 { uint32_t mantissa:10; uint32_t exponent:5; uint32_t sign:1; }; uint32_t f; __FP32 o; memset(&o, 0, sizeof(o)); __FP16 i; memcpy(&i, &h, sizeof(uint16_t)); if (isInf) *isInf = false; if (infSign) *infSign = false; if (i.exponent == 0 && i.mantissa == 0) // (Signed) zero o.sign = i.sign; else { if (i.exponent == 0) { // Denormal (converts to normalized) // Adjust mantissa so it's normalized (and keep // track of exponent adjustment) int e = -1; uint m = i.mantissa; do { e++; m <<= 1; } while ((m & 0x400) == 0); o.mantissa = (m & 0x3ff) << 13; o.exponent = 127 - 15 - e; o.sign = i.sign; } else if (i.exponent == 0x1f) { // Inf/NaN // NOTE: Both can be handled with same code path // since we just pass through mantissa bits. o.mantissa = i.mantissa << 13; o.exponent = 255; o.sign = i.sign; if (isInf) { *isInf = (i.mantissa == 0); if (infSign) *infSign = !i.sign; } } else { // Normalized number o.mantissa = i.mantissa << 13; o.exponent = 127 - 15 + i.exponent; o.sign = i.sign; } } memcpy(&f, &o, sizeof(uint32_t)); return f; } static uint16_t __float_to_half(uint32_t x) { uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */ uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */ /* If zero, or denormal, or exponent underflows too much for a denormal * half, return signed zero. */ if (e < 103) return bits; /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ if (e > 142) { bits |= 0x7c00u; /* If exponent was 0xff and one mantissa bit was set, it means NaN, * not Inf, so make sure we set one mantissa bit too. */ bits |= e == 255 && (x & 0x007fffffu); return bits; } /* If exponent underflows but not too much, return a denormal */ if (e < 113) { m |= 0x0800u; /* Extra rounding may overflow and set mantissa to 0 and exponent * to 1, which is OK. */ bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); return bits; } bits |= ((e - 112) << 10) | (m >> 1); /* Extra rounding. An overflow will set mantissa to 0 and increment * the exponent, which is OK. */ bits += m & 1; return bits; } static int check_half_device(void) { std::string extStr; size_t param_value_size; OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, &param_value_size); std::vector<char> param_value(param_value_size); OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size, param_value.empty() ? NULL : &param_value.front(), &param_value_size); if (!param_value.empty()) extStr = std::string(&param_value.front(), param_value_size-1); if (std::strstr(extStr.c_str(), "cl_khr_fp16") == NULL) { printf("No cl_khr_fp16, Skip!"); return 0; } return 1; } void compiler_half_basic(void) { const size_t n = 16; uint16_t hsrc[n]; float fsrc[n], fdst[n]; float f = 2.5; uint32_t tmp_f; if (!check_half_device()) return; memcpy(&tmp_f, &f, sizeof(float)); // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half", "compiler_half_basic"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { fsrc[i] = 10.1 * i; memcpy(&tmp_f, &fsrc[i], sizeof(float)); hsrc[i] = __float_to_half(tmp_f); } for (int32_t i = 0; i < (int32_t) n; ++i) { fdst[i] = fsrc[i] + f; fdst[i] = fdst[i]*fdst[i]; fdst[i] = fdst[i]/1.8; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(hsrc)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]); memcpy(&f, &tmp_f, sizeof(float)); printf("%f %f\n", f, fdst[i]); OCL_ASSERT(fabs(f - fdst[i]) <= 0.01 * fabs(fdst[i]) || (fdst[i] == 0.0 && f == 0.0)); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_basic); #define HALF_MATH_TEST_1ARG(NAME, CPPNAME, RANGE_L, RANGE_H) \ void compiler_half_math_##NAME(void) \ { \ const size_t n = 16; \ uint16_t hsrc[n]; \ float fsrc[n], fdst[n]; \ uint32_t tmp_f; \ float f; \ \ if (!check_half_device()) \ return; \ \ OCL_CREATE_KERNEL_FROM_FILE("compiler_half_math", "compiler_half_math_" #NAME); \ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); \ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); \ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ globals[0] = n; \ locals[0] = 16; \ \ for (int32_t i = 0; i < (int32_t) n; ++i) { \ fsrc[i] = RANGE_L + ((rand()%1000) / 1000.0f ) * ((RANGE_H) - (RANGE_L)); \ memcpy(&tmp_f, &fsrc[i], sizeof(float)); \ hsrc[i] = __float_to_half(tmp_f); \ } \ \ for (int32_t i = 0; i < (int32_t) n; ++i) { \ /* printf("Float is %f\n", fsrc[i]); */ \ fdst[i] = CPPNAME(fsrc[i]); \ } \ \ OCL_MAP_BUFFER(0); \ OCL_MAP_BUFFER(1); \ memcpy(buf_data[0], hsrc, sizeof(hsrc)); \ memset(buf_data[1], 0, sizeof(hsrc)); \ OCL_UNMAP_BUFFER(0); \ OCL_UNMAP_BUFFER(1); \ OCL_NDRANGE(1); \ \ OCL_MAP_BUFFER(1); \ for (int32_t i = 0; i < (int32_t) n; ++i) { \ bool isInf, infSign; \ tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i], &isInf, &infSign); \ memcpy(&f, &tmp_f, sizeof(float)); \ /*printf("%.15f %.15f, diff is %%%f\n", f, fdst[i], (fabs(f - fdst[i])/fabs(fdst[i]))); */ \ OCL_ASSERT(((fabs(fdst[i]) < 6e-8f) && (fabs(f) < 6e-8f)) || \ (fabs(f - fdst[i]) <= 0.03 * fabs(fdst[i])) || \ (isInf && ((infSign && fdst[i] > 65504.0f) || (!infSign && fdst[i] < -65504.0f))) || \ (isnan(f) && isnan(fdst[i]))); \ } \ OCL_UNMAP_BUFFER(1); \ } \ MAKE_UTEST_FROM_FUNCTION(compiler_half_math_##NAME); HALF_MATH_TEST_1ARG(sin, sinf, -10, 10); HALF_MATH_TEST_1ARG(cos, cosf, -10, 10); HALF_MATH_TEST_1ARG(sinh, sinh, -10, 10); HALF_MATH_TEST_1ARG(cosh, cosh, -10, 10); HALF_MATH_TEST_1ARG(tan, tanf, -3.14/2, 3.14/2); HALF_MATH_TEST_1ARG(log10, log10f, 0.1, 100);
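// For reference, the acceptance rule that the HALF_MATH_TEST macros encode,
// pulled out into a standalone helper. This function is an illustration added
// here (its name is ours, it is not part of the original suite): a result
// passes if both values are denormal-small, or within 3% relative error, or
// the device saturated to the right infinity past the half range (65504), or
// both sides are NaN.
static inline bool half_math_result_ok(float got, float want, bool isInf, bool infSign)
{
  if (fabs(want) < 6e-8f && fabs(got) < 6e-8f) return true;   // both below half denormal noise
  if (fabs(got - want) <= 0.03f * fabs(want)) return true;    // within 3% relative error
  if (isInf && ((infSign && want > 65504.0f) ||
                (!infSign && want < -65504.0f))) return true; // overflowed to +/-inf as expected
  return isnan(got) && isnan(want);                           // NaN matches NaN
}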
HALF_MATH_TEST_1ARG(log, logf, 0.01, 1000); HALF_MATH_TEST_1ARG(trunc, truncf, -1000, 1000); HALF_MATH_TEST_1ARG(exp, expf, -19.0, 20.0); HALF_MATH_TEST_1ARG(sqrt, sqrtf, -19.0, 10.0); HALF_MATH_TEST_1ARG(ceil, ceilf, -19.0, 20.0); #define HALF_MATH_TEST_2ARG(NAME, CPPNAME, RANGE_L, RANGE_H) \ void compiler_half_math_##NAME(void) \ { \ const size_t n = 16*4; \ uint16_t hsrc0[n], hsrc1[n]; \ float fsrc0[n], fsrc1[n], fdst[n]; \ uint32_t tmp_f; \ float f; \ \ if (!check_half_device()) \ return; \ \ OCL_CREATE_KERNEL_FROM_FILE("compiler_half_math", "compiler_half_math_" #NAME); \ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); \ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); \ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint16_t), NULL); \ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); \ globals[0] = n; \ locals[0] = 16; \ \ for (int32_t i = 0; i < (int32_t) n; ++i) { \ fsrc0[i] = RANGE_L + (((RANGE_H) - (RANGE_L))/n) * i; \ memcpy(&tmp_f, &fsrc0[i], sizeof(float)); \ hsrc0[i] = __float_to_half(tmp_f); \ fsrc1[i] = RANGE_L + ((rand()%1000) / 1000.0f ) * ((RANGE_H) - (RANGE_L)); \ memcpy(&tmp_f, &fsrc1[i], sizeof(float)); \ hsrc1[i] = __float_to_half(tmp_f); \ } \ \ for (int32_t i = 0; i < (int32_t) n; ++i) { \ /* printf("Float is %f %f\n", fsrc0[i], fsrc1[i]);*/ \ fdst[i] = CPPNAME(fsrc0[i], fsrc1[i]); \ } \ \ OCL_MAP_BUFFER(0); \ OCL_MAP_BUFFER(1); \ OCL_MAP_BUFFER(2); \ memcpy(buf_data[0], hsrc0, sizeof(hsrc0)); \ memcpy(buf_data[1], hsrc1, sizeof(hsrc1)); \ memset(buf_data[2], 0, sizeof(hsrc0)); \ OCL_UNMAP_BUFFER(0); \ OCL_UNMAP_BUFFER(1); \ OCL_UNMAP_BUFFER(2); \ OCL_NDRANGE(1); \ \ OCL_MAP_BUFFER(2); \ for (int32_t i = 0; i < (int32_t) n; ++i) { \ bool isInf, infSign; \ tmp_f = __half_to_float(((uint16_t *)buf_data[2])[i], &isInf, &infSign); \ memcpy(&f, &tmp_f, sizeof(float)); \ /*printf("%.15f %.15f, diff is %%%f\n", f, fdst[i], (fabs(f - fdst[i])/fabs(fdst[i]))); */ \ OCL_ASSERT(((fabs(fdst[i]) < 6e-8f) && (fabs(f) < 6e-8f)) || \ (fabs(f - fdst[i]) <= 0.03 * fabs(fdst[i])) || \ (isInf && ((infSign && fdst[i] > 65504.0f) || (!infSign && fdst[i] < -65504.0f))) || \ (isnan(f) && isnan(fdst[i]))); \ } \ OCL_UNMAP_BUFFER(2); \ } \ MAKE_UTEST_FROM_FUNCTION(compiler_half_math_##NAME); HALF_MATH_TEST_2ARG(fmod, fmod, 1.0, 500.0); HALF_MATH_TEST_2ARG(fmax, fmax, -10.0, 20.0); HALF_MATH_TEST_2ARG(fmin, fmin, -10.0, 20.0); void compiler_half_isnan(void) { const size_t n = 16*2; uint16_t hsrc[n]; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_relation", "compiler_half_isnan"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { hsrc[i] = 0xFF00; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(uint16_t)*n); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%d\n", ((uint16_t *)buf_data[1])[i]); OCL_ASSERT(((int16_t *)buf_data[1])[i] == -1); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_isnan); void compiler_half_isinf(void) { const size_t n = 16; uint16_t hsrc[n]; if (!check_half_device()) return; // Setup kernel and buffers 
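// A note on the bit patterns the isnan/isinf tests rely on (IEEE 754 half
// precision: 1 sign, 5 exponent, 10 mantissa bits): an all-ones exponent with
// a zero mantissa encodes infinity, so 0x7C00 is +inf and 0xFC00 is -inf,
// while any non-zero mantissa under that exponent is a NaN, e.g. the 0xFF00
// fed to compiler_half_isnan above.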
OCL_CREATE_KERNEL_FROM_FILE("compiler_half_relation", "compiler_half_isinf"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n/2; ++i) { hsrc[i] = 0x7C00; } for (int32_t i = n/2; i < (int32_t) n; ++i) { hsrc[i] = 0xFC00; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(int)*n); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%d\n", ((int *)buf_data[1])[i]); OCL_ASSERT(((int *)buf_data[1])[i] == 1); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_isinf); void compiler_half_to_float(void) { const size_t n = 16*4; uint16_t hsrc[n]; float fdst[n]; uint32_t tmp_f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_float"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { fdst[i] = 13.1 * i; memcpy(&tmp_f, &fdst[i], sizeof(float)); hsrc[i] = __float_to_half(tmp_f); } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0.0f, sizeof(fdst)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%f %f, abs is %f\n", (((float *)buf_data[1])[i]), fdst[i], fabs((((float *)buf_data[1])[i]) - fdst[i])); OCL_ASSERT((fabs((((float *)buf_data[1])[i]) - fdst[i]) < 0.001 * fabs(fdst[i])) || (fdst[i] == 0.0 && (((float *)buf_data[1])[i]) == 0.0)); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_to_float); void compiler_half_as_char2(void) { const size_t n = 16; uint16_t hsrc[n]; uint8_t* csrc = (uint8_t*)hsrc; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_as_char2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { hsrc[i] = (i&0x0f)<<8 | ((i+1)&0x0f); } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(hsrc)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n*2; ++i) { //printf("%d %d\n", (((uint8_t *)buf_data[1])[i]), csrc[i]); OCL_ASSERT((((uint8_t *)buf_data[1])[i]) == csrc[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_as_char2); void compiler_half2_as_int(void) { const size_t n = 16*2; uint16_t hsrc[n]; int* isrc = (int*)hsrc; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half2_as_int"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), 
&buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { hsrc[i] = (i&0x0f)<<8 | ((i+1)&0x0f); } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(hsrc)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n/2; ++i) { //printf("%d %d\n", (((int *)buf_data[1])[i]), isrc[i]); OCL_ASSERT((((int *)buf_data[1])[i]) == isrc[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half2_as_int); void compiler_half_to_char_sat(void) { const size_t n = 16; uint16_t hsrc[n]; float fsrc[n]; char dst[n]; uint32_t tmp_f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_char_sat"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(char), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { fsrc[i] = -200.1f + 30.5f * i; memcpy(&tmp_f, &fsrc[i], sizeof(float)); hsrc[i] = __float_to_half(tmp_f); if (fsrc[i] <= -128.0f) { dst[i] = -128; } else if (fsrc[i] >= 127.0f) { dst[i] = 127; } else { dst[i] = (char)fsrc[i]; } } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(dst)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%d %d\n", (((char *)buf_data[1])[i]), dst[i]); OCL_ASSERT((((char *)buf_data[1])[i]) == dst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_to_char_sat); void compiler_half_to_ushort_sat(void) { const size_t n = 16; uint16_t hsrc[n]; float fsrc[n]; uint16_t dst[n]; uint32_t tmp_f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_ushort_sat"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { fsrc[i] = -100.1f + 10.3f * i; memcpy(&tmp_f, &fsrc[i], sizeof(float)); hsrc[i] = __float_to_half(tmp_f); if (fsrc[i] <= 0.0f) { dst[i] = 0; } else { dst[i] = (uint16_t)fsrc[i]; } } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(dst)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%u %u\n", (((uint16_t *)buf_data[1])[i]), dst[i]); OCL_ASSERT((((uint16_t *)buf_data[1])[i]) == dst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_to_ushort_sat); void compiler_half_to_uint_sat(void) { const size_t n = 16; uint16_t hsrc[n]; float fsrc[n]; uint32_t dst[n]; uint32_t tmp_f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_uint_sat"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { 
fsrc[i] = -10.1f + 13.965f * i; memcpy(&tmp_f, &fsrc[i], sizeof(float)); hsrc[i] = __float_to_half(tmp_f); if (fsrc[i] <= 0.0f) { dst[i] = 0; } else { dst[i] = (uint32_t)fsrc[i]; } } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, sizeof(dst)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%u %u\n", (((uint32_t *)buf_data[1])[i]), dst[i]); OCL_ASSERT((((uint32_t *)buf_data[1])[i]) == dst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_to_uint_sat); void compiler_uchar_to_half(void) { const size_t n = 16; uint8_t hsrc[n]; float fdst[n]; uint32_t tmp_f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_uchar_to_half"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint8_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { hsrc[i] = 5*i; fdst[i] = (float)hsrc[i]; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, n*sizeof(uint16_t)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { float f; tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]); memcpy(&f, &tmp_f, sizeof(float)); //printf("%f %f\n", f, fdst[i]); OCL_ASSERT(f == fdst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_uchar_to_half); void compiler_int_to_half(void) { const size_t n = 16; int hsrc[n]; float fdst[n]; uint32_t tmp_f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_int_to_half"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { hsrc[i] = 51*i; fdst[i] = (float)hsrc[i]; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, n*sizeof(uint16_t)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { float f; tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]); memcpy(&f, &tmp_f, sizeof(float)); //printf("%f %f\n", f, fdst[i]); OCL_ASSERT(f == fdst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_int_to_half); void compiler_half_to_long(void) { const size_t n = 16; uint16_t hsrc[n]; int64_t ldst[n]; uint32_t tmp_f; float f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_long"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { f = -100.1f + 10.3f * i; memcpy(&tmp_f, &f, sizeof(float)); hsrc[i] = __float_to_half(tmp_f); ldst[i] = (int64_t)f; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, 
n*sizeof(uint64_t)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%ld %ld\n", (((int64_t *)buf_data[1])[i]), ldst[i]); OCL_ASSERT((((int64_t *)buf_data[1])[i]) == ldst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_to_long); void compiler_ulong_to_half(void) { const size_t n = 16; uint64_t src[n]; float fdst[n]; uint32_t tmp_f; float f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_ulong_to_half"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 0; i < (int32_t) n; ++i) { src[i] = 10 + 126*i; fdst[i] = (float)src[i]; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src, sizeof(src)); memset(buf_data[1], 0, n*sizeof(uint16_t)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]); memcpy(&f, &tmp_f, sizeof(float)); //printf("%f %f\n", f, fdst[i]); OCL_ASSERT(f == fdst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_ulong_to_half); void compiler_half_to_long_sat(void) { const size_t n = 16; uint16_t hsrc[n]; int64_t ldst[n]; uint32_t tmp_f; float f; if (!check_half_device()) return; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_long_sat"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; for (int32_t i = 1; i < (int32_t) n-1; ++i) { f = -100.1f + 10.3f * i; memcpy(&tmp_f, &f, sizeof(float)); hsrc[i] = __float_to_half(tmp_f); ldst[i] = (int64_t)f; } hsrc[0] = 0xFC00; //-inf; ldst[0] = 0x8000000000000000; hsrc[n-1] = 0x7C00; //inf; ldst[n-1] = 0x7FFFFFFFFFFFFFFF; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], hsrc, sizeof(hsrc)); memset(buf_data[1], 0, n*sizeof(uint64_t)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%lx %lx\n", (((int64_t *)buf_data[1])[i]), ldst[i]); OCL_ASSERT((((int64_t *)buf_data[1])[i]) == ldst[i]); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_to_long_sat); Beignet-1.1.1-Source/utests/runtime_null_kernel_arg.cpp000664 001750 001750 00000001160 12576733264 022433 0ustar00yryr000000 000000 #include "utest_helper.hpp" void runtime_null_kernel_arg(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("null_kernel_arg"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), NULL); OCL_SET_ARG(2, sizeof(cl_mem), NULL); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t*)buf_data[0])[i] == i); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(runtime_null_kernel_arg); Beignet-1.1.1-Source/utests/compiler_global_constant.cpp000664 001750 001750 00000005736 12576733264 022605 0ustar00yryr000000 000000 
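// The compiler_global_constant* cases below validate kernels that read from
// program-scope __constant arrays. From the host-side checks one can infer
// roughly this kernel shape (a sketch reconstructed from the expected values,
// not the shipped .cl source):
//
//   __constant unsigned int m[3] = {71, 72, 73};
//   __kernel void compiler_global_constant(__global unsigned int *dst,
//                                          unsigned int e, unsigned int r) {
//     int id = get_global_id(0);
//     dst[id] = m[id % 3] + e + r;
//   }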
#include "utest_helper.hpp" void compiler_global_constant(void) { const size_t n = 2048; const uint32_t e = 34, r = 77; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_global_constant"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(uint32_t), &e); OCL_SET_ARG(2, sizeof(uint32_t), &r); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); unsigned int m[3] = {71,72,73}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], m[i%3] + e + r); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == m[i%3] + e + r); OCL_UNMAP_BUFFER(0); } void compiler_global_constant1(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant", "compiler_global_constant1"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); uint32_t data1[] = {1, 4, 7}; uint32_t data2[]= {3, 7, 11}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], data1[i%3] + data2[i%3]); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == data1[i%3] + data2[i%3]); OCL_UNMAP_BUFFER(0); } void compiler_global_constant2(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant", "compiler_global_constant2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], 6); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == 6); OCL_UNMAP_BUFFER(0); } void compiler_global_constant3(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant", "compiler_global_constant3"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); uint32_t data1[] = {3, 6, 9}; char data2[]= {'c', 'f', 'j'}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // printf("%d result %d reference %d\n", i, ((uint32_t *)buf_data[0])[i], data1[i%3] + (int)data2[i%3]); OCL_ASSERT(((uint32_t *)buf_data[0])[i] == data1[i%3] + (uint32_t)data2[i%3]); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_global_constant, true); MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_global_constant1, true); MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_global_constant2, true); MAKE_UTEST_FROM_FUNCTION(compiler_global_constant3); Beignet-1.1.1-Source/utests/compiler_long_div.cpp000664 001750 001750 00000004774 12576733264 021236 0ustar00yryr000000 000000 #include #include #include #include "utest_helper.hpp" void compiler_long_div(void) { const size_t n = 16; int64_t src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_long_div", "compiler_long_div"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 
16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src1[i] = ((int64_t)rand() << 32) + rand(); src2[i] = ((int64_t)rand() << 32) + rand();; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("ref is %lx, res is %lx\n", src1[i] / src2[i] , ((int64_t *)buf_data[2])[i]); OCL_ASSERT(src1[i] / src2[i] == ((int64_t *)buf_data[2])[i]); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_div); void compiler_long_rem(void) { const size_t n = 16; int64_t src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_long_div", "compiler_long_rem"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src1[i] = ((int64_t)rand() << 32) + rand(); src2[i] = ((int64_t)rand() << 32) + rand();; } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("ref is %lx, res is %lx\n", src1[i] / src2[i] , ((int64_t *)buf_data[2])[i]); OCL_ASSERT(src1[i] % src2[i] == ((int64_t *)buf_data[2])[i]); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_rem); Beignet-1.1.1-Source/utests/compiler_array3.cpp000664 001750 001750 00000002416 12576733264 020625 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { int tmp[32]; for (int i = 0; i < 16; ++i) { for (int j = 0; j < 16; ++j) tmp[j] = global_id; for (int j = 0; j < src[0]; ++j) tmp[j] = 1+src[j]; tmp[16+i] = tmp[i]; } dst[global_id] = tmp[16+global_id]; } void compiler_array3(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_array3"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_array3); Beignet-1.1.1-Source/utests/compiler_write_only_bytes.cpp000664 001750 001750 00000001022 12576733264 023015 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_write_only_bytes(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_write_only_bytes"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint8_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); // Run the kernel globals[0] = n; locals[0] = 16; 
OCL_NDRANGE(1); OCL_MAP_BUFFER(0); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint8_t*)buf_data[0])[i] == 2); } MAKE_UTEST_FROM_FUNCTION(compiler_write_only_bytes); Beignet-1.1.1-Source/utests/compiler_long_mult.cpp000664 001750 001750 00000002464 12576733264 021427 0ustar00yryr000000 000000 #include #include #include #include "utest_helper.hpp" void compiler_long_mult(void) { const size_t n = 16; int64_t src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_long_mult"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; // Run random tests for (int32_t i = 0; i < (int32_t) n; ++i) { src1[i] = 0x77665544FFEEDDCCLL; src2[i] = ((int64_t)rand() << 32) + rand(); } OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); // Run the kernel on GPU OCL_NDRANGE(1); // Compare OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%lx\n", ((int64_t *)buf_data[2])[i]); if (i < 3) OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]); else OCL_ASSERT(src1[i] * src2[i] == ((int64_t *)buf_data[2])[i]); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_long_mult); Beignet-1.1.1-Source/utests/compiler_function_constant0.cpp000664 001750 001750 00000002035 12576733264 023237 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_function_constant0(void) { const size_t n = 2048; const uint32_t value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_function_constant0"); OCL_CREATE_BUFFER(buf[0], 0, 75 * sizeof(int32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, 1 * sizeof(char), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(uint32_t), &value); OCL_MAP_BUFFER(0); for(uint32_t i = 0; i < 69; ++i) ((int32_t *)buf_data[0])[i] = i; OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); ((char *)buf_data[1])[0] = 15; OCL_UNMAP_BUFFER(1); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(2); // Check results for (uint32_t i = 0; i < n; ++i) OCL_ASSERT(((uint32_t *)buf_data[2])[i] == (value + 15 + i%69)); OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_function_constant0); Beignet-1.1.1-Source/utests/runtime_barrier_list.cpp000664 001750 001750 00000004254 12576733264 021760 0ustar00yryr000000 000000 #include "utest_helper.hpp" #define BUFFERSIZE 32*1024 void runtime_barrier_list(void) { const size_t n = BUFFERSIZE; cl_int cpu_src[BUFFERSIZE]; cl_int cpu_src_2[BUFFERSIZE]; cl_event ev[5]; cl_int status = 0; cl_int value = 34; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_event"); OCL_CREATE_BUFFER(buf[0], 0, BUFFERSIZE*sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, BUFFERSIZE*sizeof(int), NULL); for(cl_uint i=0; i= CL_SUBMITTED); } buf_data[0] = clEnqueueMapBuffer(queue, buf[0], CL_TRUE, 0, 0, BUFFERSIZE*sizeof(int), 1, &ev[2], NULL, NULL); clEnqueueBarrierWithWaitList(queue, 0, NULL, &ev[3]); clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, BUFFERSIZE*sizeof(int), (void *)cpu_src_2, 0, NULL, &ev[4]); OCL_FINISH(); clGetEventInfo(ev[4], 
CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status != CL_COMPLETE); OCL_SET_USER_EVENT_STATUS(ev[0], CL_COMPLETE); clGetEventInfo(ev[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status == CL_COMPLETE); OCL_FINISH(); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clGetEventInfo(ev[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL); OCL_ASSERT(status <= CL_COMPLETE); } for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(((int*)buf_data[0])[i] == (int)value + 0x3); } clEnqueueUnmapMemObject(queue, buf[0], buf_data[0], 0, NULL, NULL); for (cl_uint i = 0; i != sizeof(ev) / sizeof(cl_event); ++i) { clReleaseEvent(ev[i]); } } MAKE_UTEST_FROM_FUNCTION(runtime_barrier_list); Beignet-1.1.1-Source/utests/compiler_unstructured_branch0.cpp000664 001750 001750 00000003117 12576733264 023567 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_unstructured_branch0(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_unstructured_branch0"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); for (uint32_t i = 16; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1); // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1); // Third control flow for (uint32_t i = 0; i < 8; ++i) ((int32_t*)buf_data[0])[i] = 2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 8; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); for (uint32_t i = 8; i < 32; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 1); } MAKE_UTEST_FROM_FUNCTION(compiler_unstructured_branch0); Beignet-1.1.1-Source/utests/compiler_cl_finish.cpp000664 001750 001750 00000002311 12576733264 021354 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include <sys/time.h> #define T_GET(t) gettimeofday(&t, NULL); #define T_LAPSE(t1, t2) \ ((t2.tv_sec+t2.tv_usec*0.000001) - (t1.tv_sec+t1.tv_usec*0.000001)) static void compiler_cl_finish(void) { const size_t n = 16*1024*1024; struct timeval t1, t2; float t_fin, t_map_w_fin, t_map_wo_fin; // Setup kernel and buffers OCL_CREATE_KERNEL("test_cl_finish"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); // Run the kernel locals[0] = 64; globals[0] = 32 * locals[0]; OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(int), &n); OCL_SET_ARG(3, sizeof(int), &globals[0]); // 1st time map after clFinish OCL_NDRANGE(1); T_GET(t1); OCL_FINISH(); T_GET(t2); t_fin = T_LAPSE(t1, t2); T_GET(t1); OCL_MAP_BUFFER(0); T_GET(t2); t_map_w_fin = T_LAPSE(t1, t2); // 2nd time map without clFinish OCL_NDRANGE(1); T_GET(t1); OCL_MAP_BUFFER(0); T_GET(t2); t_map_wo_fin = T_LAPSE(t1, t2);
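// Why the assertion just below holds: the first blocking map happens right
// after an explicit clFinish(), so the queue is already drained and the map
// returns quickly (t_map_w_fin is small); t_fin absorbed the kernel time
// instead. In the second round there is no clFinish(), so the blocking map
// itself must wait for the kernel and t_map_wo_fin absorbs the execution
// time. Hence t_fin > t_map_w_fin and t_map_wo_fin > t_map_w_fin.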
OCL_ASSERT(t_fin > t_map_w_fin && t_map_wo_fin > t_map_w_fin); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_cl_finish); Beignet-1.1.1-Source/utests/compiler_uint3_unaligned_copy.cpp000664 001750 001750 00000002377 12576733264 023554 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_uint3_unaligned_copy(void) { const size_t n = 128; // Setup kernel and buffers. Note that uint3 is aligned on 16 bytes // according to the OCL specification OCL_CREATE_KERNEL("compiler_uint3_unaligned_copy"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t[4]) * n); for (uint32_t i = 0; i < n; ++i) { ((uint32_t*)buf_data[0])[3*i+0] = 3*i+0; ((uint32_t*)buf_data[0])[3*i+1] = 3*i+1; ((uint32_t*)buf_data[0])[3*i+2] = 3*i+2; } OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t[4]), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t[4]), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) { OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+0] == ((uint32_t*)buf_data[1])[3*i+0]); OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+1] == ((uint32_t*)buf_data[1])[3*i+1]); OCL_ASSERT(((uint32_t*)buf_data[0])[3*i+2] == ((uint32_t*)buf_data[1])[3*i+2]); } } MAKE_UTEST_FROM_FUNCTION(compiler_uint3_unaligned_copy); Beignet-1.1.1-Source/utests/compiler_copy_image1.cpp000664 001750 001750 00000004561 12576733264 021624 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" static void compiler_copy_image1(void) { const size_t w = 512; const size_t h = 512; cl_image_format format; cl_image_desc desc; cl_sampler sampler; memset(&desc, 0x0, sizeof(cl_image_desc)); memset(&format, 0x0, sizeof(cl_image_format)); // Setup kernel and images OCL_CREATE_KERNEL("test_copy_image1"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) ((uint32_t*)buf_data[0])[j * w + i] = j * w + i; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = w; desc.image_height = h; desc.image_row_pitch = w * sizeof(uint32_t); OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]); OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST); desc.image_row_pitch = 0; OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL); OCL_CREATE_IMAGE(buf[2], 0, &format, &desc, NULL); OCL_CREATE_IMAGE(buf[3], 0, &format, &desc, NULL); OCL_CREATE_IMAGE(buf[4], 0, &format, &desc, NULL); OCL_CREATE_IMAGE(buf[5], 0, &format, &desc, NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(sampler), &sampler); OCL_SET_ARG(3, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(4, sizeof(cl_mem), &buf[3]); OCL_SET_ARG(5, sizeof(cl_mem), &buf[4]); OCL_SET_ARG(6, sizeof(cl_mem), &buf[5]); float w_inv = 1.0/w; float h_inv = 1.0/h; OCL_SET_ARG(7, sizeof(float), &w_inv); OCL_SET_ARG(8, sizeof(float), &h_inv); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); OCL_MAP_BUFFER(3); OCL_MAP_BUFFER(4); OCL_MAP_BUFFER(5); for(uint32_t k = 0; k < 5; k++) { for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) 
OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == ((uint32_t*)buf_data[1 + k])[j * w + i]); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(3); OCL_UNMAP_BUFFER(4); OCL_UNMAP_BUFFER(5); OCL_CALL(clReleaseSampler, sampler); } MAKE_UTEST_FROM_FUNCTION(compiler_copy_image1); Beignet-1.1.1-Source/utests/utest_error.c000664 001750 001750 00000007272 12576733264 017554 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "utest_error.h" #include "CL/cl.h" const char *err_msg[] = { [-CL_SUCCESS] = "CL_SUCCESS", [-CL_DEVICE_NOT_FOUND] = "CL_DEVICE_NOT_FOUND", [-CL_DEVICE_NOT_AVAILABLE] = "CL_DEVICE_NOT_AVAILABLE", [-CL_COMPILER_NOT_AVAILABLE] = "CL_COMPILER_NOT_AVAILABLE", [-CL_MEM_OBJECT_ALLOCATION_FAILURE] = "CL_MEM_OBJECT_ALLOCATION_FAILURE", [-CL_OUT_OF_RESOURCES] = "CL_OUT_OF_RESOURCES", [-CL_OUT_OF_HOST_MEMORY] = "CL_OUT_OF_HOST_MEMORY", [-CL_PROFILING_INFO_NOT_AVAILABLE] = "CL_PROFILING_INFO_NOT_AVAILABLE", [-CL_MEM_COPY_OVERLAP] = "CL_MEM_COPY_OVERLAP", [-CL_IMAGE_FORMAT_MISMATCH] = "CL_IMAGE_FORMAT_MISMATCH", [-CL_IMAGE_FORMAT_NOT_SUPPORTED] = "CL_IMAGE_FORMAT_NOT_SUPPORTED", [-CL_BUILD_PROGRAM_FAILURE] = "CL_BUILD_PROGRAM_FAILURE", [-CL_MAP_FAILURE] = "CL_MAP_FAILURE", [-CL_MISALIGNED_SUB_BUFFER_OFFSET] = "CL_MISALIGNED_SUB_BUFFER_OFFSET", [-CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST] = "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST", [-CL_INVALID_VALUE] = "CL_INVALID_VALUE", [-CL_INVALID_DEVICE_TYPE] = "CL_INVALID_DEVICE_TYPE", [-CL_INVALID_PLATFORM] = "CL_INVALID_PLATFORM", [-CL_INVALID_DEVICE] = "CL_INVALID_DEVICE", [-CL_INVALID_CONTEXT] = "CL_INVALID_CONTEXT", [-CL_INVALID_QUEUE_PROPERTIES] = "CL_INVALID_QUEUE_PROPERTIES", [-CL_INVALID_COMMAND_QUEUE] = "CL_INVALID_COMMAND_QUEUE", [-CL_INVALID_HOST_PTR] = "CL_INVALID_HOST_PTR", [-CL_INVALID_MEM_OBJECT] = "CL_INVALID_MEM_OBJECT", [-CL_INVALID_IMAGE_FORMAT_DESCRIPTOR] = "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR", [-CL_INVALID_IMAGE_SIZE] = "CL_INVALID_IMAGE_SIZE", [-CL_INVALID_SAMPLER] = "CL_INVALID_SAMPLER", [-CL_INVALID_BINARY] = "CL_INVALID_BINARY", [-CL_INVALID_BUILD_OPTIONS] = "CL_INVALID_BUILD_OPTIONS", [-CL_INVALID_PROGRAM] = "CL_INVALID_PROGRAM", [-CL_INVALID_PROGRAM_EXECUTABLE] = "CL_INVALID_PROGRAM_EXECUTABLE", [-CL_INVALID_KERNEL_NAME] = "CL_INVALID_KERNEL_NAME", [-CL_INVALID_KERNEL_DEFINITION] = "CL_INVALID_KERNEL_DEFINITION", [-CL_INVALID_KERNEL] = "CL_INVALID_KERNEL", [-CL_INVALID_ARG_INDEX] = "CL_INVALID_ARG_INDEX", [-CL_INVALID_ARG_VALUE] = "CL_INVALID_ARG_VALUE", [-CL_INVALID_ARG_SIZE] = "CL_INVALID_ARG_SIZE", [-CL_INVALID_KERNEL_ARGS] = "CL_INVALID_KERNEL_ARGS", [-CL_INVALID_WORK_DIMENSION] = "CL_INVALID_WORK_DIMENSION", [-CL_INVALID_WORK_GROUP_SIZE] = "CL_INVALID_WORK_GROUP_SIZE", [-CL_INVALID_WORK_ITEM_SIZE] = "CL_INVALID_WORK_ITEM_SIZE", [-CL_INVALID_GLOBAL_OFFSET] = "CL_INVALID_GLOBAL_OFFSET", 
[-CL_INVALID_EVENT_WAIT_LIST] = "CL_INVALID_EVENT_WAIT_LIST", [-CL_INVALID_EVENT] = "CL_INVALID_EVENT", [-CL_INVALID_OPERATION] = "CL_INVALID_OPERATION", [-CL_INVALID_GL_OBJECT] = "CL_INVALID_GL_OBJECT", [-CL_INVALID_BUFFER_SIZE] = "CL_INVALID_BUFFER_SIZE", [-CL_INVALID_MIP_LEVEL] = "CL_INVALID_MIP_LEVEL", [-CL_INVALID_GLOBAL_WORK_SIZE] = "CL_INVALID_GLOBAL_WORK_SIZE", [-CL_INVALID_PROPERTY] = "CL_INVALID_PROPERTY" }; const size_t err_msg_n = sizeof(err_msg) / sizeof(err_msg[0]); Beignet-1.1.1-Source/utests/compiler_global_constant_2.cpp000664 001750 001750 00000003216 12576733264 023015 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_global_constant_2(void) { const size_t n = 2048; const uint32_t e = 34, r = 77; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_global_constant_2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(uint32_t), &e); OCL_SET_ARG(2, sizeof(uint32_t), &r); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); unsigned int m[3] = {0x15b,0x25b,0x35b}; unsigned int t[5] = {0x45b,0x55b,0x65b,0x75b,0x85b}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // std::cout << ((uint32_t *)buf_data[0])[i] << std::endl; OCL_ASSERT(((uint32_t *)buf_data[0])[i] == m[i%3] + t[i%5] + e + r); OCL_UNMAP_BUFFER(0); } void compiler_global_constant_2_long(void) { const size_t n = 2048; const uint32_t e = 34, r = 77; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_global_constant_2", "compiler_global_constant_2_long"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(uint32_t), &e); OCL_SET_ARG(2, sizeof(uint32_t), &r); // Run the kernel globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); uint64_t m[3] = {0x15b,0x25b,0xFFFFFFFFF}; // Check results OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) // std::cout << ((uint64_t *)buf_data[0])[i] << std::endl; OCL_ASSERT(((uint64_t *)buf_data[0])[i] == m[i%3] + e + r); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_global_constant_2); MAKE_UTEST_FROM_FUNCTION(compiler_global_constant_2_long); Beignet-1.1.1-Source/utests/compiler_overflow.cpp000664 001750 001750 00000011735 12576733264 021273 0ustar00yryr000000 000000 #include "utest_helper.hpp" namespace { typedef struct { unsigned long x; unsigned long y; unsigned long z; unsigned long w; }ulong4; typedef struct { uint32_t x; uint32_t y; uint32_t z; uint32_t w; } uint4; typedef struct { uint16_t x; uint16_t y; uint16_t z; uint16_t w; } ushort4; typedef struct { uint8_t x; uint8_t y; uint8_t z; uint8_t w; } uchar4; template U get_max() { int shift_bit = sizeof(U)*8; U u_max = 0; for (int i = 0; i < shift_bit; i++) u_max |= 1<<(shift_bit-i-1); return u_max; } template void test(const char *kernel_name, int func_type) { const size_t n = 16; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_overflow", kernel_name); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); U max = get_max(); // test add and sub overflow when src1 is 1: // uadd.with.overflow: max + 1 // usub.with.overflow: 0 - 1 OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) { if(func_type == 0) { ((T*)buf_data[0])[i].x = max; ((T*)buf_data[0])[i].y = max; 
((T*)buf_data[0])[i].z = max; ((T*)buf_data[0])[i].w = i; }else if(func_type == 1) { ((T*)buf_data[0])[i].x = 0; ((T*)buf_data[0])[i].y = 0; ((T*)buf_data[0])[i].z = 0; ((T*)buf_data[0])[i].w = n+2-i; }else OCL_ASSERT(0); } OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) { ((T*)buf_data[1])[i].x = 1; ((T*)buf_data[1])[i].y = 1; ((T*)buf_data[1])[i].z = 1; ((T*)buf_data[1])[i].w = 1; } OCL_UNMAP_BUFFER(1); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (uint32_t i = 0; i < 16; ++i) { // printf("%u,%u,%u,%u\n", ((T*)buf_data[2])[i].x,((T*)buf_data[2])[i].y, ((T*)buf_data[2])[i].z, ((T*)buf_data[2])[i].w ); if(func_type == 0) { OCL_ASSERT(((T*)buf_data[2])[i].x == 0); OCL_ASSERT(((T*)buf_data[2])[i].y == 1); OCL_ASSERT(((T*)buf_data[2])[i].z == 1); OCL_ASSERT(((T*)buf_data[2])[i].w == i+2); }else if(func_type == 1) { OCL_ASSERT(((T*)buf_data[2])[i].x == max); OCL_ASSERT(((T*)buf_data[2])[i].y == max-1); OCL_ASSERT(((T*)buf_data[2])[i].z == max-1); OCL_ASSERT(((T*)buf_data[2])[i].w == n-i); }else OCL_ASSERT(0); } OCL_UNMAP_BUFFER(2); // test add and sub overflow when src1 is max: // uadd.with.overflow: max + max // usub.with.overflow: 0 - max OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) { if(func_type == 0) { ((T*)buf_data[0])[i].x = max; ((T*)buf_data[0])[i].y = max; ((T*)buf_data[0])[i].z = max; ((T*)buf_data[0])[i].w = i; }else if(func_type == 1) { ((T*)buf_data[0])[i].x = 0; ((T*)buf_data[0])[i].y = 0; ((T*)buf_data[0])[i].z = 0; ((T*)buf_data[0])[i].w = n+2-i; }else OCL_ASSERT(0); } OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) { ((T*)buf_data[1])[i].x = max; ((T*)buf_data[1])[i].y = max; ((T*)buf_data[1])[i].z = max; ((T*)buf_data[1])[i].w = 1; } OCL_UNMAP_BUFFER(1); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (uint32_t i = 0; i < 16; ++i) { // printf("%u,%u,%u,%u\n", ((T*)buf_data[2])[i].x,((T*)buf_data[2])[i].y, ((T*)buf_data[2])[i].z, ((T*)buf_data[2])[i].w ); if(func_type == 0) { OCL_ASSERT(((T*)buf_data[2])[i].x == max-1); OCL_ASSERT(((T*)buf_data[2])[i].y == max); OCL_ASSERT(((T*)buf_data[2])[i].z == max); OCL_ASSERT(((T*)buf_data[2])[i].w == i+2); }else if(func_type == 1) { OCL_ASSERT(((T*)buf_data[2])[i].x == 1); OCL_ASSERT(((T*)buf_data[2])[i].y == 0); OCL_ASSERT(((T*)buf_data[2])[i].z == 0); OCL_ASSERT(((T*)buf_data[2])[i].w == n-i); }else OCL_ASSERT(0); } OCL_UNMAP_BUFFER(2); } } #define compiler_overflow_add(type, subtype, kernel, func_type) \ static void compiler_overflow_add_ ##type(void)\ {\ test(# kernel, func_type);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_overflow_add_ ## type); #define compiler_overflow_sub(type, subtype, kernel, func_type) \ static void compiler_overflow_sub_ ##type(void)\ {\ test(# kernel, func_type);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_overflow_sub_ ## type); compiler_overflow_add(ulong4, unsigned long, compiler_overflow_ulong4_add, 0) compiler_overflow_add(uint4, uint32_t, compiler_overflow_uint4_add, 0) compiler_overflow_add(ushort4, uint16_t, compiler_overflow_ushort4_add, 0) compiler_overflow_add(uchar4, uint8_t, compiler_overflow_uchar4_add, 0) // as llvm intrincs function doesn't support byte/short overflow, // we just test uint overflow here. 
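/* For reference, each generator above expands into a registered utest. E.g.
 * compiler_overflow_add(uint4, uint32_t, compiler_overflow_uint4_add, 0)
 * becomes (expansion sketch; the test<...> template arguments are
 * reconstructed from the macro parameters):
 *
 *   static void compiler_overflow_add_uint4(void)
 *   { test<uint4, uint32_t>("compiler_overflow_uint4_add", 0); }
 *   MAKE_UTEST_FROM_FUNCTION(compiler_overflow_add_uint4);
 */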
compiler_overflow_sub(uint4, uint32_t, compiler_overflow_uint4_sub, 1) Beignet-1.1.1-Source/utests/builtin_bitselect.cpp000664 001750 001750 00000002455 12576733264 021241 0ustar00yryr000000 000000 #include "utest_helper.hpp" int as_int(float f) { void *p = &f; return *(int *)p; } int cpu(int a, int b, int c) { return (a & ~c) | (b & c); } void builtin_bitselect(void) { const int n = 32; float src1[n], src2[n], src3[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_bitselect"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { src1[i] = ((float*)buf_data[0])[i] = rand() * 0.1f; src2[i] = ((float*)buf_data[1])[i] = rand() * 0.1f; src3[i] = ((float*)buf_data[2])[i] = rand() * 0.1f; } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2); OCL_NDRANGE(1); OCL_MAP_BUFFER(3); for (int i = 0; i < n; ++i) OCL_ASSERT(((int*)buf_data[3])[i] == cpu(as_int(src1[i]), as_int(src2[i]), as_int(src3[i]))); OCL_UNMAP_BUFFER(3); } MAKE_UTEST_FROM_FUNCTION(builtin_bitselect); Beignet-1.1.1-Source/utests/compiler_rhadd.cpp000664 001750 001750 00000001727 12576733264 020512 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_rhadd(void) { const int n = 32; int src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_rhadd"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((int*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { long long a = src1[i]; a += src2[i]; a ++; a >>= 1; OCL_ASSERT(((int*)buf_data[2])[i] == (int)a); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_rhadd); Beignet-1.1.1-Source/utests/compiler_array2.cpp000664 001750 001750 00000002422 12576733264 020621 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { int final[16]; int array[16]; for (int j = 0; j < 16; ++j) array[j] = j; for (int j = 0; j < 16; ++j) final[j] = j+1; if (global_id == 15) dst[global_id] = final[global_id]; else dst[global_id] = array[15 - global_id]; } void compiler_array2(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_array2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i 
<(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_array2); Beignet-1.1.1-Source/utests/compiler_lower_return2.cpp000664 001750 001750 00000002264 12576733264 022236 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void cpu(int global_id, int *src, int *dst) { const int id = global_id; dst[id] = id; while (dst[id] > src[id]) { if (dst[id] > 10) return; dst[id]--; } dst[id] += 2; } static void compiler_lower_return2(void) { const size_t n = 16; int cpu_dst[16], cpu_src[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_lower_return2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_lower_return2); Beignet-1.1.1-Source/utests/utest_helper.hpp000664 001750 001750 00000015251 12576733264 020243 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ /** * \file utest_helper.hpp * * \author Benjamin Segovia */ #ifndef __UTEST_HELPER_HPP__ #define __UTEST_HELPER_HPP__ #include "CL/cl.h" #include "CL/cl_intel.h" #include "utest.hpp" #include "utest_assert.hpp" #include "utest_error.h" #include #include #include #ifdef HAS_EGL #define EGL_WINDOW_WIDTH 256 #define EGL_WINDOW_HEIGHT 256 #include #include #include #include extern EGLDisplay eglDisplay; extern EGLContext eglContext; extern EGLSurface eglSurface; #endif #define OCL_THROW_ERROR(FN, STATUS) \ do { \ char msg[2048]; \ sprintf(msg, "error calling %s with error %s \n", #FN, err_msg[-STATUS]); \ OCL_ASSERTM(false, msg); \ } while (0) #define OCL_CALL(FN, ...) 
\ do { \ int status = FN(__VA_ARGS__); \ if (status != CL_SUCCESS) OCL_THROW_ERROR(FN, status); \ } while (0) #define OCL_CREATE_KERNEL(NAME) \ do { \ OCL_CALL (cl_kernel_init, NAME".cl", NAME, SOURCE, NULL); \ } while (0) #define OCL_DESTROY_KERNEL_KEEP_PROGRAM(KEEP_PROGRAM) \ do { \ cl_kernel_destroy(!(KEEP_PROGRAM)); \ } while(0) #define OCL_CREATE_KERNEL_FROM_FILE(FILE_NAME, KERNEL_NAME) \ do { \ OCL_CALL(cl_kernel_init, FILE_NAME".cl", KERNEL_NAME, SOURCE, NULL); \ } while (0) #define OCL_FLUSH() \ do { \ OCL_CALL(clFlush, queue); \ } while(0) #define OCL_FINISH() \ do { \ OCL_CALL(clFinish, queue); \ } while(0) #define OCL_CALL2(FN, RET, ...) \ do { \ cl_int status; \ RET = FN(__VA_ARGS__, &status);\ if (status != CL_SUCCESS) OCL_THROW_ERROR(FN, status); \ } while (0) #define OCL_CREATE_BUFFER(BUFFER, FLAGS, SIZE, DATA) \ OCL_CALL2(clCreateBuffer, BUFFER, ctx, FLAGS, SIZE, DATA) #define OCL_CREATE_USER_EVENT(EVENT) \ OCL_CALL2(clCreateUserEvent, EVENT, ctx) #define OCL_SET_USER_EVENT_STATUS(EVENT, STATUS) \ OCL_CALL(clSetUserEventStatus, EVENT, STATUS) #define OCL_CREATE_IMAGE(IMAGE, FLAGS, FORMAT, DESC, DATA) \ OCL_CALL2(clCreateImage, IMAGE, ctx, FLAGS, FORMAT, DESC, DATA) #define OCL_READ_IMAGE(IMAGE, ORIGIN, REGION, DATA) \ OCL_CALL(clEnqueueReadImage, queue, IMAGE, CL_TRUE, ORIGIN, REGION, 0, 0, DATA, 0, NULL, NULL) #define OCL_WRITE_IMAGE(IMAGE, ORIGIN, REGION, DATA) \ OCL_CALL(clEnqueueWriteImage, queue, IMAGE, CL_TRUE, ORIGIN, REGION, 0, 0, DATA, 0, NULL, NULL) #define OCL_CREATE_GL_IMAGE(IMAGE, FLAGS, TARGET, LEVEL, TEXTURE) \ OCL_CALL2(clCreateFromGLTexture, IMAGE, ctx, FLAGS, TARGET, LEVEL, TEXTURE) #define OCL_ENQUEUE_ACQUIRE_GL_OBJECTS(ID) \ OCL_CALL(clEnqueueAcquireGLObjects, queue, 1, &buf[ID], 0, 0, 0) #define OCL_SWAP_EGL_BUFFERS() \ eglSwapBuffers(eglDisplay, eglSurface); #define OCL_CREATE_SAMPLER(SAMPLER, ADDRESS_MODE, FILTER_MODE) \ OCL_CALL2(clCreateSampler, SAMPLER, ctx, 0, ADDRESS_MODE, FILTER_MODE) #define OCL_MAP_BUFFER(ID) \ OCL_CALL2(clMapBufferIntel, buf_data[ID], buf[ID]) #define OCL_UNMAP_BUFFER(ID) \ do { \ if (buf[ID] != NULL) { \ OCL_CALL (clUnmapBufferIntel, buf[ID]); \ buf_data[ID] = NULL; \ } \ } while (0) #define OCL_MAP_BUFFER_GTT(ID) \ OCL_CALL2(clMapBufferGTTIntel, buf_data[ID], buf[ID]) #define OCL_UNMAP_BUFFER_GTT(ID) \ do { \ if (buf[ID] != NULL) { \ OCL_CALL (clUnmapBufferGTTIntel, buf[ID]); \ buf_data[ID] = NULL; \ } \ } while (0) #define OCL_NDRANGE(DIM_N) \ OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, DIM_N, NULL, globals, locals, 0, NULL, NULL) #define OCL_SET_ARG(ID, SIZE, ARG) \ OCL_CALL (clSetKernelArg, kernel, ID, SIZE, ARG) #define OCL_CHECK_IMAGE(DATA, W, H, FILENAME) \ if (cl_check_image(DATA, W, H, FILENAME) == 0) \ OCL_ASSERTM(false, "image mismatch") enum { MAX_BUFFER_N = 16 }; extern cl_platform_id platform; extern cl_device_id device; extern cl_context ctx; extern cl_program program; extern cl_kernel kernel; extern cl_command_queue queue; extern cl_mem buf[MAX_BUFFER_N]; extern void* buf_data[MAX_BUFFER_N]; extern size_t globals[3]; extern size_t locals[3]; extern float ULPSIZE_FAST_MATH; enum { SOURCE = 0, LLVM = 1, BIN = 2 }; /* The SF is float type spliter*/ typedef struct { unsigned int mantissa:23; unsigned int exponent:8; unsigned int sign:1; } FLOAT; typedef union { float f; unsigned int i; FLOAT spliter; } SF; /* Init OpenCL */ extern int cl_ocl_init(void); /* Init program and kernel for the test */ extern int cl_kernel_init(const char *file_name, const char *kernel_name, int format, const char * build_opt); /* 
Get the file path */ extern char* cl_do_kiss_path(const char *file, cl_device_id device); /* Init the bunch of global variables here */ extern int cl_test_init(const char *file_name, const char *kernel_name, int format); /* Unmap and release all the created buffers */ extern void cl_buffer_destroy(void); /* Release OCL queue, context and device */ extern void cl_ocl_destroy(void); /* Release kernel and program */ extern void cl_kernel_destroy(bool needDestroyProgram = true); /* Release everything allocated in cl_test_init */ extern void cl_test_destroy(void); /* Nicely output the performance counters */ extern void cl_report_perf_counters(cl_mem perf); /* Read a bmp from file */ extern int *cl_read_bmp(const char *filename, int *width, int *height); /* Write a bmp to a file */ extern void cl_write_bmp(const int *data, int width, int height, const char *filename); /* Check data from img against bmp file located at "bmp" */ extern int cl_check_image(const int *img, int w, int h, const char *bmp); /* Calculate the ULP of a FLOAT value */ extern float cl_FLT_ULP(float float_number); /* Calculate the ULP of an INT value */ extern int cl_INT_ULP(int int_number); /* Subtract two timevals (y - x) and return the elapsed time in milliseconds */ double time_subtract(struct timeval *y, struct timeval *x, struct timeval *result); /* Select the ULP tolerance depending on OCL_STRICT_CONFORMANCE */ float select_ulpsize(float ULPSIZE_FAST_MATH, float ULPSIZE_NO_FAST_MATH); #endif /* __UTEST_HELPER_HPP__ */ Beignet-1.1.1-Source/utests/compiler_if_else.cpp000664 001750 001750 00000003523 12576733264 021032 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_if_else(void) { const size_t n = 17; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_if_else"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 1); } // Second control flow for (uint32_t i = 0; i < n; ++i) ((int32_t*)buf_data[0])[i] = -1; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 16; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 2); } // Third control flow for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = 2; for (uint32_t i = 4; i < n; ++i) ((int32_t*)buf_data[0])[i] = -1; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < 3; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 1); } OCL_ASSERT(((int32_t*)buf_data[1])[3] == -1); OCL_ASSERT(((int32_t*)buf_data[0])[3] == 1); for (uint32_t i = 4; i < 16; ++i) { OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); OCL_ASSERT(((int32_t*)buf_data[0])[i] == 2); } } MAKE_UTEST_FROM_FUNCTION(compiler_if_else); Beignet-1.1.1-Source/utests/load_program_from_bin_file.cpp000664 001750 001750 00000004462 12576733264 023055 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include "utest_file_map.hpp" #include #include using namespace std; static void cpu(int global_id,
float *src, float *dst) { dst[global_id] = ceilf(src[global_id]); } static void test_load_program_from_bin_file(void) { const size_t n = 16; float cpu_dst[16], cpu_src[16]; cl_int status; cl_int binary_status; char *ker_path = NULL; cl_file_map_t *fm = cl_file_map_new(); ker_path = cl_do_kiss_path("compiler_ceil.bin", device); OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS); const unsigned char *src = (const unsigned char *)cl_file_map_begin(fm); const size_t sz = cl_file_map_size(fm); program = clCreateProgramWithBinary(ctx, 1, &device, &sz, &src, &binary_status, &status); OCL_ASSERT(program && status == CL_SUCCESS); /* OCL requires to build the program even if it is created from a binary */ OCL_ASSERT(clBuildProgram(program, 1, &device, NULL, NULL, NULL) == CL_SUCCESS); kernel = clCreateKernel(program, "compiler_ceil", &status); OCL_ASSERT(status == CL_SUCCESS); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); #if 0 printf("#### GPU:\n"); for (int32_t i = 0; i < (int32_t) n; ++i) printf(" %f", ((float *)buf_data[1])[i]); printf("\n#### CPU:\n"); for (int32_t i = 0; i < (int32_t) n; ++i) printf(" %f", cpu_dst[i]); printf("\n"); #endif for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((float *)buf_data[1])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(test_load_program_from_bin_file); Beignet-1.1.1-Source/utests/compiler_short_scatter.cpp000664 001750 001750 00000001045 12576733264 022305 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_short_scatter(void) { const size_t n = 128; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_short_scatter"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int16_t), NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int16_t*)buf_data[0])[i] == (int16_t) i); } MAKE_UTEST_FROM_FUNCTION(compiler_short_scatter); Beignet-1.1.1-Source/utests/compiler_double_3.cpp000664 001750 001750 00000002267 12576733264 021124 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" static void cpu(int global_id, float *src, double *dst) { float d = 1.234567890123456789; dst[global_id] = global_id < 14 ? 
d : 14; } void compiler_double_3(void) { const size_t n = 16; float cpu_src[n]; double cpu_dst[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_double_3"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 1; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_double_3); Beignet-1.1.1-Source/utests/compiler_constant_expr.cpp000664 001750 001750 00000001725 12576733264 022315 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include static void compiler_constant_expr(void) { const size_t n = 48; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_constant_expr"); buf_data[0] = (uint32_t*) malloc(sizeof(float) * n); for (uint32_t i = 0; i < n; ++i) ((float*)buf_data[0])[i] = i; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(float), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (uint32_t i = 0; i < n; ++i) { float expect = pow(((float*)buf_data[0])[i], (i % 3) + 1); float err = fabs(((float*)buf_data[1])[i] - expect); OCL_ASSERT(err <= 100 * cl_FLT_ULP(expect)); } } MAKE_UTEST_FROM_FUNCTION(compiler_constant_expr); Beignet-1.1.1-Source/utests/compiler_fill_image_3d_2.cpp000664 001750 001750 00000002307 12576733264 022322 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" static void compiler_fill_image_3d_2(void) { const size_t w = 512; const size_t h = 512; const size_t depth = 5; cl_image_format format; cl_image_desc desc; memset(&desc, 0x0, sizeof(cl_image_desc)); memset(&format, 0x0, sizeof(cl_image_format)); format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = w; desc.image_height = h; desc.image_depth = depth; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; // Setup kernel and images OCL_CREATE_KERNEL("test_fill_image_3d_2"); OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = w; globals[1] = h; globals[2] = depth; locals[0] = 16; locals[1] = 16; locals[2] = 1; OCL_NDRANGE(3); // Check result OCL_MAP_BUFFER_GTT(0); for (uint32_t k = 0; k < depth; k++) for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) OCL_ASSERT(((uint32_t*)buf_data[0])[k*w*h + j*w + i] == 0x78563412); OCL_UNMAP_BUFFER_GTT(0); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_3d_2); Beignet-1.1.1-Source/utests/utest_helper.cpp000664 001750 001750 00000047114 12576733264 020241 0ustar00yryr000000 000000 /* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by 
the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: Benjamin Segovia */ #include "utest_file_map.hpp" #include "utest_helper.hpp" #include "utest_error.h" #include "CL/cl.h" #include "CL/cl_intel.h" #include #include #include #include #include #define FATAL(...) \ do { \ fprintf(stderr, "error: "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n");\ assert(0); \ exit(-1); \ } while (0) #define FATAL_IF(COND, ...) \ do { \ if (COND) FATAL(__VA_ARGS__); \ } while (0) cl_platform_id platform = NULL; cl_device_id device = NULL; cl_context ctx = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_command_queue queue = NULL; cl_mem buf[MAX_BUFFER_N] = {}; void *buf_data[MAX_BUFFER_N] = {}; size_t globals[3] = {}; size_t locals[3] = {}; float ULPSIZE_FAST_MATH = 10000.; #ifdef HAS_EGL Display *xDisplay; EGLDisplay eglDisplay; EGLContext eglContext = NULL; EGLSurface eglSurface; Window xWindow; void cl_ocl_destroy_egl_window() { eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); eglDestroyContext(eglDisplay, eglContext); eglDestroySurface(eglDisplay, eglSurface); XDestroyWindow(xDisplay, xWindow); XCloseDisplay(xDisplay); } bool init_egl_window(int width, int height) { XSetWindowAttributes swa; Window win, root; EGLint attr[] = { // some attributes to set up our egl-interface EGL_BUFFER_SIZE, 16, EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT, EGL_NONE }; //// egl-contexts collect all state descriptions needed required for operation EGLint ctxattr[] = { #if 0 EGL_CONTEXT_CLIENT_VERSION, 2, #endif EGL_NONE }; EGLConfig ecfg; EGLint numConfig; eglContext = EGL_NO_CONTEXT; xDisplay = XOpenDisplay(NULL); if (xDisplay == NULL) { fprintf(stderr, "Failed to open DISPLAY.\n"); return false; } root = DefaultRootWindow(xDisplay); swa.event_mask = ExposureMask | PointerMotionMask | KeyPressMask; win = XCreateWindow( xDisplay, root, 0, 0, width, height, 0, CopyFromParent, InputOutput, CopyFromParent, CWEventMask, &swa); xWindow = win; /////// the egl part ////////////////////////////////////////////////////////////////// // egl provides an interface to connect the graphics related functionality of openGL ES // with the windowing interface and functionality of the native operation system (X11 // in our case. 
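/* EGL bring-up below follows the usual order: eglGetDisplay ->
 * eglBindAPI(EGL_OPENGL_API) -> eglInitialize -> eglChooseConfig ->
 * eglCreateWindowSurface -> eglCreateContext -> eglMakeCurrent. Every step is
 * checked and the function returns false on failure, which lets cl_ocl_init()
 * fall back to creating a context without GL sharing. */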
eglDisplay = eglGetDisplay( (EGLNativeDisplayType) xDisplay ); if ( eglDisplay == EGL_NO_DISPLAY ) { fprintf(stderr, "Got no EGL display.\n"); return false; } eglBindAPI(EGL_OPENGL_API); int m,n; if ( !eglInitialize( eglDisplay, &m, &n ) ) { fprintf(stderr, "Unable to initialize EGL\n"); return false; } if ( !eglChooseConfig( eglDisplay, attr, &ecfg, 1, &numConfig ) ) { fprintf(stderr, "Failed to choose config (eglError: %d)\n", eglGetError()); return false; } if ( numConfig != 1 ) { fprintf(stderr, "Didn't get exactly one config, but %d", numConfig); return false; } eglSurface = eglCreateWindowSurface ( eglDisplay, ecfg, win, NULL ); if ( eglSurface == EGL_NO_SURFACE ) { fprintf(stderr, "Unable to create EGL surface (eglError: %d)\n", eglGetError()); return false; } eglContext = eglCreateContext ( eglDisplay, ecfg, EGL_NO_CONTEXT, ctxattr ); if ( eglContext == EGL_NO_CONTEXT ) { fprintf(stderr, "Unable to create EGL context (eglError: %d)\n", eglGetError()); return false; } //// associate the egl-context with the egl-surface eglMakeCurrent( eglDisplay, eglSurface, eglSurface, eglContext); glClearColor(1.0, 1.0, 1.0, 1.0); glClear(GL_COLOR_BUFFER_BIT); glFinish(); eglSwapBuffers(eglDisplay, eglSurface); return true; } #endif static const char* cl_test_channel_order_string(cl_channel_order order) { switch(order) { #define DECL_ORDER(WHICH) case CL_##WHICH: return "CL_"#WHICH DECL_ORDER(R); DECL_ORDER(A); DECL_ORDER(RG); DECL_ORDER(RA); DECL_ORDER(RGB); DECL_ORDER(RGBA); DECL_ORDER(BGRA); DECL_ORDER(ARGB); DECL_ORDER(INTENSITY); DECL_ORDER(LUMINANCE); DECL_ORDER(Rx); DECL_ORDER(RGx); DECL_ORDER(RGBx); #undef DECL_ORDER default: return "Unsupported image channel order"; }; } static const char* cl_test_channel_type_string(cl_channel_type type) { switch(type) { #define DECL_TYPE(WHICH) case CL_##WHICH: return "CL_"#WHICH DECL_TYPE(SNORM_INT8); DECL_TYPE(SNORM_INT16); DECL_TYPE(UNORM_INT8); DECL_TYPE(UNORM_INT16); DECL_TYPE(UNORM_SHORT_565); DECL_TYPE(UNORM_SHORT_555); DECL_TYPE(UNORM_INT_101010); DECL_TYPE(SIGNED_INT8); DECL_TYPE(SIGNED_INT16); DECL_TYPE(SIGNED_INT32); DECL_TYPE(UNSIGNED_INT8); DECL_TYPE(UNSIGNED_INT16); DECL_TYPE(UNSIGNED_INT32); DECL_TYPE(HALF_FLOAT); DECL_TYPE(FLOAT); #undef DECL_TYPE default: return "Unsupported image channel type"; }; } static void clpanic(const char *msg, int rval) { printf("Failed: %s (%d)\n", msg, rval); exit(-1); } char* cl_do_kiss_path(const char *file, cl_device_id device) { cl_int ver; const char *sub_path = NULL; char *ker_path = NULL; const char *kiss_path = getenv("OCL_KERNEL_PATH"); size_t sz = strlen(file); if (device == NULL) sub_path = ""; else { if (clGetGenVersionIntel(device, &ver) != CL_SUCCESS) clpanic("Unable to get Gen version", -1); sub_path = ""; } if (kiss_path == NULL) clpanic("set OCL_KERNEL_PATH. 
This is where the kiss kernels are", -1); sz += strlen(kiss_path) + strlen(sub_path) + 2; /* +1 for end of string, +1 for '/' */ if ((ker_path = (char*) malloc(sz)) == NULL) clpanic("Allocation failed", -1); sprintf(ker_path, "%s/%s%s", kiss_path, sub_path, file); return ker_path; } int cl_kernel_init(const char *file_name, const char *kernel_name, int format, const char * build_opt) { cl_file_map_t *fm = NULL; char *ker_path = NULL; cl_int status = CL_SUCCESS; static const char *prevFileName = NULL; /* Load the program and build it */ if (!program || (program && (!prevFileName || strcmp(prevFileName, file_name)))) { if (program) clReleaseProgram(program); ker_path = cl_do_kiss_path(file_name, device); if (format == LLVM) program = clCreateProgramWithLLVMIntel(ctx, 1, &device, ker_path, &status); else if (format == SOURCE) { cl_file_map_t *fm = cl_file_map_new(); FATAL_IF (cl_file_map_open(fm, ker_path) != CL_FILE_MAP_SUCCESS, "Failed to open file \"%s\" with kernel \"%s\". Did you properly set OCL_KERNEL_PATH variable?", file_name, kernel_name); const char *src = cl_file_map_begin(fm); const size_t sz = cl_file_map_size(fm); program = clCreateProgramWithSource(ctx, 1, &src, &sz, &status); cl_file_map_delete(fm); } else FATAL("Not able to create program from binary"); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateProgram\n"); goto error; } prevFileName = file_name; /* OCL requires building the program even if it is created from a binary */ OCL_CALL (clBuildProgram, program, 1, &device, build_opt, NULL, NULL); } /* Create a kernel from the program */ if (kernel) clReleaseKernel(kernel); kernel = clCreateKernel(program, kernel_name, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateKernel\n"); goto error; } exit: free(ker_path); cl_file_map_delete(fm); return status; error: prevFileName = NULL; goto exit; } #define GET_PLATFORM_STR_INFO(LOWER_NAME, NAME) \ { \ size_t param_value_size; \ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, 0, 0, &param_value_size); \ std::vector<char> param_value(param_value_size); \ OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_##NAME, \ param_value_size, param_value.empty() ? NULL : &param_value.front(), \ &param_value_size); \ std::string str; \ if (!param_value.empty()) \ str = std::string(&param_value.front(), param_value_size-1); \ printf("platform_" #LOWER_NAME " \"%s\"\n", str.c_str()); \ } #include <string> #define GET_DEVICE_STR_INFO(LOWER_NAME, NAME) \ std::string LOWER_NAME ##Str; \ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, 0, 0, &param_value_size); \ { \ std::vector<char> param_value(param_value_size); \ OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_##NAME, \ param_value_size, param_value.empty() ?
NULL : &param_value.front(), \ &param_value_size); \ if (!param_value.empty()) \ LOWER_NAME ##Str = std::string(&param_value.front(), param_value_size-1); \ } \ printf("device_" #LOWER_NAME " \"%s\"\n", LOWER_NAME ##Str.c_str()); int cl_ocl_init(void) { cl_int status = CL_SUCCESS; cl_uint platform_n; size_t i; #ifdef HAS_EGL bool hasGLExt = false; #endif cl_context_properties *props = NULL; /* Get the platform number */ OCL_CALL (clGetPlatformIDs, 0, NULL, &platform_n); printf("platform number %u\n", platform_n); assert(platform_n >= 1); /* Get a valid platform */ OCL_CALL (clGetPlatformIDs, 1, &platform, &platform_n); GET_PLATFORM_STR_INFO(profile, PROFILE); GET_PLATFORM_STR_INFO(name, NAME); GET_PLATFORM_STR_INFO(vendor, VENDOR); GET_PLATFORM_STR_INFO(version, VERSION); GET_PLATFORM_STR_INFO(extensions, EXTENSIONS); /* Get the device (only GPU device is supported right now) */ try { OCL_CALL (clGetDeviceIDs, platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); { size_t param_value_size; GET_DEVICE_STR_INFO(profile, PROFILE); GET_DEVICE_STR_INFO(name, NAME); GET_DEVICE_STR_INFO(vendor, VENDOR); GET_DEVICE_STR_INFO(version, VERSION); GET_DEVICE_STR_INFO(extensions, EXTENSIONS); GET_DEVICE_STR_INFO(opencl_c_version, OPENCL_C_VERSION); #ifdef HAS_EGL if (std::strstr(extensionsStr.c_str(), "cl_khr_gl_sharing")) { hasGLExt = true; } #endif } } catch (...) { fprintf(stderr, "error calling clGetDeviceIDs\n"); status = CL_DEVICE_NOT_FOUND; goto error; } #ifdef HAS_EGL if (hasGLExt) { int i = 0; props = new cl_context_properties[7]; props[i++] = CL_CONTEXT_PLATFORM; props[i++] = (cl_context_properties)platform; if (init_egl_window(EGL_WINDOW_WIDTH, EGL_WINDOW_HEIGHT)) { props[i++] = CL_EGL_DISPLAY_KHR; props[i++] = (cl_context_properties)eglGetCurrentDisplay(); props[i++] = CL_GL_CONTEXT_KHR; props[i++] = (cl_context_properties)eglGetCurrentContext(); } props[i++] = 0; } #endif /* Now create a context */ ctx = clCreateContext(props, 1, &device, NULL, NULL, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateContext\n"); goto error; } /* All image types currently supported by the context */ cl_image_format fmt[256]; cl_uint fmt_n; clGetSupportedImageFormats(ctx, 0, CL_MEM_OBJECT_IMAGE2D, 256, fmt, &fmt_n); printf("%u image formats are supported\n", fmt_n); for (i = 0; i < fmt_n; ++i) printf("[%s %s]\n", cl_test_channel_order_string(fmt[i].image_channel_order), cl_test_channel_type_string(fmt[i].image_channel_data_type)); /* We are going to push NDRange kernels here */ queue = clCreateCommandQueue(ctx, device, 0, &status); if (status != CL_SUCCESS) { fprintf(stderr, "error calling clCreateCommandQueue\n"); goto error; } error: if (props) delete[] props; return status; } int cl_test_init(const char *file_name, const char *kernel_name, int format) { cl_int status = CL_SUCCESS; /* Initialize OCL */ if ((status = cl_ocl_init()) != CL_SUCCESS) goto error; /* Load the kernel */ if ((status = cl_kernel_init(file_name, kernel_name, format, NULL)) != CL_SUCCESS) goto error; error: return status; } void cl_kernel_destroy(bool needDestroyProgram) { if (kernel) { clReleaseKernel(kernel); kernel = NULL; } if (needDestroyProgram && program) { clReleaseProgram(program); program = NULL; } } void cl_ocl_destroy(void) { clReleaseCommandQueue(queue); clReleaseContext(ctx); #ifdef HAS_EGL if (eglContext != NULL) { cl_ocl_destroy_egl_window(); eglContext = NULL; } #endif } void cl_test_destroy(void) { cl_kernel_destroy(); cl_ocl_destroy(); printf("%i memory leaks\n", clReportUnfreedIntel());
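/* clReportUnfreedIntel() is a Beignet-specific extension that reports how
 * many driver-side allocations are still unfreed; it is called a second time
 * below purely to feed the assert, which turns any leak into a hard test
 * failure. */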
assert(clReportUnfreedIntel() == 0); } void cl_buffer_destroy(void) { int i; for (i = 0; i < MAX_BUFFER_N; ++i) { if (buf_data[i] != NULL) { clUnmapBufferIntel(buf[i]); buf_data[i] = NULL; } if (buf[i] != NULL) { clReleaseMemObject(buf[i]); buf[i] = NULL; } } } void cl_report_perf_counters(cl_mem perf) { cl_int status = CL_SUCCESS; uint32_t *start = NULL, *end = NULL; uint32_t i; if (perf == NULL) return; start = (uint32_t*) clMapBufferIntel(perf, &status); assert(status == CL_SUCCESS && start != NULL); end = start + 128; printf("BEFORE\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u 0x%8x] ", i, start[i]); } printf("\n\n"); printf("AFTER\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u 0x%8x] ", i, end[i]); } printf("\n\n"); printf("DIFF\n"); for (i = 0; i < 6*8; ++i) { if (i % 8 == 0) printf("\n"); printf("[%3u %8i] ", i, end[i] - start[i]); } printf("\n\n"); clUnmapBufferIntel(perf); } struct bmphdr { // 2 bytes of magic here, "BM", total header size is 54 bytes! int filesize; // 4 total file size incl header short as0, as1; // 8 app specific int bmpoffset; // 12 ofset of bmp data int headerbytes; // 16 bytes in header from this point (40 actually) int width; // 20 int height; // 24 short nplanes; // 26 no of color planes short bpp; // 28 bits/pixel int compression; // 32 BI_RGB = 0 = no compression int sizeraw; // 36 size of raw bmp file, excluding header, incl padding int hres; // 40 horz resolutions pixels/meter int vres; // 44 int npalcolors; // 48 No of colors in palette int nimportant; // 52 No of important colors // raw b, g, r data here, dword aligned per scan line }; int *cl_read_bmp(const char *filename, int *width, int *height) { struct bmphdr hdr; char *bmppath = cl_do_kiss_path(filename, device); FILE *fp = fopen(bmppath, "rb"); assert(fp); char magic[2]; int ret; ret = fread(&magic[0], 1, 2, fp); if(2 != ret){ fclose(fp); free(bmppath); return NULL; } assert(magic[0] == 'B' && magic[1] == 'M'); ret = fread(&hdr, sizeof(hdr), 1, fp); if(1 != ret){ fclose(fp); free(bmppath); return NULL; } assert(hdr.width > 0 && hdr.height > 0 && hdr.nplanes == 1 && hdr.compression == 0); int *rgb32 = (int *) malloc(hdr.width * hdr.height * sizeof(int)); assert(rgb32); int x, y; int *dst = rgb32; for (y = 0; y < hdr.height; y++) { for (x = 0; x < hdr.width; x++) { assert(!feof(fp)); int b = (getc(fp) & 0x0ff); int g = (getc(fp) & 0x0ff); int r = (getc(fp) & 0x0ff); *dst++ = (r | (g << 8) | (b << 16) | 0xff000000); /* abgr */ } while (x & 3) { getc(fp); x++; } // each scanline padded to dword // printf("read row %d\n", y); // fflush(stdout); } fclose(fp); *width = hdr.width; *height = hdr.height; free(bmppath); return rgb32; } void cl_write_bmp(const int *data, int width, int height, const char *filename) { int x, y; FILE *fp = fopen(filename, "wb"); assert(fp); char *raw = (char *) malloc(width * height * sizeof(int)); // at most assert(raw); char *p = raw; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { int c = *data++; *p++ = ((c >> 16) & 0xff); *p++ = ((c >> 8) & 0xff); *p++ = ((c >> 0) & 0xff); } while (x & 3) { *p++ = 0; x++; } // pad to dword } int sizeraw = p - raw; int scanline = (width * 3 + 3) & ~3; assert(sizeraw == scanline * height); struct bmphdr hdr; hdr.filesize = scanline * height + sizeof(hdr) + 2; hdr.as0 = 0; hdr.as1 = 0; hdr.bmpoffset = sizeof(hdr) + 2; hdr.headerbytes = 40; hdr.width = width; hdr.height = height; hdr.nplanes = 1; hdr.bpp = 24; hdr.compression = 0; hdr.sizeraw = sizeraw; hdr.hres = 0; 
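/* BMP rows are padded to a 4-byte boundary, which is where the header math
 * above comes from: scanline = (width * 3 + 3) & ~3 rounds a 24bpp row up
 * (e.g. width 13 gives 39 pixel bytes, padded to a 40-byte scanline), and
 * filesize = scanline * height + sizeof(hdr) + 2 accounts for the 52-byte
 * header struct plus the 2-byte "BM" magic that is written separately. */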
// 2834; hdr.vres = 0; // 2834; hdr.npalcolors = 0; hdr.nimportant = 0; /* Now write bmp file */ char magic[2] = { 'B', 'M' }; fwrite(&magic[0], 1, 2, fp); fwrite(&hdr, 1, sizeof(hdr), fp); fwrite(raw, 1, hdr.sizeraw, fp); fclose(fp); free(raw); } static const float pixel_threshold = 0.05f; static const float max_error_ratio = 0.001f; int cl_check_image(const int *img, int w, int h, const char *bmp) { int refw, refh; int *ref = cl_read_bmp(bmp, &refw, &refh); if (ref == NULL || refw != w || refh != h) return 0; const int n = w*h; int discrepancy = 0; for (int i = 0; i < n; ++i) { const float r = (float) (img[i] & 0xff); const float g = (float) ((img[i] >> 8) & 0xff); const float b = (float) ((img[i] >> 16) & 0xff); const float rr = (float) (ref[i] & 0xff); const float rg = (float) ((ref[i] >> 8) & 0xff); const float rb = (float) ((ref[i] >> 16) & 0xff); const float dr = fabs(r-rr) / (1.f/255.f + std::max(r,rr)); const float dg = fabs(g-rg) / (1.f/255.f + std::max(g,rg)); const float db = fabs(b-rb) / (1.f/255.f + std::max(b,rb)); const float err = sqrtf(dr*dr+dg*dg+db*db); if (err > pixel_threshold) discrepancy++; } free(ref); return (float(discrepancy) / float(n) > max_error_ratio) ? 0 : 1; } float cl_FLT_ULP(float float_number) { SF floatBin, ulpBin, ulpBinBase; floatBin.f = float_number; ulpBin.spliter.sign = ulpBinBase.spliter.sign = 0; ulpBin.spliter.exponent = ulpBinBase.spliter.exponent = floatBin.spliter.exponent; ulpBin.spliter.mantissa = 0x1; ulpBinBase.spliter.mantissa = 0x0; return ulpBin.f - ulpBinBase.f; } int cl_INT_ULP(int int_number) { return 0; } double time_subtract(struct timeval *y, struct timeval *x, struct timeval *result) { if ( x->tv_sec > y->tv_sec ) return -1; if ((x->tv_sec == y->tv_sec) && (x->tv_usec > y->tv_usec)) return -1; if ( result != NULL){ result->tv_sec = ( y->tv_sec - x->tv_sec ); result->tv_usec = ( y->tv_usec - x->tv_usec ); if (result->tv_usec < 0){ result->tv_sec --; result->tv_usec += 1000000; } } double msec = 1000.0*(y->tv_sec - x->tv_sec) + (y->tv_usec - x->tv_usec)/1000.0; return msec; } float select_ulpsize(float ULPSIZE_FAST_MATH, float ULPSIZE_NO_FAST_MATH) { const char* env_strict = getenv("OCL_STRICT_CONFORMANCE"); float ULPSIZE_FACTOR = ULPSIZE_NO_FAST_MATH; if (env_strict != NULL && strcmp(env_strict, "0") == 0 ) ULPSIZE_FACTOR = ULPSIZE_FAST_MATH; return ULPSIZE_FACTOR; } Beignet-1.1.1-Source/utests/compiler_clz.cpp000664 001750 001750 00000011070 12576733264 020210 0ustar00yryr000000 000000 #include "utest_helper.hpp" namespace { template T get_max(); #define DEF_TEMPLATE_MAX(TYPE, NAME) \ template <> \ TYPE get_max() \ { \ static TYPE max = CL_##NAME##_MAX; \ return max; \ } \ \ template <> \ u##TYPE get_max() \ { \ static u##TYPE max = CL_U##NAME##_MAX; \ return max; \ } DEF_TEMPLATE_MAX(int8_t, CHAR) DEF_TEMPLATE_MAX(int16_t, SHRT) DEF_TEMPLATE_MAX(int32_t, INT) DEF_TEMPLATE_MAX(int64_t, LONG) template T get_min(); #define DEF_TEMPLATE_MIN(TYPE, NAME) \ template <> \ TYPE get_min() \ { \ static TYPE min = CL_##NAME##_MIN; \ return min; \ } \ \ template <> \ u##TYPE get_min() \ { \ static u##TYPE min = 0; \ return min; \ } DEF_TEMPLATE_MIN(int8_t, CHAR) DEF_TEMPLATE_MIN(int16_t, SHRT) DEF_TEMPLATE_MIN(int32_t, INT) DEF_TEMPLATE_MIN(int64_t, LONG) template void test(const char *kernel_name, int s_type) { const size_t n = 64; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_clz", kernel_name); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(U), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(U), NULL); OCL_SET_ARG(0, 
sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); U max = get_max(); U min = get_min(); OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) { ((U*)buf_data[0])[i] = max >> i; if(i == sizeof(U)*8) ((U*)buf_data[0])[i] = min; } OCL_UNMAP_BUFFER(0); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); // for unsigned type. if(s_type == 0) { for (uint32_t i = 0; i < n; ++i) { if(sizeof(U) == 1 && i < 8 ) OCL_ASSERT(((U*)buf_data[1])[i] == i ); else if(sizeof(U) == 2 && i < 16 ) OCL_ASSERT(((U*)buf_data[1])[i] == i ); else if(sizeof(U) == 4 && i < 32 ) OCL_ASSERT(((U*)buf_data[1])[i] == i ); else if(sizeof(U) == 8 && i < 64 ) OCL_ASSERT(((U*)buf_data[1])[i] == i ); } } else // signed type { for (uint32_t i = 0; i < n; ++i) { if(sizeof(U) == 1) { if( i < 8 ) OCL_ASSERT(((U*)buf_data[1])[i] == i+1 ); else if( i == 8 ) OCL_ASSERT(((U*)buf_data[1])[i] == 0 ); } else if(sizeof(U) == 2) { if( i < 16 ) OCL_ASSERT(((U*)buf_data[1])[i] == i+1 ); else if( i == 16 ) OCL_ASSERT(((U*)buf_data[1])[i] == 0 ); } else if(sizeof(U) == 4) { if( i < 32 ) OCL_ASSERT(((U*)buf_data[1])[i] == i+1 ); else if( i == 32 ) OCL_ASSERT(((U*)buf_data[1])[i] == 0 ); } else if(sizeof(U) == 8) { if( i < 63 ) OCL_ASSERT(((U*)buf_data[1])[i] == i+1 ); } } } OCL_UNMAP_BUFFER(1); } } #define compiler_clz(type, kernel, s_type)\ static void compiler_clz_ ##type(void)\ {\ test(# kernel, s_type);\ }\ MAKE_UTEST_FROM_FUNCTION(compiler_clz_ ## type); compiler_clz(uint64_t, compiler_clz_ulong, 0) compiler_clz(uint32_t, compiler_clz_uint, 0) compiler_clz(uint16_t, compiler_clz_ushort, 0) compiler_clz(uint8_t, compiler_clz_uchar, 0) compiler_clz(int64_t, compiler_clz_long, 1) compiler_clz(int32_t, compiler_clz_int, 1) compiler_clz(int16_t, compiler_clz_short, 1) compiler_clz(int8_t, compiler_clz_char, 1) Beignet-1.1.1-Source/utests/compiler_hadd.cpp000664 001750 001750 00000001712 12576733264 020322 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_hadd(void) { const int n = 32; int src1[n], src2[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_hadd"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int i = 0; i < n; ++i) { src1[i] = ((int*)buf_data[0])[i] = rand(); src2[i] = ((int*)buf_data[1])[i] = rand(); } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(2); for (int i = 0; i < n; ++i) { long long a = src1[i]; a += src2[i]; a >>= 1; OCL_ASSERT(((int*)buf_data[2])[i] == (int)a); } OCL_UNMAP_BUFFER(2); } MAKE_UTEST_FROM_FUNCTION(compiler_hadd); Beignet-1.1.1-Source/utests/builtin_sign.cpp000664 001750 001750 00000002301 12576733264 020211 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" void builtin_sign(void) { const int n = 32; float src[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("builtin_sign"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; OCL_MAP_BUFFER(0); src[0] = ((float*)buf_data[0])[0] = nanf(""); src[1] = ((float*)buf_data[0])[1] = INFINITY; src[2] = ((float*)buf_data[0])[2] = 0.f; src[3] = ((float*)buf_data[0])[3] = -0.f; for (int i = 4; i < n; 
++i) { src[i] = ((float*)buf_data[0])[i] = (rand() & 15) * 0.1 - 0.75; } OCL_UNMAP_BUFFER(0); OCL_NDRANGE(1); OCL_MAP_BUFFER(1); float *dst = (float*)buf_data[1]; OCL_ASSERT(dst[0] == 0); OCL_ASSERT(dst[1] == 1.f); OCL_ASSERT(dst[2] == 0.f); OCL_ASSERT(dst[3] == -0.f); for (int i = 4; i < n; ++i) { if (src[i] == 0.f) OCL_ASSERT(dst[i] == 0.f); else if (src[i] == -0.f) OCL_ASSERT(dst[i] == -0.f); else OCL_ASSERT(dst[i] == (src[i] > 0 ? 1 : -1)); } OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(builtin_sign); Beignet-1.1.1-Source/utests/compiler_double_2.cpp000664 001750 001750 00000002335 12576733264 021117 0ustar00yryr000000 000000 #include #include "utest_helper.hpp" static void cpu(int global_id, float *src, double *dst) { float f = src[global_id]; float d = 1.234567890123456789; dst[global_id] = global_id < 14 ? d * (d + f) : 14; } void compiler_double_2(void) { const size_t n = 16; float cpu_src[n]; double cpu_dst[n]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_double_2"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = 16; // Run random tests for (uint32_t pass = 0; pass < 1; ++pass) { OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; OCL_UNMAP_BUFFER(0); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); OCL_UNMAP_BUFFER(1); } } MAKE_UTEST_FROM_FUNCTION(compiler_double_2); Beignet-1.1.1-Source/utests/compiler_step.cpp000664 001750 001750 00000021465 12576733264 020404 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include "string.h" template struct cl_vec { T ptr[((N+1)/2)*2]; //align to 2 elements. typedef cl_vec vec_type; cl_vec(void) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); } cl_vec(vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); } vec_type& operator= (vec_type & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } template vec_type& operator= (cl_vec & other) { memset(ptr, 0, sizeof(T) * ((N+1)/2)*2); memcpy (this->ptr, other.ptr, sizeof(T) * N); return *this; } bool operator== (vec_type & other) { return !memcmp (this->ptr, other.ptr, sizeof(T) * N); } void step (vec_type & other) { int i = 0; for (; i < N; i++) { T a = ptr[i]; T edge = other.ptr[i]; T f = a < edge ? 0.0 : 1.0; ptr[i] = f; } } void step (float & edge) { int i = 0; for (; i < N; i++) { T a = ptr[i]; T f = a < edge ? 0.0 : 1.0; ptr[i] = f; } } }; template static void cpu (int global_id, cl_vec *edge, cl_vec *src, cl_vec *dst) { cl_vec v = src[global_id]; v.step(edge[global_id]); dst[global_id] = v; } template static void cpu(int global_id, T *edge, T *src, U *dst) { T f = src[global_id]; T e = edge[global_id]; f = f < e ? 0.0 : 1.0; dst[global_id] = (U)f; } template static void cpu (int global_id, float edge, cl_vec *src, cl_vec *dst) { cl_vec v = src[global_id]; v.step(edge); dst[global_id] = v; } template static void cpu(int global_id, float edge, T *src, U *dst) { T f = src[global_id]; f = f < edge ? 
0.0 : 1.0; dst[global_id] = (U)f; } template static void gen_rand_val (cl_vec& vect) { int i = 0; memset(vect.ptr, 0, sizeof(T) * ((N+1)/2)*2); for (; i < N; i++) { vect.ptr[i] = static_cast(.1f * (rand() & 15) - .75f); } } template static void gen_rand_val (T & val) { val = static_cast(.1f * (rand() & 15) - .75f); } template inline static void print_data (T& val) { if (std::is_unsigned::value) printf(" %u", val); else printf(" %d", val); } inline static void print_data (float& val) { printf(" %f", val); } template static void dump_data (cl_vec* edge, cl_vec* src, cl_vec* dst, int n) { U* val = reinterpret_cast(dst); n = n*((N+1)/2)*2; printf("\nEdge: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[1])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(val[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[2])[i]); } } template static void dump_data (T* edge, T* src, U* dst, int n) { printf("\nedge: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[1])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(dst[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[2])[i]); } } template static void dump_data (float edge, cl_vec* src, cl_vec* dst, int n) { U* val = reinterpret_cast(dst); n = n*((N+1)/2)*2; printf("\nEdge: %f\n", edge); printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(val[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void dump_data (float edge, T* src, U* dst, int n) { printf("\nedge: %f\n", edge); printf("\nx: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((T *)buf_data[0])[i]); } printf("\nCPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(dst[i]); } printf("\nGPU: \n"); for (int32_t i = 0; i < (int32_t) n; ++i) { print_data(((U *)buf_data[1])[i]); } } template static void compiler_step_with_type(void) { const size_t n = 16; T cpu_dst[n], cpu_src[n]; T edge[n]; // Setup buffers OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); globals[0] = n; locals[0] = n; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); /* Clear the dst buffer to avoid random data. 
*/ OCL_MAP_BUFFER(2); memset(buf_data[2], 0, sizeof(T) * n); OCL_UNMAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(cpu_src[i]); gen_rand_val(edge[i]); } memcpy(buf_data[1], cpu_src, sizeof(T) * n); memcpy(buf_data[0], edge, sizeof(T) * n); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, edge, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(2); //dump_data(edge, cpu_src, cpu_dst, n); OCL_ASSERT(!memcmp(buf_data[2], cpu_dst, sizeof(T) * n)); OCL_UNMAP_BUFFER(2); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(0); } } #define STEP_TEST_TYPE(TYPE) \ static void compiler_step_##TYPE (void) \ { \ OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_step_"#TYPE, SOURCE, NULL); \ compiler_step_with_type(); \ } \ MAKE_UTEST_FROM_FUNCTION(compiler_step_##TYPE); typedef cl_vec float2; typedef cl_vec float3; typedef cl_vec float4; typedef cl_vec float8; typedef cl_vec float16; STEP_TEST_TYPE(float) STEP_TEST_TYPE(float2) STEP_TEST_TYPE(float3) STEP_TEST_TYPE(float4) STEP_TEST_TYPE(float8) STEP_TEST_TYPE(float16) template static void compiler_stepf_with_type(void) { const size_t n = 16; T cpu_dst[n], cpu_src[n]; float edge = (float)(.1f * (rand() & 15) - .75f); // Setup buffers OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(float), &edge); OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); globals[0] = n; locals[0] = n; // Run random tests for (uint32_t pass = 0; pass < 8; ++pass) { OCL_MAP_BUFFER(0); /* Clear the dst buffer to avoid random data. */ OCL_MAP_BUFFER(1); memset(buf_data[1], 0, sizeof(T) * n); OCL_UNMAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(cpu_src[i]); } memcpy(buf_data[0], cpu_src, sizeof(T) * n); // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, edge, cpu_src, cpu_dst); // Compare OCL_MAP_BUFFER(1); //dump_data(edge, cpu_src, cpu_dst, n); OCL_ASSERT(!memcmp(buf_data[1], cpu_dst, sizeof(T) * n)); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(0); } } #define _STEPF_TEST_TYPE(TYPE, keep_program) \ static void compiler_stepf_##TYPE (void) \ { \ OCL_CALL (cl_kernel_init, "compiler_step.cl", "compiler_stepf_"#TYPE, SOURCE, NULL); \ compiler_stepf_with_type(); \ } \ MAKE_UTEST_FROM_FUNCTION_KEEP_PROGRAM(compiler_stepf_##TYPE, keep_program); #define STEPF_TEST_TYPE(TYPE) _STEPF_TEST_TYPE(TYPE, true) #define STEPF_TEST_TYPE_END(TYPE) _STEPF_TEST_TYPE(TYPE, false) STEPF_TEST_TYPE(float) STEPF_TEST_TYPE(float2) STEPF_TEST_TYPE(float3) STEPF_TEST_TYPE(float4) STEPF_TEST_TYPE(float8) STEPF_TEST_TYPE_END(float16) Beignet-1.1.1-Source/utests/compiler_structure_attributes.cpp000664 001750 001750 00000000273 12576733264 023731 0ustar00yryr000000 000000 #include "utest_helper.hpp" void compiler_structure_attributes(void) { OCL_CREATE_KERNEL("compiler_structure_attributes"); } MAKE_UTEST_FROM_FUNCTION(compiler_structure_attributes); Beignet-1.1.1-Source/utests/compiler_lower_return1.cpp000664 001750 001750 00000002561 12576733264 022235 0ustar00yryr000000 000000 #include "utest_helper.hpp" static void compiler_lower_return1(void) { const size_t n = 32; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_lower_return1"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * n); for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = 2; OCL_CREATE_BUFFER(buf[0], CL_MEM_COPY_HOST_PTR, n * sizeof(uint32_t), buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, n 
* sizeof(uint32_t), NULL); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); globals[0] = 16; locals[0] = 16; OCL_NDRANGE(1); // First control flow OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == i); for (int32_t i = 11; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); // Second control flow for (uint32_t i = 0; i < 4; ++i) ((int32_t*)buf_data[0])[i] = -2; OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_NDRANGE(1); OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); for (int32_t i = 0; i < 4; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == -2); for (int32_t i = 4; i < 11; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == i); for (int32_t i = 11; i < 16; ++i) OCL_ASSERT(((int32_t*)buf_data[1])[i] == 2); } MAKE_UTEST_FROM_FUNCTION(compiler_lower_return1); Beignet-1.1.1-Source/utests/compiler_assignment_operation_in_if.cpp000664 001750 001750 00000001652 12576733264 025021 0ustar00yryr000000 000000 #include "utest_helper.hpp" typedef struct cpu_int3{ int x; int y; int z; }cpu_int3; static void cpu(int gidx, int *dst) { cpu_int3 d1 = {gidx, gidx-1, gidx-3}; int k = gidx % 5; if (k == 1){ d1.x = d1.y; } int * addr = dst + gidx; *addr = d1.x; } void compiler_assignment_operation_in_if(void){ const size_t n = 16; int cpu_dst[16]; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_assignment_operation_in_if"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); globals[0] = 16; locals[0] = 16; // Run the kernel on GPU OCL_NDRANGE(1); // Run on CPU for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, cpu_dst); // Compare OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]); OCL_UNMAP_BUFFER(0); } MAKE_UTEST_FROM_FUNCTION(compiler_assignment_operation_in_if) Beignet-1.1.1-Source/utests/compiler_movforphi_undef.cpp000664 001750 001750 00000003430 12576733264 022613 0ustar00yryr000000 000000 #include "utest_helper.hpp" #include "string.h" static void compiler_movforphi_undef(void) { const size_t w = 16; const size_t h = 16; cl_sampler sampler; cl_image_format format; cl_image_desc desc; // Setup kernel and images OCL_CREATE_KERNEL("test_movforphi_undef"); buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) ((uint32_t*)buf_data[0])[j * w + i] = j * w + i; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_UNSIGNED_INT8; memset(&desc, 0, sizeof(desc)); desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = w; desc.image_height = h; desc.image_row_pitch = w * sizeof(uint32_t); OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]); desc.image_row_pitch = 0; OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL); OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST); free(buf_data[0]); buf_data[0] = NULL; // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(sampler), &sampler); globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; OCL_NDRANGE(2); // Check result OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); // Just compare the initial 2 data is enough for this case, as the initial 2 data must in the first // tile box and we can just get the correct coords. 
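/* Concretely, only dst[0] is compared: the loop below runs with j < 1 and
 * skips every i except 0, where the kernel is expected to have copied source
 * texel (i + 1, j) to destination texel (i, j), hence buf_data[0][j*w + i + 1]
 * versus buf_data[1][j*w + i]. */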
Beignet-1.1.1-Source/utests/compiler_movforphi_undef.cpp000664 001750 001750 00000003430 12576733264 022613 0ustar00yryr000000 000000 #include "utest_helper.hpp"
#include "string.h"

static void compiler_movforphi_undef(void)
{
  const size_t w = 16;
  const size_t h = 16;
  cl_sampler sampler;
  cl_image_format format;
  cl_image_desc desc;

  // Setup kernel and images
  OCL_CREATE_KERNEL("test_movforphi_undef");
  buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w * h);
  for (uint32_t j = 0; j < h; ++j)
    for (uint32_t i = 0; i < w; i++)
      ((uint32_t*)buf_data[0])[j * w + i] = j * w + i;

  format.image_channel_order = CL_RGBA;
  format.image_channel_data_type = CL_UNSIGNED_INT8;
  memset(&desc, 0, sizeof(desc));
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = w;
  desc.image_height = h;
  desc.image_row_pitch = w * sizeof(uint32_t);
  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
  desc.image_row_pitch = 0;
  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
  OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
  free(buf_data[0]);
  buf_data[0] = NULL;

  // Run the kernel
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
  OCL_SET_ARG(2, sizeof(sampler), &sampler);
  globals[0] = w;
  globals[1] = h;
  locals[0] = 16;
  locals[1] = 16;
  OCL_NDRANGE(2);

  // Check result
  OCL_MAP_BUFFER(0);
  OCL_MAP_BUFFER(1);
  // Comparing the first couple of pixels is enough for this case: they must
  // fall inside the first tile box, where the expected coords are known.
  for (uint32_t j = 0; j < 1; ++j)
    for (uint32_t i = 0; i < 3; i++) {
      if (i == 0)
        OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i + 1] == ((uint32_t*)buf_data[1])[j * w + i]);
    }
  OCL_UNMAP_BUFFER(0);
  OCL_UNMAP_BUFFER(1);
  OCL_CALL(clReleaseSampler, sampler);
}

MAKE_UTEST_FROM_FUNCTION(compiler_movforphi_undef);
Beignet-1.1.1-Source/utests/compiler_argument_structure.cpp000664 001750 001750 00000001155 12576733264 023365 0ustar00yryr000000 000000 #include "utest_helper.hpp"

struct hop { int x, y; };

void compiler_argument_structure(void)
{
  const size_t n = 2048;
  hop h = {3, 4};

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("compiler_argument_structure");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(hop), &h);

  // Run the kernel
  globals[0] = n;
  locals[0] = 16;
  OCL_NDRANGE(1);
  OCL_MAP_BUFFER(0);

  // Check results
  for (uint32_t i = 0; i < n; ++i)
    OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 7);
}

MAKE_UTEST_FROM_FUNCTION(compiler_argument_structure);
Beignet-1.1.1-Source/utests/builtin_lgamma.cpp000664 001750 001750 00000001644 12576733264 020520 0ustar00yryr000000 000000 #include <cmath>
#include "utest_helper.hpp"

void builtin_lgamma(void)
{
  const int n = 1024;
  float src[n];

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("builtin_lgamma");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
  globals[0] = n;
  locals[0] = 16;

  for (int j = 0; j < 1024; j++) {
    OCL_MAP_BUFFER(0);
    for (int i = 0; i < n; ++i) {
      src[i] = ((float*) buf_data[0])[i] = (j * n + i + 1) * 0.001f;
    }
    OCL_UNMAP_BUFFER(0);

    OCL_NDRANGE(1);

    OCL_MAP_BUFFER(1);
    float *dst = (float*) buf_data[1];
    for (int i = 0; i < n; ++i) {
      float cpu = lgamma(src[i]);
      float gpu = dst[i];
      if (fabsf(cpu - gpu) >= 1e-3) {
        printf("%f %f %f\n", src[i], cpu, gpu);
        OCL_ASSERT(0);
      }
    }
    OCL_UNMAP_BUFFER(1);
  }
}

MAKE_UTEST_FROM_FUNCTION (builtin_lgamma);
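/* builtin_lgamma.cl is not included in this excerpt. A minimal sketch of
 * what the kernel presumably does, given that the host compares against
 * C's lgamma() with a 1e-3 tolerance (the signature is an assumption;
 * lgamma() is a standard OpenCL C built-in): */
__kernel void builtin_lgamma(__global const float *src, __global float *dst)
{
  int i = (int) get_global_id(0);
  dst[i] = lgamma(src[i]);
}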
Beignet-1.1.1-Source/utests/utest_file_map.hpp000664 001750 001750 00000004626 12576733264 020534 0ustar00yryr000000 000000 /*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia <benjamin.segovia@intel.com>
 */

/**
 * \file utest_file_map.hpp
 *
 * \author Benjamin Segovia <benjamin.segovia@intel.com>
 */
#ifndef __UTEST_FILE_MAP_HPP__
#define __UTEST_FILE_MAP_HPP__

#include "CL/cl.h"
#include <stdlib.h>

/* Map a file into memory for direct / cached / simple accesses */
typedef struct cl_file_map {
  void *start, *stop; /* First character and last one */
  size_t size;        /* Total size of the file */
  int fd;             /* Posix file descriptor */
  cl_bool mapped;     /* Indicate if a file was mapped or not */
  char *name;         /* Name of the mapped file */
} cl_file_map_t;

/* Error codes reported by an open attempt */
enum {
  CL_FILE_MAP_SUCCESS        = 0,
  CL_FILE_MAP_FILE_NOT_FOUND = 1,
  CL_FILE_MAP_FAILED_TO_MMAP = 2
};

/* Allocate and initialize a file mapper (but do not map any file) */
extern cl_file_map_t *cl_file_map_new(void);

/* Initialize a file mapper (but do not map any file) */
extern int cl_file_map_init(cl_file_map_t *fm);

/* Destroy but do not deallocate a file map */
extern void cl_file_map_destroy(cl_file_map_t *fm);

/* Destroy and free it */
extern void cl_file_map_delete(cl_file_map_t *fm);

/* Open a file and return the error code */
extern int cl_file_map_open(cl_file_map_t *fm, const char *name);

static inline cl_bool
cl_file_map_is_mapped(const cl_file_map_t *fm) {
  return fm->mapped;
}

static inline const char*
cl_file_map_begin(const cl_file_map_t *fm) {
  return (const char*) fm->start;
}

static inline const char*
cl_file_map_end(const cl_file_map_t *fm) {
  return (const char*) fm->stop;
}

static inline size_t
cl_file_map_size(const cl_file_map_t *fm) {
  return fm->size;
}

#endif /* __UTEST_FILE_MAP_HPP__ */
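/* A minimal usage sketch for the file-map helper declared above, built only
 * from the functions the header exposes. The function name and file path
 * are hypothetical; error handling is reduced to the enum codes the header
 * defines: */
#include "utest_file_map.hpp"
#include <stdio.h>

static int dump_file(const char *path) /* e.g. some kernel .cl file */
{
  cl_file_map_t *fm = cl_file_map_new();
  if (cl_file_map_open(fm, path) != CL_FILE_MAP_SUCCESS) {
    cl_file_map_delete(fm);
    return -1;
  }
  /* The whole file content is now addressable in [begin, end) */
  fwrite(cl_file_map_begin(fm), 1, cl_file_map_size(fm), stdout);
  cl_file_map_delete(fm);
  return 0;
}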
Beignet-1.1.1-Source/utests/enqueue_fill_buf.cpp000664 001750 001750 00000004117 12576733264 021043 0ustar00yryr000000 000000 #include "utest_helper.hpp"
#include <string.h>

static char pattern_serials[128];

static void test_fill_buf(size_t sz, size_t offset, size_t size, size_t pattern_sz)
{
  unsigned int i;
  int ret = 0;

  OCL_MAP_BUFFER(0);
  memset(((char*)buf_data[0]), 0, sz);
  OCL_UNMAP_BUFFER(0);

  for (i = 0; i < pattern_sz; i++) {
    pattern_serials[i] = (rand() & 63);
  }

  if (offset + size > sz) {
    /* Expect an error. */
    OCL_ASSERT(clEnqueueFillBuffer(queue, buf[0], pattern_serials,
               pattern_sz, offset, size, 0, NULL, NULL));
    return;
  }

  ret = clEnqueueFillBuffer(queue, buf[0], pattern_serials,
                            pattern_sz, offset, size, 0, NULL, NULL);
  OCL_ASSERT(!ret);

  OCL_MAP_BUFFER(0);

#if 0
  printf("\n==== pattern size is %zu, offset is %zu, size is %zu ====\n",
         pattern_sz, offset, size);
  printf("\n########### buffer: \n");
  for (i = 0; i < sz; ++i)
    printf(" %2.2u", ((unsigned char*)buf_data[0])[i]);
#endif

  // Check results
  int j = 0;
  for (i = 0; i < sz; ++i) {
    if (i < offset || i >= offset + size) {
      if (((char*)buf_data[0])[i] != 0) {
        printf("\nnon zero index is %d\n", i);
        OCL_ASSERT(0);
      }
      continue;
    }

    if (((char*)buf_data[0])[i] != pattern_serials[j]) {
      printf("\ndifferent index is %d\n", i);
      OCL_ASSERT(0);
    }
    j++;
    if (j == (int)pattern_sz)
      j = 0;
  }

  OCL_UNMAP_BUFFER(0);
}

void enqueue_fill_buf(void)
{
  size_t offset;
  size_t pattern_sz;
  const size_t sz = 1024;
  size_t size = 0;
  static int valid_sz[] = {1, 2, 4, 8, 16, 32, 64, 128};
  unsigned int i = 0;

  OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL);

  for (i = 0; i < sizeof(valid_sz)/sizeof(int); i++) {
    pattern_sz = valid_sz[i];
    size = ((rand()%1024)/pattern_sz) * pattern_sz;
    offset = ((rand()%1024)/pattern_sz) * pattern_sz;
    while (size + offset + 1 > sz) {
      if (size > offset) {
        size = size - offset;
      } else
        offset = offset - size;
    }
    test_fill_buf(sz, offset, size, pattern_sz);
  }
}

MAKE_UTEST_FROM_FUNCTION(enqueue_fill_buf);
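/* A standalone sketch of the clEnqueueFillBuffer contract the test above
 * exercises: pattern_size must be a power of two (the test uses 1..128
 * bytes), and both offset and size must be multiples of it. 'queue' and
 * 'buffer' are assumed to be a valid command queue and a buffer of at
 * least 64 bytes: */
#include "CL/cl.h"

static cl_int fill_example(cl_command_queue queue, cl_mem buffer)
{
  const cl_uint pattern = 0xdeadbeef; /* pattern_size = sizeof(cl_uint) = 4 */
  /* offset 0 and size 64 are both multiples of the 4-byte pattern */
  return clEnqueueFillBuffer(queue, buffer, &pattern, sizeof(pattern),
                             0, 64, 0, NULL, NULL);
}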
Beignet-1.1.1-Source/utests/compiler_mandelbrot.cpp000664 001750 001750 00000003065 12576733264 021554 0ustar00yryr000000 000000 /*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia <benjamin.segovia@intel.com>
 */

#include "utest_helper.hpp"

static int *dst = NULL;
static const size_t w = 256;
static const size_t h = 256;

static void compiler_mandelbrot(void)
{
  const size_t global[2] = {w, h};
  const size_t local[2] = {16, 1};
  const size_t sz = w * h * sizeof(char[4]);

  OCL_CREATE_KERNEL("compiler_mandelbrot");

  OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
  OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
  OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
  OCL_MAP_BUFFER(0);
  dst = (int *) buf_data[0];

  /* Save the image (for debug purpose) */
  cl_write_bmp(dst, w, h, "compiler_mandelbrot.bmp");

  /* Compare with the golden image */
  OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_ref.bmp");
}

MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot);
Beignet-1.1.1-Source/utests/compiler_private_const.cpp000664 001750 001750 00000001061 12576733264 022277 0ustar00yryr000000 000000 #include "utest_helper.hpp"

void compiler_private_const(void)
{
  const size_t n = 16;

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("compiler_private_const");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  globals[0] = n;
  locals[0] = n;

  // Run the kernel on GPU
  OCL_NDRANGE(1);

  // Compare
  OCL_MAP_BUFFER(0);
  for (size_t i = 0; i < n; ++i)
    OCL_ASSERT(((int32_t*)buf_data[0])[i] == (int32_t)(i * 2));
  OCL_UNMAP_BUFFER(0);
}

MAKE_UTEST_FROM_FUNCTION(compiler_private_const);
Beignet-1.1.1-Source/utests/compiler_sub_group_shuffle.cpp000664 001750 001750 00000002211 12576733264 023136 0ustar00yryr000000 000000 #include "utest_helper.hpp"

void compiler_sub_group_shuffle(void)
{
  const size_t n = 32;
  const int32_t buf_size = 4 * n + 1;

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("compiler_sub_group_shuffle");
  OCL_CREATE_BUFFER(buf[0], 0, buf_size * sizeof(int), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  int c = 3;
  OCL_SET_ARG(1, sizeof(int), &c);
  globals[0] = n;
  locals[0] = 16;

  OCL_MAP_BUFFER(0);
  for (int32_t i = 0; i < buf_size; ++i)
    ((int*)buf_data[0])[i] = -1;
  OCL_UNMAP_BUFFER(0);

  // Run the kernel on GPU
  OCL_NDRANGE(1);

  // Compare
  OCL_MAP_BUFFER(0);
  int* dst = (int *)buf_data[0];
  int subgroupsize = dst[0];
  OCL_ASSERT(subgroupsize == 8 || subgroupsize == 16);
  dst++;
  for (int32_t i = 0; i < (int32_t) n; ++i) {
    int round = i / subgroupsize;
    int index = i % subgroupsize;
    OCL_ASSERT(index == dst[4*i]);
    OCL_ASSERT((round * subgroupsize + c) == dst[4*i+1]);
    OCL_ASSERT((round * subgroupsize + 5) == dst[4*i+2]);
    OCL_ASSERT((round * subgroupsize + (subgroupsize - index - 1)) == dst[4*i+3]);
  }
  OCL_UNMAP_BUFFER(0);
}

MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_shuffle);
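/* compiler_sub_group_shuffle.cl is not shown in this excerpt. A plausible
 * sketch consistent with the asserts above, using the cl_intel_subgroups
 * extension (the exact kernel source is an assumption): each lane
 * contributes its global id, so shuffling from lane c yields
 * round * subgroupsize + c. */
#pragma OPENCL EXTENSION cl_intel_subgroups : enable
__kernel void compiler_sub_group_shuffle(__global int *dst, int c)
{
  int i = (int) get_global_id(0);
  if (i == 0)
    dst[0] = get_max_sub_group_size(); /* checked by the host to be 8 or 16 */
  dst++;

  int from = i;                        /* this lane's contribution */
  uint lid = get_sub_group_local_id();
  dst[4*i]     = lid;                                   /* index within subgroup */
  dst[4*i + 1] = intel_sub_group_shuffle(from, c);      /* lane c of this round */
  dst[4*i + 2] = intel_sub_group_shuffle(from, 5);      /* lane 5 of this round */
  dst[4*i + 3] = intel_sub_group_shuffle(from, get_max_sub_group_size() - lid - 1);
}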
Beignet-1.1.1-Source/utests/compiler_long_shr.cpp000664 001750 001750 00000001760 12576733264 021240 0ustar00yryr000000 000000 #include <cstdint>
#include <cstring>
#include <algorithm>
#include "utest_helper.hpp"

void compiler_long_shr(void)
{
  const size_t n = 64;
  uint64_t src[n];

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("compiler_long_shr");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL);
  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
  globals[0] = n;
  locals[0] = 16;

  // Run random tests
  for (int32_t i = 0; i < (int32_t) n; ++i)
    src[i] = (uint64_t)1 << 63;
  OCL_MAP_BUFFER(0);
  memcpy(buf_data[0], src, sizeof(src));
  OCL_UNMAP_BUFFER(0);

  // Run the kernel on GPU
  OCL_NDRANGE(1);

  // Compare
  OCL_MAP_BUFFER(1);
  uint64_t *dest = ((uint64_t *)buf_data[1]);
  for (int32_t i = 0; i < (int32_t) n; ++i)
    if (i > 7)
      OCL_ASSERT(dest[i] == src[i] >> i);
    else
      OCL_ASSERT(dest[i] == src[i] + 1);
  OCL_UNMAP_BUFFER(1);
}

MAKE_UTEST_FROM_FUNCTION(compiler_long_shr);
Beignet-1.1.1-Source/utests/compiler_fill_image.cpp000664 001750 001750 00000002112 12576733264 021505 0ustar00yryr000000 000000 #include <string.h>
#include "utest_helper.hpp"

static void compiler_fill_image(void)
{
  const size_t w = 512;
  const size_t h = 512;
  uint32_t color = 0x12345678;
  cl_image_format format;
  cl_image_desc desc;

  memset(&desc, 0x0, sizeof(cl_image_desc));
  memset(&format, 0x0, sizeof(cl_image_format));

  format.image_channel_order = CL_RGBA;
  format.image_channel_data_type = CL_UNSIGNED_INT8;
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = w;
  desc.image_height = h;
  desc.image_row_pitch = 0;

  // Setup kernel and images
  OCL_CREATE_KERNEL("test_fill_image");
  OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);

  // Run the kernel
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(color), &color);
  globals[0] = w;
  globals[1] = h;
  locals[0] = 16;
  locals[1] = 16;
  OCL_NDRANGE(2);

  // Check result
  OCL_MAP_BUFFER(0);
  for (uint32_t j = 0; j < h; ++j)
    for (uint32_t i = 0; i < w; i++)
      OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == 0x78563412);
  OCL_UNMAP_BUFFER(0);
}

MAKE_UTEST_FROM_FUNCTION(compiler_fill_image);
Beignet-1.1.1-Source/utests/compiler_integer_remainder.cpp000664 001750 001750 00000002250 12576733264 023103 0ustar00yryr000000 000000 #include "utest_helper.hpp"

static void cpu(int global_id, int *src, int *dst, int x)
{
  dst[global_id] = src[global_id] % x;
}

void compiler_integer_remainder(void)
{
  const size_t n = 16;
  int cpu_dst[16], cpu_src[16];
  const int x = 7;

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("compiler_integer_remainder");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
  OCL_SET_ARG(2, sizeof(x), &x);
  globals[0] = 16;
  locals[0] = 16;

  // Run random tests
  for (uint32_t pass = 0; pass < 8; ++pass) {
    OCL_MAP_BUFFER(0);
    for (int32_t i = 0; i < (int32_t) n; ++i)
      cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
    OCL_UNMAP_BUFFER(0);

    // Run the kernel on GPU
    OCL_NDRANGE(1);

    // Run on CPU
    for (int32_t i = 0; i < (int32_t) n; ++i)
      cpu(i, cpu_src, cpu_dst, x);

    // Compare
    OCL_MAP_BUFFER(1);
    for (int32_t i = 0; i < (int32_t) n; ++i)
      OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
    OCL_UNMAP_BUFFER(1);
  }
}

MAKE_UTEST_FROM_FUNCTION(compiler_integer_remainder);
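/* A plausible sketch of compiler_integer_remainder.cl, directly mirroring
 * the cpu() reference above (the exact kernel source is an assumption): */
__kernel void compiler_integer_remainder(__global int *src, __global int *dst, int x)
{
  int i = (int) get_global_id(0);
  dst[i] = src[i] % x; /* OpenCL C '%' truncates toward zero, like the CPU path */
}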
Beignet-1.1.1-Source/utests/builtin_modf.cpp000664 001750 001750 00000002705 12576733264 020206 0ustar00yryr000000 000000 #include <cmath>
#include <cstring>
#include "utest_helper.hpp"

void builtin_modf(void)
{
  const int n = 32;
  float src[n];

  // Setup kernel and buffers
  OCL_CREATE_KERNEL("builtin_modf");
  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
  globals[0] = n;
  locals[0] = 16;

  src[0] = INFINITY;
  src[1] = -INFINITY;
  src[2] = nanf("");
  src[3] = 0;
  src[4] = 1.5f;
  src[5] = 2.5f;
  src[6] = -2.5f;
  src[7] = 20;
  src[8] = 21;
  src[9] = 89.5f;
  for (int i = 10; i < n; ++i)
    src[i] = 0; // avoid copying uninitialized stack data below

  OCL_MAP_BUFFER(0);
  memcpy(buf_data[0], src, n * sizeof(float));
  OCL_UNMAP_BUFFER(0);

  OCL_NDRANGE(1);

  OCL_MAP_BUFFER(1);
  OCL_MAP_BUFFER(2);
  float *dst = (float *)buf_data[1];
  float *it = (float *)buf_data[2];
  OCL_ASSERT(dst[0] == 0 && it[0] == INFINITY);
  OCL_ASSERT(dst[1] == -0.f && it[1] == -INFINITY);
  OCL_ASSERT(isnanf(dst[2]) && isnanf(it[2]));
  OCL_ASSERT(dst[3] == 0 && it[3] == 0);
  OCL_ASSERT(dst[4] == 0.5f && it[4] == 1);
  OCL_ASSERT(dst[5] == 0.5f && it[5] == 2);
  OCL_ASSERT(dst[6] == -0.5f && it[6] == -2);
  OCL_ASSERT(dst[7] == 0 && it[7] == 20);
  OCL_ASSERT(dst[8] == 0 && it[8] == 21);
  OCL_ASSERT(dst[9] == 0.5f && it[9] == 89);
  OCL_UNMAP_BUFFER(1);
  OCL_UNMAP_BUFFER(2);
}

MAKE_UTEST_FROM_FUNCTION(builtin_modf);
Beignet-1.1.1-Source/utests/utest_run.cpp000664 001750 001750 00000006027 12600456606 017552 0ustar00yryr000000 000000 /*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia <benjamin.segovia@intel.com>
 */

/**
 * \file utest_run.cpp
 * \author Benjamin Segovia <benjamin.segovia@intel.com>
 *
 * Just run the unit tests. The user may optionally provide a subset of them
 * to run.
 */
#include "utest_helper.hpp"
#include "utest_exception.hpp"
#include <iostream>
#include <getopt.h>

static const char *shortopts = "c:lanh";
struct option longopts[] = {
  {"casename", required_argument, NULL, 'c'},
  {"list", no_argument, NULL, 'l'},
  {"all", no_argument, NULL, 'a'},
  {"allnoissue", no_argument, NULL, 'n'},
  {"help", no_argument, NULL, 'h'},
  {0, 0, 0, 0},
};

void usage()
{
  std::cout << "\
Usage:\n\
./utest_run