/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2010,2011 University of Bordeaux * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <CL/cl.h> #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0) #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0) #ifdef UNUSED #elif defined(__GNUC__) # define UNUSED(x) UNUSED_ ## x __attribute__((unused)) #else # define UNUSED(x) x #endif #define SIZE 1024 #define TYPE float #define REALSIZE (SIZE * sizeof(TYPE)) const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \ size_t x = get_global_id(0);\n\ size_t y = get_global_id(1);\n\ size_t w = get_global_size(0); \n\ int idx = y*w+x; \n\ #ifdef SOCL_DEVICE_TYPE_GPU \n\ d[idx] = s1[idx] + s2[idx];\n\ #endif \n\ #ifdef SOCL_DEVICE_TYPE_CPU \n\ d[idx] = s1[idx] + 2* s2[idx];\n\ #endif \n\ #ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n\ d[idx] = s1[idx] + 3 * s2[idx];\n\ #endif \n\ #ifdef SOCL_DEVICE_TYPE_UNKNOWN \n\ d[idx] = s1[idx] + 4 * s2[idx];\n\ #endif \n\ }"; int main(int UNUSED(argc), char** UNUSED(argv)) { cl_platform_id platforms[15]; cl_uint num_platforms; cl_device_id devices[15]; cl_uint num_devices; cl_context context; cl_program program; cl_kernel kernel; cl_mem s1m, s2m, dm; cl_command_queue cq; cl_int err; TYPE s1[SIZE],s2[SIZE],d[SIZE]; { int i; for (i=0; i<SIZE; i++) { s1[i] = 2.0; s2[i] = 7.0; d[i] = 98.0; } } printf("Querying platform...\n"); err = clGetPlatformIDs(0, NULL, &num_platforms); if (num_platforms == 0) { printf("No OpenCL platform found.\n"); exit(77); } err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL); check(err, "clGetPlatformIDs"); printf("Querying devices...\n"); unsigned int platform_idx; for (platform_idx=0; platform_idx<num_platforms; platform_idx++) { err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_ALL, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices); check(err, "clGetDeviceIDs"); if (num_devices != 0) break; } if (num_devices == 0) { printf("No OpenCL device found\n"); exit(77); } printf("Creating context...\n"); cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0}; context = clCreateContext(properties, num_devices, devices, NULL, NULL, &err); check(err, "clCreateContext"); printf("Creating program...\n"); program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err); check(err, "clCreateProgram"); printf("Building program...\n"); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); check(err, "clBuildProgram"); printf("Creating kernel...\n"); kernel = clCreateKernel(program, "add", &err); check(err, "clCreateKernel"); printf("Creating buffers...\n"); s1m = clCreateBuffer(context, CL_MEM_READ_WRITE, REALSIZE, NULL, &err); check(err, "clCreateBuffer s1"); s2m = clCreateBuffer(context, CL_MEM_READ_ONLY, REALSIZE, NULL, &err); check(err, "clCreateBuffer s2"); dm = clCreateBuffer(context, CL_MEM_WRITE_ONLY, REALSIZE, NULL, &err); check(err, "clCreateBuffer d"); printf("Creating command queue...\n"); cl_event eventW1, eventW2, eventK, eventR; #ifdef PROFILING cq = clCreateCommandQueue(context, NULL, CL_QUEUE_PROFILING_ENABLE, &err); #else cq = clCreateCommandQueue(context, NULL, 0, &err); #endif check(err, "clCreateCommandQueue"); printf("Enqueueing WriteBuffers...\n"); err = clEnqueueWriteBuffer(cq, s1m, CL_FALSE, 0, REALSIZE, s1, 0, NULL, &eventW1); check(err, "clEnqueueWriteBuffer s1"); err = clEnqueueWriteBuffer(cq, s2m, CL_FALSE, 0, REALSIZE, s2, 0, NULL, &eventW2); check(err, "clEnqueueWriteBuffer s2"); printf("Setting kernel arguments...\n"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &s1m); check(err, "clSetKernelArg 0"); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &s2m); check(err, "clSetKernelArg 1"); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dm); check(err, "clSetKernelArg 2"); printf("Enqueueing NDRangeKernel...\n"); size_t local[3] = {16, 1, 1}; size_t global[3] = {1024, 1, 1}; cl_event deps[] = {eventW1,eventW2}; err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, global, local, 2, deps, &eventK); check(err, "clEnqueueNDRangeKernel"); printf("Enqueueing ReadBuffer...\n"); err = clEnqueueReadBuffer(cq, dm, CL_FALSE, 0, REALSIZE, d, 0, NULL, &eventR); check(err, "clEnqueueReadBuffer"); printf("Finishing queue...\n"); clFinish(cq); printf("Data...\n"); { int i; for (i=0; i<SIZE; i++) { printf("%f ", d[i]); } printf("\n"); } #ifdef PROFILING #define DURATION(event,label) do { \ cl_ulong t0,t1; \ err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t0, NULL);\ check(err, "clGetEventProfilingInfo");\ err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t1, NULL);\ check(err, "clGetEventProfilingInfo");\ printf("Profiling %s: %lu nanoseconds\n", label, t1-t0);\ } while (0); DURATION(eventW1, "first buffer writing"); DURATION(eventW2, "second buffer writing"); DURATION(eventK, "kernel execution"); DURATION(eventR, "result buffer reading"); #endif printf("Releasing events...\n"); err = clReleaseEvent(eventW1); err |= clReleaseEvent(eventW2); err |= clReleaseEvent(eventK); err |= clReleaseEvent(eventR); check(err, "clReleaseCommandQueue"); printf("Releasing command queue...\n"); err = clReleaseCommandQueue(cq); check(err, "clReleaseCommandQueue"); printf("Releasing buffers...\n"); err = clReleaseMemObject(s1m); check(err, "clReleaseMemObject s1"); err = clReleaseMemObject(s2m); check(err, "clReleaseMemObject s2"); err = clReleaseMemObject(dm); check(err, "clReleaseMemObject d"); printf("Releasing kernel...\n"); err = clReleaseKernel(kernel); check(err, "clReleaseKernel"); printf("Releasing program...\n"); err = clReleaseProgram(program); check(err, "clReleaseProgram"); printf("Releasing context...\n"); err = clReleaseContext(context); check(err, "clReleaseContext"); return 0; }