basicsplit.c 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2015,2017 CNRS
  4. * Copyright (C) 2010,2011,2013 Université de Bordeaux
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <stdio.h>
  18. #include <stdlib.h>
  19. #include <string.h>
  20. #include <unistd.h>
  21. #ifdef __APPLE_CC__
  22. #include <OpenCL/opencl.h>
  23. #else
  24. #include <CL/cl.h>
  25. #endif
  /*
   * error(fmt, ...): print "Error: " followed by the printf-style message to
   * stderr and terminate with EXIT_FAILURE. The first argument must be a string
   * literal because it is concatenated with the prefix at compile time.
   */
  #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0)

  /*
   * check(err, str): if the OpenCL status `err` is not CL_SUCCESS, print the
   * numeric code together with `str` to stderr and terminate with EXIT_FAILURE.
   * The status expression is evaluated exactly once.
   */
  #define check(err, str) do { \
      cl_int check_status_ = (err); \
      if (check_status_ != CL_SUCCESS) { \
          fprintf(stderr, "OpenCL Error (%d): %s\n", (int)check_status_, (str)); \
          exit(EXIT_FAILURE); \
      } \
  } while(0)

  /*
   * UNUSED(x): mark a parameter as intentionally unused. With GCC-compatible
   * compilers the parameter is renamed UNUSED_x and tagged
   * __attribute__((unused)); elsewhere it expands to the bare name. A
   * pre-existing UNUSED definition is left alone.
   */
  #ifdef UNUSED
  #elif defined(__GNUC__)
  # define UNUSED(x) UNUSED_ ## x __attribute__((unused))
  #else
  # define UNUSED(x) x
  #endif

  /* Number of elements in each host array. */
  #define SIZE 1024
  /* Element type of the host arrays (the kernel source uses float as well). */
  #define TYPE float
  /* Size in bytes of one full host array. */
  #define REALSIZE ((SIZE) * sizeof(TYPE))
  /*
   * OpenCL C source for the "add" kernel, compiled at run time by main().
   *
   * Each work-item computes a flat index idx = y * w + x from its global ids in
   * dimensions 0 and 1 and the global size of dimension 0, then stores
   * d[idx] = s1[idx] + k * s2[idx]. The factor k depends on which
   * SOCL_DEVICE_TYPE_* macro is defined when the kernel is built:
   * GPU -> 1, CPU -> 2, ACCELERATOR -> 3, UNKNOWN -> 4.
   * These macros are not defined anywhere in this file; presumably the SOCL
   * runtime supplies them per device at build time (to be confirmed).
   * If none of them is defined the kernel does not write to d.
   */
  37. const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \
  38. size_t x = get_global_id(0);\n\
  39. size_t y = get_global_id(1);\n\
  40. size_t w = get_global_size(0); \n\
  41. int idx = y*w+x; \n\
  42. #ifdef SOCL_DEVICE_TYPE_GPU \n\
  43. d[idx] = s1[idx] + s2[idx];\n\
  44. #endif \n\
  45. #ifdef SOCL_DEVICE_TYPE_CPU \n\
  46. d[idx] = s1[idx] + 2* s2[idx];\n\
  47. #endif \n\
  48. #ifdef SOCL_DEVICE_TYPE_ACCELERATOR \n\
  49. d[idx] = s1[idx] + 3 * s2[idx];\n\
  50. #endif \n\
  51. #ifdef SOCL_DEVICE_TYPE_UNKNOWN \n\
  52. d[idx] = s1[idx] + 4 * s2[idx];\n\
  53. #endif \n\
  54. }";
  /* Kernel object for "add"; created in main() and given its arguments in add(). */
  55. cl_kernel kernel;
  /* OpenCL context; created in main() and used by add() to create buffers. */
  56. cl_context context;
  /* Host arrays: the two operands and the destination, SIZE elements each; filled in main(). */
  57. TYPE s1[SIZE],s2[SIZE],d[SIZE];
  /* Function-pointer type for a split callback; it is not referenced elsewhere in the visible source. */
  58. typedef cl_int (*split_func_t)(cl_command_queue, cl_uint, cl_uint, const size_t *, const size_t *, const size_t *, const cl_event, cl_event *);
  59. void add(cl_command_queue cq, cl_uint size, TYPE * s1, TYPE *s2, TYPE*d, cl_uint num_events, cl_event * events, cl_event *event) {
  60. cl_int err;
  61. printf("Creating buffers...\n");
  62. cl_mem s1m = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), s1, &err);
  63. check(err, "clCreateBuffer s1");
  64. cl_mem s2m = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), s2, &err);
  65. check(err, "clCreateBuffer s2");
  66. cl_mem dm = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, size * sizeof(TYPE), d, &err);
  67. check(err, "clCreateBuffer d");
  68. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &s1m);
  69. check(err, "clSetKernelArg 0");
  70. err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &s2m);
  71. check(err, "clSetKernelArg 1");
  72. err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dm);
  73. check(err, "clSetKernelArg 2");
  74. printf("Enqueueing NDRangeKernel...\n");
  75. size_t local[3] = {16, 1, 1};
  76. size_t global[3] = {size, 1, 1};
  77. cl_event eventK;
  78. err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, global, local, num_events, events, &eventK);
  79. check(err, "clEnqueueNDRangeKernel");
  80. clEnqueueMapBuffer(cq, dm, CL_FALSE, CL_MAP_READ, 0, size * sizeof(TYPE), 1, &eventK, event, &err);
  81. check(err, "clEnqueueMapBuffer");
  82. clReleaseMemObject(s1m);
  83. clReleaseMemObject(s2m);
  84. clReleaseMemObject(dm);
  85. }
  86. cl_int split_func(cl_command_queue cq, cl_uint split_factor, void * data, cl_event before, cl_event * after) {
  87. cl_event evs[split_factor];
  88. printf("Partition with factor %d\n", split_factor);
  89. cl_uint size = ((SIZE)/split_factor) - (SIZE/split_factor % 16);
  90. cl_uint i;
  91. for (i=0; i<split_factor; i++) {
  92. cl_uint offset = size * i;
  93. add(cq, size, &s1[offset], &s2[offset], &d[offset], 1, &before, &evs[i]);
  94. }
  95. clEnqueueMarkerWithWaitList(cq, split_factor, evs, after);
  96. return CL_SUCCESS;
  97. }
  98. int main(int UNUSED(argc), char** UNUSED(argv)) {
  99. cl_platform_id platforms[15];
  100. cl_uint num_platforms;
  101. cl_device_id devices[15];
  102. cl_uint num_devices;
  103. cl_program program;
  104. cl_command_queue cq;
  105. cl_int err;
  106. unsigned int i;
  107. {
  108. for (i=0; i<SIZE; i++) {
  109. s1[i] = 2.0;
  110. s2[i] = 7.0;
  111. d[i] = 98.0;
  112. }
  113. }
  114. printf("Querying platform...\n");
  115. err = clGetPlatformIDs(0, NULL, &num_platforms);
  116. if (num_platforms == 0) {
  117. printf("No OpenCL platform found.\n");
  118. exit(77);
  119. }
  120. err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, NULL);
  121. check(err, "clGetPlatformIDs");
  122. unsigned int platform_idx = -1;
  123. for (i=0; i<num_platforms;i++) {
  124. char vendor[256];
  125. clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
  126. if (strcmp(vendor, "Inria") == 0) {
  127. platform_idx = i;
  128. }
  129. }
  130. if (platform_idx == -1) {
  131. printf("SOCL platform not found.\n");
  132. exit(77);
  133. }
  134. printf("Querying devices...\n");
  135. err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_ALL, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices);
  136. check(err, "clGetDeviceIDs");
  137. if (num_devices == 0) {
  138. printf("No OpenCL device found\n");
  139. exit(77);
  140. }
  141. printf("Creating context...\n");
  142. cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0};
  143. context = clCreateContext(properties, num_devices, devices, NULL, NULL, &err);
  144. check(err, "clCreateContext");
  145. printf("Creating program...\n");
  146. program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err);
  147. check(err, "clCreateProgram");
  148. printf("Building program...\n");
  149. err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  150. check(err, "clBuildProgram");
  151. printf("Creating kernel...\n");
  152. kernel = clCreateKernel(program, "add", &err);
  153. check(err, "clCreateKernel");
  154. printf("Creating command queue...\n");
  155. cq = clCreateCommandQueue(context, NULL, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
  156. check(err, "clCreateCommandQueue");
  157. printf("Setting split parameters...\n");
  158. err = clSetKernelArg(kernel, -1, sizeof(void*), split_func);
  159. check(err, "clSetKernelArg split func");
  160. cl_uint split_space = 10;
  161. err = clSetKernelArg(kernel, -2, sizeof(void*), &split_space);
  162. check(err, "clSetKernelArg split space");
  163. cl_uint niter = 15;
  164. for (i=0; i<niter; i++) {
  165. printf("Iteration %u...\n", i);
  166. add(cq, SIZE, s1, s2, d, 0, NULL, NULL);
  167. printf("Finishing iteration...\n");
  168. clFinish(cq);
  169. }
  170. printf("Data...\n");
  171. {
  172. int i;
  173. for (i=0; i<SIZE; i++) {
  174. printf("%f ", d[i]);
  175. }
  176. printf("\n");
  177. }
  178. #ifdef PROFILING
  179. #define DURATION(event,label) do { \
  180. cl_ulong t0,t1; \
  181. err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t0, NULL);\
  182. check(err, "clGetEventProfilingInfo");\
  183. err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t1, NULL);\
  184. check(err, "clGetEventProfilingInfo");\
  185. printf("Profiling %s: %lu nanoseconds\n", label, t1-t0);\
  186. } while (0);
  187. DURATION(eventW1, "first buffer writing");
  188. DURATION(eventW2, "second buffer writing");
  189. DURATION(eventK, "kernel execution");
  190. DURATION(eventR, "result buffer reading");
  191. #endif
  192. printf("Releasing command queue...\n");
  193. err = clReleaseCommandQueue(cq);
  194. check(err, "clReleaseCommandQueue");
  195. printf("Releasing kernel...\n");
  196. err = clReleaseKernel(kernel);
  197. check(err, "clReleaseKernel");
  198. printf("Releasing program...\n");
  199. err = clReleaseProgram(program);
  200. check(err, "clReleaseProgram");
  201. printf("Releasing context...\n");
  202. err = clReleaseContext(context);
  203. check(err, "clReleaseContext");
  204. #ifdef HAVE_CLGETEXTENSIONFUNCTIONADDRESSFORPLATFORM
  205. void (*clShutdown)(void) = clGetExtensionFunctionAddressForPlatform(platforms[platform_idx], "clShutdown");
  206. if (clShutdown != NULL) {
  207. printf("Calling clShutdown :)\n");
  208. clShutdown();
  209. }
  210. #endif
  211. return 0;
  212. }