mansched.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012,2013,2015,2017,2019 CNRS
  4. * Copyright (C) 2010-2012 Université de Bordeaux
  5. * Copyright (C) 2012 Inria
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <unistd.h>
  22. #ifdef __APPLE_CC__
  23. #include <OpenCL/opencl.h>
  24. #else
  25. #include <CL/cl.h>
  26. #endif
  27. #define error(...) do { fprintf(stderr, "Error: " __VA_ARGS__); exit(EXIT_FAILURE); } while(0)
  28. #define check(err, str) do { if(err != CL_SUCCESS) { fprintf(stderr, "OpenCL Error (%d): %s\n",err, str); exit(EXIT_FAILURE); }} while(0)
  29. #ifdef UNUSED
  30. #elif defined(__GNUC__)
  31. # define UNUSED(x) UNUSED_ ## x __attribute__((unused))
  32. #else
  33. # define UNUSED(x) x
  34. #endif
  35. #define SIZE 1024
  36. #define TYPE float
  37. #define REALSIZE (SIZE * sizeof(TYPE))
  38. const char * kernel_src = "__kernel void add(__global float*s1, __global float*s2, __global float*d) { \
  39. size_t x = get_global_id(0);\
  40. size_t y = get_global_id(1);\
  41. size_t w = get_global_size(0); \
  42. int idx = y*w+x; \
  43. d[idx] = s1[idx] + s2[idx];\
  44. }";
  45. int main(int UNUSED(argc), char** UNUSED(argv)) {
  46. cl_platform_id platforms[15];
  47. cl_uint num_platforms;
  48. cl_device_id devices[15];
  49. cl_uint num_devices;
  50. cl_context context;
  51. cl_program program;
  52. cl_kernel kernel;
  53. cl_mem s1m, s2m, dm;
  54. cl_command_queue cq;
  55. unsigned int d;
  56. cl_int err;
  57. TYPE s1[SIZE],s2[SIZE],dst[SIZE];
  58. {
  59. int i;
  60. for (i=0; i<SIZE; i++) {
  61. s1[i] = 2.0;
  62. s2[i] = 7.0;
  63. dst[i] = 98.0;
  64. }
  65. }
  66. printf("Querying platform...\n");
  67. clGetPlatformIDs(0, NULL, &num_platforms);
  68. if (num_platforms == 0) {
  69. printf("No OpenCL platform found.\n");
  70. exit(77);
  71. }
  72. err = clGetPlatformIDs(sizeof(platforms)/sizeof(cl_platform_id), platforms, &num_platforms);
  73. check(err, "clGetPlatformIDs");
  74. printf("Querying devices...\n");
  75. unsigned int platform_idx;
  76. for (platform_idx=0; platform_idx<num_platforms; platform_idx++) {
  77. err = clGetDeviceIDs(platforms[platform_idx], CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU, sizeof(devices)/sizeof(cl_device_id), devices, &num_devices);
  78. check(err, "clGetDeviceIDs");
  79. for (d=0; d<num_devices; d++) {
  80. printf("Creating context...\n");
  81. cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[platform_idx], 0};
  82. context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err);
  83. check(err, "clCreateContext");
  84. printf("Creating program...\n");
  85. program = clCreateProgramWithSource(context, 1, &kernel_src, NULL, &err);
  86. check(err, "clCreateProgram");
  87. printf("Building program...\n");
  88. err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  89. check(err, "clBuildProgram");
  90. printf("Creating kernel...\n");
  91. kernel = clCreateKernel(program, "add", &err);
  92. check(err, "clCreateKernel");
  93. printf("Creating buffers...\n");
  94. s1m = clCreateBuffer(context, CL_MEM_READ_WRITE, REALSIZE, NULL, &err);
  95. check(err, "clCreateBuffer s1");
  96. s2m = clCreateBuffer(context, CL_MEM_READ_ONLY, REALSIZE, NULL, &err);
  97. check(err, "clCreateBuffer s2");
  98. dm = clCreateBuffer(context, CL_MEM_WRITE_ONLY, REALSIZE, NULL, &err);
  99. check(err, "clCreateBuffer dst");
  100. printf("Creating command queue...\n");
  101. cl_event eventW1, eventW2, eventK, eventR;
  102. #ifdef PROFILING
  103. cq = clCreateCommandQueue(context, devices[d], CL_QUEUE_PROFILING_ENABLE, &err);
  104. #else
  105. cq = clCreateCommandQueue(context, devices[d], 0, &err);
  106. #endif
  107. check(err, "clCreateCommandQueue");
  108. printf("Enqueueing WriteBuffers...\n");
  109. err = clEnqueueWriteBuffer(cq, s1m, CL_FALSE, 0, REALSIZE, s1, 0, NULL, &eventW1);
  110. check(err, "clEnqueueWriteBuffer s1");
  111. err = clEnqueueWriteBuffer(cq, s2m, CL_FALSE, 0, REALSIZE, s2, 0, NULL, &eventW2);
  112. check(err, "clEnqueueWriteBuffer s2");
  113. printf("Setting kernel arguments...\n");
  114. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &s1m);
  115. check(err, "clSetKernelArg 0");
  116. err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &s2m);
  117. check(err, "clSetKernelArg 1");
  118. err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dm);
  119. check(err, "clSetKernelArg 2");
  120. printf("Enqueueing NDRangeKernel...\n");
  121. size_t local[3] = {16, 1, 1};
  122. size_t global[3] = {1024, 1, 1};
  123. cl_event deps[] = {eventW1,eventW2};
  124. err = clEnqueueNDRangeKernel(cq, kernel, 3, NULL, global, local, 2, deps, &eventK);
  125. check(err, "clEnqueueNDRangeKernel");
  126. printf("Enqueueing ReadBuffer...\n");
  127. err = clEnqueueReadBuffer(cq, dm, CL_FALSE, 0, REALSIZE, dst, 0, NULL, &eventR);
  128. check(err, "clEnqueueReadBuffer");
  129. printf("Finishing queue...\n");
  130. clFinish(cq);
  131. printf("Data...\n");
  132. {
  133. int i;
  134. for (i=0; i<SIZE; i++) {
  135. printf("%f ", dst[i]);
  136. }
  137. printf("\n");
  138. }
  139. #ifdef PROFILING
  140. #define DURATION(event,label) do { \
  141. cl_ulong t0,t1; \
  142. err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t0, NULL);\
  143. check(err, "clGetEventProfilingInfo");\
  144. err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t1, NULL);\
  145. check(err, "clGetEventProfilingInfo");\
  146. printf("Profiling %s: %lu nanoseconds\n", label, t1-t0);\
  147. } while (0);
  148. DURATION(eventW1, "first buffer writing");
  149. DURATION(eventW2, "second buffer writing");
  150. DURATION(eventK, "kernel execution");
  151. DURATION(eventR, "result buffer reading");
  152. #endif
  153. printf("Releasing events...\n");
  154. err = clReleaseEvent(eventW1);
  155. err |= clReleaseEvent(eventW2);
  156. err |= clReleaseEvent(eventK);
  157. err |= clReleaseEvent(eventR);
  158. check(err, "clReleaseCommandQueue");
  159. printf("Releasing command queue...\n");
  160. err = clReleaseCommandQueue(cq);
  161. check(err, "clReleaseCommandQueue");
  162. printf("Releasing buffers...\n");
  163. err = clReleaseMemObject(s1m);
  164. check(err, "clReleaseMemObject s1");
  165. err = clReleaseMemObject(s2m);
  166. check(err, "clReleaseMemObject s2");
  167. err = clReleaseMemObject(dm);
  168. check(err, "clReleaseMemObject d");
  169. printf("Releasing kernel...\n");
  170. err = clReleaseKernel(kernel);
  171. check(err, "clReleaseKernel");
  172. printf("Releasing program...\n");
  173. err = clReleaseProgram(program);
  174. check(err, "clReleaseProgram");
  175. printf("Releasing context...\n");
  176. err = clReleaseContext(context);
  177. check(err, "clReleaseContext");
  178. }
  179. }
  180. return 0;
  181. }