gpu_register.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011 Université de Bordeaux 1
  4. * Copyright (C) 2012 inria
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <starpu.h>
  18. #include <starpu_opencl.h>
  19. #include <starpu_cuda.h>
  20. #include "../helper.h"
  21. #include "scal.h"
  22. static int
  23. submit_tasks(starpu_data_handle_t handle, int pieces, int n)
  24. {
  25. int i, ret;
  26. for (i = 0; i < pieces; i++)
  27. {
  28. struct starpu_task *task = starpu_task_create();
  29. task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
  30. task->cl = &scal_codelet;
  31. task->execute_on_a_specific_worker = 1;
  32. task->workerid = i%n;
  33. ret = starpu_task_submit(task);
  34. if (ret == -ENODEV)
  35. return -ENODEV;
  36. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  37. }
  38. ret = starpu_task_wait_for_all();
  39. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
  40. return 0;
  41. }
  42. static int
  43. find_a_worker(enum starpu_archtype type)
  44. {
  45. int worker;
  46. int ret = starpu_worker_get_ids_by_type(type, &worker, 1);
  47. if (ret == 0)
  48. return -ENODEV;
  49. return worker;
  50. }
  /*
   * Verify that every element of t holds twice its own index.
   *
   * t:    array of `size` unsigned values to inspect.
   * size: number of elements in t.
   *
   * Returns 0 when t[i] == 2*i for every i, or 1 at the first mismatch after
   * reporting it through FPRINTF on stderr.
   */
  static int
  check_result(unsigned *t, size_t size)
  {
      /* The index has the same type as `size`, so the loop condition does
       * not compare signed and unsigned values. */
      size_t i;

      for (i = 0; i < size; i++)
      {
          unsigned expected = (unsigned) (2 * i);

          if (t[i] != expected)
          {
              /* All three values are unsigned, hence %u. */
              FPRINTF(stderr,"t[%u] is %u instead of %u\n", (unsigned) i, t[i], expected);
              return 1;
          }
      }

      return 0;
  }
  #ifdef STARPU_USE_CUDA
  #if CUDART_VERSION >= 4000
  /*
   * Register a vector whose buffer was allocated with cudaMalloc() and check
   * that it can be partitioned and processed by tasks.
   *
   * The buffer is filled with 0..size-1 and registered on the memory node of
   * the chosen CUDA worker. It is then prefetched on every worker's memory
   * node, partitioned, processed by submit_tasks(), gathered back, and
   * finally copied to the host and verified by check_result().
   *
   * Returns -ENODEV when no CUDA worker is found or when submit_tasks()
   * returns -ENODEV, 1 when the host buffer cannot be allocated or the result
   * is wrong, and 0 on success.
   */
  static int
  test_cuda(void)
  {
      int ret;
      unsigned *foo_gpu;
      unsigned *foo;
      int n, i, size, pieces;
      int devid;
      int chosen;
      cudaError_t cures;
      starpu_data_handle_t handle;

      /* Find a CUDA worker */
      chosen = find_a_worker(STARPU_CUDA_WORKER);
      if (chosen == -ENODEV)
          return -ENODEV;

      n = starpu_worker_get_count();
      size = 10 * n;

      devid = starpu_worker_get_devid(chosen);
      cures = cudaSetDevice(devid);
      if (STARPU_UNLIKELY(cures))
          STARPU_CUDA_REPORT_ERROR(cures);

      /* foo_gpu is only meaningful if the allocation succeeded. */
      cures = cudaMalloc((void**)&foo_gpu, size * sizeof(*foo_gpu));
      if (STARPU_UNLIKELY(cures))
          STARPU_CUDA_REPORT_ERROR(cures);

      foo = calloc(size, sizeof(*foo));
      if (foo == NULL)
      {
          cudaFree(foo_gpu);
          return 1;
      }
      for (i = 0; i < size; i++)
          foo[i] = i;

      cures = cudaMemcpy(foo_gpu, foo, size * sizeof(*foo_gpu), cudaMemcpyHostToDevice);
      if (STARPU_UNLIKELY(cures))
          STARPU_CUDA_REPORT_ERROR(cures);

      starpu_vector_data_register(&handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, size, sizeof(*foo_gpu));

      /* Broadcast the data to force in-place partitioning */
      for (i = 0; i < n; i++)
          starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);

      /* Even with just one worker, split in at least two */
      if (n == 1)
          pieces = 2;
      else
          pieces = n;

      struct starpu_data_filter f =
      {
          .filter_func = starpu_block_filter_func_vector,
          .nchildren = pieces,
      };
      starpu_data_partition(handle, &f);

      ret = submit_tasks(handle, pieces, n);

      /* Gather and unregister the data even when submission was refused, so
       * that the buffers below are no longer referenced by the handle when
       * they are released. */
      starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
      starpu_data_unregister(handle);

      cures = cudaSetDevice(devid);
      if (STARPU_UNLIKELY(cures))
          STARPU_CUDA_REPORT_ERROR(cures);

      /* submit_tasks() returns either 0 or -ENODEV; only a successful run
       * has a result worth checking. */
      if (ret != -ENODEV)
      {
          cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost);
          if (STARPU_UNLIKELY(cures))
              STARPU_CUDA_REPORT_ERROR(cures);

          ret = check_result(foo, size);
      }

      free(foo);
      cudaFree(foo_gpu);

      return ret;
  }
  #endif
  #endif
  #ifdef STARPU_USE_OPENCL
  /*
   * Register a vector whose buffer was created with clCreateBuffer() and
   * check that it can be partitioned and processed by tasks.
   *
   * The buffer is filled with 0..size-1 and registered on the memory node of
   * the chosen OpenCL worker. It is then prefetched on every worker's memory
   * node, partitioned, processed by submit_tasks(), gathered back, and
   * finally read back to the host and verified by check_result().
   *
   * Returns -ENODEV when no OpenCL worker is found or when submit_tasks()
   * returns -ENODEV, 1 when the host buffer cannot be allocated or the result
   * is wrong, and 0 on success.
   */
  static int
  test_opencl(void)
  {
      int i;
      int ret;
      int unload_ret;
      int chosen;
      int n;
      int size;
      int pieces;
      int devid;
      cl_int err;
      cl_context context;
      cl_command_queue queue;
      cl_mem foo_gpu;
      unsigned int *foo;
      starpu_data_handle_t handle;

      /* Find an OpenCL worker. This is done before anything is acquired so
       * that the early -ENODEV return has nothing to release. */
      chosen = find_a_worker(STARPU_OPENCL_WORKER);
      if (chosen == -ENODEV)
          return -ENODEV;

      n = starpu_worker_get_count();
      size = 10 * n;

      foo = malloc(size*sizeof(*foo));
      if (foo == NULL)
          return 1;
      for (i = 0; i < size; i++)
          foo[i] = i;

      ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
      STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");

      devid = starpu_worker_get_devid(chosen);
      starpu_opencl_get_context(devid, &context);
      starpu_opencl_get_queue(devid, &queue);

      foo_gpu = clCreateBuffer(context, CL_MEM_READ_WRITE, size*sizeof(*foo), NULL, &err);
      if (STARPU_UNLIKELY(err != CL_SUCCESS))
          STARPU_OPENCL_REPORT_ERROR(err);

      /* Non-blocking write; clFinish() below waits for it. */
      err = clEnqueueWriteBuffer(queue,
                                 foo_gpu,
                                 CL_FALSE,
                                 0,
                                 size*sizeof(*foo),
                                 foo,
                                 0,
                                 NULL,
                                 NULL);
      if (STARPU_UNLIKELY(err != CL_SUCCESS))
          STARPU_OPENCL_REPORT_ERROR(err);
      err = clFinish(queue);
      if (STARPU_UNLIKELY(err != CL_SUCCESS))
          STARPU_OPENCL_REPORT_ERROR(err);

      starpu_vector_data_register(&handle,
                                  starpu_worker_get_memory_node(chosen),
                                  (uintptr_t)foo_gpu,
                                  size,
                                  sizeof(*foo));

      /* Broadcast the data to force in-place partitioning */
      for (i = 0; i < n; i++)
          starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);

      /* Even with just one worker, split in at least two */
      if (n == 1)
          pieces = 2;
      else
          pieces = n;

      struct starpu_data_filter f =
      {
          .filter_func = starpu_block_filter_func_vector,
          .nchildren = pieces,
      };
      starpu_data_partition(handle, &f);

      ret = submit_tasks(handle, pieces, n);

      /* Gather and unregister the data even when submission was refused, so
       * that the buffers below are no longer referenced by the handle when
       * they are released. */
      starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
      starpu_data_unregister(handle);

      unload_ret = starpu_opencl_unload_opencl(&opencl_program);
      STARPU_CHECK_RETURN_VALUE(unload_ret, "starpu_opencl_unload_opencl");

      /* submit_tasks() returns either 0 or -ENODEV; only a successful run
       * has a result worth checking. */
      if (ret != -ENODEV)
      {
          /* Non-blocking read; clFinish() below waits for it. */
          err = clEnqueueReadBuffer(queue,
                                    foo_gpu,
                                    CL_FALSE,
                                    0,
                                    size*sizeof(*foo),
                                    foo,
                                    0,
                                    NULL,
                                    NULL);
          if (STARPU_UNLIKELY(err != CL_SUCCESS))
              STARPU_OPENCL_REPORT_ERROR(err);
          err = clFinish(queue);
          if (STARPU_UNLIKELY(err != CL_SUCCESS))
              STARPU_OPENCL_REPORT_ERROR(err);

          ret = check_result(foo, size);
      }

      err = clReleaseMemObject(foo_gpu);
      if (STARPU_UNLIKELY(err != CL_SUCCESS))
          STARPU_OPENCL_REPORT_ERROR(err);
      free(foo);

      return ret;
  }
  #endif /* STARPU_USE_OPENCL */
  208. int main(int argc, char **argv)
  209. {
  210. int skipped_cuda = 1, skipped_opencl = 1;
  211. int ret;
  212. ret = starpu_init(NULL);
  213. if (ret == -ENODEV)
  214. return STARPU_TEST_SKIPPED;
  215. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  216. #ifdef STARPU_USE_CUDA
  217. #if CUDART_VERSION >= 4000 /* We need thread-safety of CUDA */
  218. ret = test_cuda();
  219. if (ret == 1)
  220. goto fail;
  221. else if (ret == 0)
  222. skipped_cuda = 0;
  223. #endif
  224. #endif
  225. #ifdef STARPU_USE_OPENCL
  226. ret = test_opencl();
  227. if (ret == 1)
  228. goto fail;
  229. else if (ret == 0)
  230. skipped_opencl = 0;
  231. #endif
  232. starpu_shutdown();
  233. if (skipped_cuda == 1 && skipped_opencl == 1)
  234. return STARPU_TEST_SKIPPED;
  235. return EXIT_SUCCESS;
  236. fail:
  237. starpu_shutdown();
  238. return EXIT_FAILURE;
  239. }