coo_interface.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012 inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/fxt.h>
  18. #include <datawizard/memalloc.h>
  19. static int
  20. copy_ram_to_ram(void *src_interface, STARPU_ATTRIBUTE_UNUSED unsigned src_node,
  21. void *dst_interface, STARPU_ATTRIBUTE_UNUSED unsigned dst_node)
  22. {
  23. size_t size = 0;
  24. struct starpu_coo_interface *src_coo, *dst_coo;
  25. src_coo = (struct starpu_coo_interface *) src_interface;
  26. dst_coo = (struct starpu_coo_interface *) dst_interface;
  27. size = src_coo->n_values * sizeof(src_coo->columns[0]);
  28. memcpy((void *) dst_coo->columns, (void *) src_coo->columns, size);
  29. /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
  30. memcpy((void *) dst_coo->rows, (void *) src_coo->rows, size);
  31. size = src_coo->n_values * src_coo->elemsize;
  32. memcpy((void *) dst_coo->values, (void *) src_coo->values, size);
  33. _STARPU_TRACE_DATA_COPY(src_node, dst_node,
  34. src_coo->n_values *
  35. (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
  36. return 0;
  37. }
  38. #ifdef STARPU_USE_CUDA
  39. static int
  40. copy_cuda_async_sync(void *src_interface, unsigned src_node,
  41. void *dst_interface, unsigned dst_node,
  42. cudaStream_t stream, enum cudaMemcpyKind kind)
  43. {
  44. int ret;
  45. size_t size = 0;
  46. struct starpu_coo_interface *src_coo, *dst_coo;
  47. src_coo = (struct starpu_coo_interface *) src_interface;
  48. dst_coo = (struct starpu_coo_interface *) dst_interface;
  49. size = src_coo->n_values * sizeof(src_coo->columns[0]);
  50. ret = starpu_cuda_copy_async_sync(
  51. (void *) src_coo->columns,
  52. src_node,
  53. (void *) dst_coo->columns,
  54. dst_node,
  55. size,
  56. stream,
  57. kind);
  58. if (ret == 0)
  59. stream = NULL;
  60. /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
  61. ret = starpu_cuda_copy_async_sync(
  62. (void *) src_coo->rows,
  63. src_node,
  64. (void *) dst_coo->rows,
  65. dst_node,
  66. size,
  67. stream,
  68. kind);
  69. if (ret == 0)
  70. stream = NULL;
  71. size = src_coo->n_values * src_coo->elemsize;
  72. ret = starpu_cuda_copy_async_sync(
  73. (void *) src_coo->values,
  74. src_node,
  75. (void *) dst_coo->values,
  76. dst_node,
  77. size,
  78. stream,
  79. kind);
  80. _STARPU_TRACE_DATA_COPY(src_node, dst_node,
  81. src_coo->n_values *
  82. (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
  83. return ret;
  84. }
  85. static int
  86. copy_ram_to_cuda(void *src_interface, unsigned src_node,
  87. void *dst_interface, unsigned dst_node)
  88. {
  89. return copy_cuda_async_sync(src_interface, src_node,
  90. dst_interface, dst_node,
  91. NULL, cudaMemcpyHostToDevice);
  92. }
  93. static int
  94. copy_cuda_to_ram(void *src_interface, unsigned src_node,
  95. void *dst_interface, unsigned dst_node)
  96. {
  97. return copy_cuda_async_sync(src_interface, src_node,
  98. dst_interface, dst_node,
  99. NULL, cudaMemcpyDeviceToHost);
  100. }
  101. static int
  102. copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
  103. void *dst_interface, unsigned dst_node,
  104. cudaStream_t stream)
  105. {
  106. return copy_cuda_async_sync(src_interface, src_node,
  107. dst_interface, dst_node,
  108. stream, cudaMemcpyHostToDevice);
  109. }
  110. static int
  111. copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
  112. void *dst_interface, unsigned dst_node,
  113. cudaStream_t stream)
  114. {
  115. return copy_cuda_async_sync(src_interface, src_node,
  116. dst_interface, dst_node,
  117. stream, cudaMemcpyDeviceToHost);
  118. }
  119. static int
  120. copy_cuda_to_cuda(void *src_interface, unsigned src_node,
  121. void *dst_interface, unsigned dst_node)
  122. {
  123. return copy_cuda_async_sync(src_interface, src_node,
  124. dst_interface, dst_node,
  125. NULL, cudaMemcpyDeviceToDevice);
  126. }
  127. #ifdef NO_STRIDE
  128. static int
  129. copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  130. void *dst_interface, unsigned dst_node,
  131. cudaStream_t stream)
  132. {
  133. return copy_cuda_async_sync(src_interface, src_node,
  134. dst_interface, dst_node,
  135. stream, cudaMemcpyDeviceToDevice);
  136. }
  137. #endif /* !NO_STRIDE */
  138. #endif /* !STARPU_USE_CUDA */
  139. #ifdef STARPU_USE_OPENCL
  140. static int
  141. copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  142. void *dst_interface, unsigned dst_node,
  143. cl_event *event)
  144. {
  145. int ret = 0;
  146. cl_int err;
  147. size_t size = 0;
  148. struct starpu_coo_interface *src_coo, *dst_coo;
  149. src_coo = (struct starpu_coo_interface *) src_interface;
  150. dst_coo = (struct starpu_coo_interface *) dst_interface;
  151. size = src_coo->n_values * sizeof(src_coo->columns[0]);
  152. err = starpu_opencl_copy_ram_to_opencl(
  153. (void *) src_coo->columns,
  154. src_node,
  155. (cl_mem) dst_coo->columns,
  156. dst_node,
  157. size,
  158. 0,
  159. event,
  160. NULL);
  161. if (STARPU_UNLIKELY(err))
  162. STARPU_OPENCL_REPORT_ERROR(err);
  163. /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
  164. err = starpu_opencl_copy_ram_to_opencl(
  165. (void *) src_coo->rows,
  166. src_node,
  167. (cl_mem) dst_coo->rows,
  168. dst_node,
  169. size,
  170. 0,
  171. event,
  172. NULL);
  173. if (STARPU_UNLIKELY(err))
  174. STARPU_OPENCL_REPORT_ERROR(err);
  175. size = src_coo->n_values * src_coo->elemsize;
  176. err = starpu_opencl_copy_ram_to_opencl(
  177. (void *) src_coo->values,
  178. src_node,
  179. (cl_mem) dst_coo->values,
  180. dst_node,
  181. size,
  182. 0,
  183. event,
  184. &ret);
  185. if (STARPU_UNLIKELY(err))
  186. STARPU_OPENCL_REPORT_ERROR(err);
  187. _STARPU_TRACE_DATA_COPY(src_node, dst_node,
  188. src_coo->n_values *
  189. (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
  190. return ret;
  191. }
  192. static int
  193. copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  194. void *dst_interface, unsigned dst_node,
  195. cl_event *event)
  196. {
  197. int ret = 0;
  198. cl_int err;
  199. size_t size = 0;
  200. struct starpu_coo_interface *src_coo, *dst_coo;
  201. src_coo = (struct starpu_coo_interface *) src_interface;
  202. dst_coo = (struct starpu_coo_interface *) dst_interface;
  203. size = src_coo->n_values * sizeof(src_coo->columns[0]);
  204. err = starpu_opencl_copy_opencl_to_ram(
  205. (void *) src_coo->columns,
  206. src_node,
  207. (cl_mem) dst_coo->columns,
  208. dst_node,
  209. size,
  210. 0,
  211. event,
  212. NULL);
  213. if (STARPU_UNLIKELY(err))
  214. STARPU_OPENCL_REPORT_ERROR(err);
  215. /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
  216. err = starpu_opencl_copy_opencl_to_ram(
  217. (void *) src_coo->rows,
  218. src_node,
  219. (cl_mem) dst_coo->rows,
  220. dst_node,
  221. size,
  222. 0,
  223. event,
  224. NULL);
  225. if (STARPU_UNLIKELY(err))
  226. STARPU_OPENCL_REPORT_ERROR(err);
  227. size = src_coo->n_values * src_coo->elemsize;
  228. err = starpu_opencl_copy_opencl_to_ram(
  229. (void *) src_coo->values,
  230. src_node,
  231. (cl_mem) dst_coo->values,
  232. dst_node,
  233. size,
  234. 0,
  235. event,
  236. &ret);
  237. if (STARPU_UNLIKELY(err))
  238. STARPU_OPENCL_REPORT_ERROR(err);
  239. _STARPU_TRACE_DATA_COPY(src_node, dst_node,
  240. src_coo->n_values *
  241. (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
  242. return ret;
  243. }
  244. static int
  245. copy_ram_to_opencl(void *src_interface, unsigned src_node,
  246. void *dst_interface, unsigned dst_node)
  247. {
  248. return copy_ram_to_opencl_async(src_interface, src_node,
  249. dst_interface, dst_node,
  250. NULL);
  251. }
  252. static int
  253. copy_opencl_to_ram(void *src_interface, unsigned src_node,
  254. void *dst_interface, unsigned dst_node)
  255. {
  256. return copy_opencl_to_ram_async(src_interface, src_node,
  257. dst_interface, dst_node,
  258. NULL);
  259. }
  260. #endif /* !STARPU_USE_OPENCL */
  261. static struct starpu_data_copy_methods coo_copy_data_methods =
  262. {
  263. .ram_to_ram = copy_ram_to_ram,
  264. #ifdef STARPU_USE_CUDA
  265. .ram_to_cuda = copy_ram_to_cuda,
  266. .cuda_to_ram = copy_cuda_to_ram,
  267. .ram_to_cuda_async = copy_ram_to_cuda_async,
  268. .cuda_to_ram_async = copy_cuda_to_ram_async,
  269. .cuda_to_cuda = copy_cuda_to_cuda,
  270. #ifdef NO_STRIDE
  271. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  272. #endif
  273. #else
  274. #ifdef STARPU_SIMGRID
  275. #ifdef NO_STRIDE
  276. /* Enable GPU-GPU transfers in simgrid */
  277. .cuda_to_cuda_async = 1,
  278. #endif
  279. #endif
  280. #endif /* !STARPU_USE_CUDA */
  281. #ifdef STARPU_USE_OPENCL
  282. .ram_to_opencl = copy_ram_to_opencl,
  283. .opencl_to_ram = copy_opencl_to_ram,
  284. .ram_to_opencl_async = copy_ram_to_opencl_async,
  285. .opencl_to_ram_async = copy_opencl_to_ram_async,
  286. #endif /* !STARPU_USE_OPENCL */
  287. };
  288. static void
  289. register_coo_handle(starpu_data_handle_t handle, uint32_t home_node,
  290. void *data_interface)
  291. {
  292. struct starpu_coo_interface *coo_interface =
  293. (struct starpu_coo_interface *) data_interface;
  294. unsigned node;
  295. for (node = 0; node < STARPU_MAXNODES; node++)
  296. {
  297. struct starpu_coo_interface *local_interface;
  298. local_interface = (struct starpu_coo_interface *)
  299. starpu_data_get_interface_on_node(handle, node);
  300. if (node == home_node)
  301. {
  302. local_interface->values = coo_interface->values;
  303. local_interface->columns = coo_interface->columns;
  304. local_interface->rows = coo_interface->rows;
  305. }
  306. else
  307. {
  308. local_interface->values = 0;
  309. local_interface->columns = 0;
  310. local_interface->rows = 0;
  311. }
  312. local_interface->nx = coo_interface->nx;
  313. local_interface->ny = coo_interface->ny;
  314. local_interface->n_values = coo_interface->n_values;
  315. local_interface->elemsize = coo_interface->elemsize;
  316. }
  317. }
  318. static ssize_t
  319. allocate_coo_buffer_on_node(void *data_interface, uint32_t dst_node)
  320. {
  321. uint32_t *addr_columns = NULL;
  322. uint32_t *addr_rows = NULL;
  323. uintptr_t addr_values = 0;
  324. struct starpu_coo_interface *coo_interface =
  325. (struct starpu_coo_interface *) data_interface;
  326. uint32_t n_values = coo_interface->n_values;
  327. size_t elemsize = coo_interface->elemsize;
  328. addr_columns = (void*) starpu_allocate_buffer_on_node(dst_node, n_values * sizeof(coo_interface->columns[0]));
  329. if (STARPU_UNLIKELY(addr_columns == NULL))
  330. goto fail_columns;
  331. addr_rows = (void*) starpu_allocate_buffer_on_node(dst_node, n_values * sizeof(coo_interface->rows[0]));
  332. if (STARPU_UNLIKELY(addr_rows == NULL))
  333. goto fail_rows;
  334. addr_values = starpu_allocate_buffer_on_node(dst_node, n_values * elemsize);
  335. if (STARPU_UNLIKELY(addr_values == (uintptr_t) NULL))
  336. goto fail_values;
  337. coo_interface->columns = addr_columns;
  338. coo_interface->rows = addr_rows;
  339. coo_interface->values = addr_values;
  340. return n_values * (sizeof(coo_interface->columns[0]) + sizeof(coo_interface->rows[0]) + elemsize);
  341. fail_values:
  342. starpu_free_buffer_on_node(dst_node, (uintptr_t) addr_rows, n_values * sizeof(coo_interface->rows[0]));
  343. fail_rows:
  344. starpu_free_buffer_on_node(dst_node, (uintptr_t) addr_columns, n_values * sizeof(coo_interface->columns[0]));
  345. fail_columns:
  346. return -ENOMEM;
  347. }
  348. static void
  349. free_coo_buffer_on_node(void *data_interface, uint32_t node)
  350. {
  351. struct starpu_coo_interface *coo_interface = (struct starpu_coo_interface *) data_interface;
  352. uint32_t n_values = coo_interface->n_values;
  353. size_t elemsize = coo_interface->elemsize;
  354. starpu_free_buffer_on_node(node, (uintptr_t) coo_interface->columns, n_values * sizeof(coo_interface->columns[0]));
  355. starpu_free_buffer_on_node(node, (uintptr_t) coo_interface->rows, n_values * sizeof(coo_interface->rows[0]));
  356. starpu_free_buffer_on_node(node, coo_interface->values, n_values * elemsize);
  357. }
  358. static size_t
  359. coo_interface_get_size(starpu_data_handle_t handle)
  360. {
  361. struct starpu_coo_interface *coo_interface;
  362. coo_interface = (struct starpu_coo_interface *)
  363. starpu_data_get_interface_on_node(handle, 0);
  364. return coo_interface->nx * coo_interface->ny * coo_interface->elemsize;
  365. }
  366. static uint32_t
  367. coo_interface_footprint(starpu_data_handle_t handle)
  368. {
  369. struct starpu_coo_interface *coo_interface;
  370. coo_interface = (struct starpu_coo_interface *)
  371. starpu_data_get_interface_on_node(handle, 0);
  372. return starpu_crc32_be(coo_interface->nx * coo_interface->ny, 0);
  373. }
  374. static int
  375. coo_compare(void *a, void *b)
  376. {
  377. struct starpu_coo_interface *coo_a, *coo_b;
  378. coo_a = (struct starpu_coo_interface *) a;
  379. coo_b = (struct starpu_coo_interface *) b;
  380. return (coo_a->nx == coo_b->nx &&
  381. coo_a->ny == coo_b->ny &&
  382. coo_a->n_values == coo_b->n_values &&
  383. coo_a->elemsize == coo_b->elemsize);
  384. }
  385. static void
  386. display_coo_interface(starpu_data_handle_t handle, FILE *f)
  387. {
  388. struct starpu_coo_interface *coo_interface =
  389. coo_interface = (struct starpu_coo_interface *)
  390. starpu_data_get_interface_on_node(handle, 0);
  391. fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
  392. }
  393. struct starpu_data_interface_ops _starpu_interface_coo_ops =
  394. {
  395. .register_data_handle = register_coo_handle,
  396. .allocate_data_on_node = allocate_coo_buffer_on_node,
  397. .handle_to_pointer = NULL,
  398. .free_data_on_node = free_coo_buffer_on_node,
  399. .copy_methods = &coo_copy_data_methods,
  400. .get_size = coo_interface_get_size,
  401. .footprint = coo_interface_footprint,
  402. .compare = coo_compare,
  403. #ifdef STARPU_USE_GORDON
  404. .convert_to_gordon = NULL,
  405. #endif
  406. .interfaceid = STARPU_COO_INTERFACE_ID,
  407. .interface_size = sizeof(struct starpu_coo_interface),
  408. .display = display_coo_interface
  409. };
  410. void
  411. starpu_coo_data_register(starpu_data_handle_t *handleptr, uint32_t home_node,
  412. uint32_t nx, uint32_t ny, uint32_t n_values,
  413. uint32_t *columns, uint32_t *rows,
  414. uintptr_t values, size_t elemsize)
  415. {
  416. struct starpu_coo_interface coo_interface =
  417. {
  418. .values = values,
  419. .columns = columns,
  420. .rows = rows,
  421. .nx = nx,
  422. .ny = ny,
  423. .n_values = n_values,
  424. .elemsize = elemsize,
  425. };
  426. starpu_data_register(handleptr, home_node, &coo_interface,
  427. &_starpu_interface_coo_ops);
  428. }