coo_interface.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012 inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/fxt.h>
  18. #include <datawizard/memalloc.h>
  19. static int
  20. copy_ram_to_ram(void *src_interface, STARPU_ATTRIBUTE_UNUSED unsigned src_node,
  21. void *dst_interface, STARPU_ATTRIBUTE_UNUSED unsigned dst_node)
  22. {
  23. size_t size = 0;
  24. struct starpu_coo_interface *src_coo, *dst_coo;
  25. src_coo = (struct starpu_coo_interface *) src_interface;
  26. dst_coo = (struct starpu_coo_interface *) dst_interface;
  27. size = src_coo->n_values * sizeof(src_coo->columns[0]);
  28. memcpy((void *) dst_coo->columns, (void *) src_coo->columns, size);
  29. /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
  30. memcpy((void *) dst_coo->rows, (void *) src_coo->rows, size);
  31. size = src_coo->n_values * src_coo->elemsize;
  32. memcpy((void *) dst_coo->values, (void *) src_coo->values, size);
  33. _STARPU_TRACE_DATA_COPY(src_node, dst_node,
  34. src_coo->n_values *
  35. (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
  36. return 0;
  37. }
  38. #ifdef STARPU_USE_CUDA
  39. static int
  40. copy_cuda_async_sync(void *src_interface, unsigned src_node,
  41. void *dst_interface, unsigned dst_node,
  42. cudaStream_t stream, enum cudaMemcpyKind kind)
  43. {
  44. int ret;
  45. size_t size = 0;
  46. struct starpu_coo_interface *src_coo, *dst_coo;
  47. src_coo = (struct starpu_coo_interface *) src_interface;
  48. dst_coo = (struct starpu_coo_interface *) dst_interface;
  49. size = src_coo->n_values * sizeof(src_coo->columns[0]);
  50. ret = starpu_cuda_copy_async_sync(
  51. (void *) src_coo->columns,
  52. src_node,
  53. (void *) dst_coo->columns,
  54. dst_node,
  55. size,
  56. stream,
  57. kind);
  58. if (ret == 0)
  59. stream = NULL;
  60. /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
  61. ret = starpu_cuda_copy_async_sync(
  62. (void *) src_coo->rows,
  63. src_node,
  64. (void *) dst_coo->rows,
  65. dst_node,
  66. size,
  67. stream,
  68. kind);
  69. if (ret == 0)
  70. stream = NULL;
  71. size = src_coo->n_values * src_coo->elemsize;
  72. ret = starpu_cuda_copy_async_sync(
  73. (void *) src_coo->values,
  74. src_node,
  75. (void *) dst_coo->values,
  76. dst_node,
  77. size,
  78. stream,
  79. kind);
  80. _STARPU_TRACE_DATA_COPY(src_node, dst_node,
  81. src_coo->n_values *
  82. (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
  83. return ret;
  84. }
  85. static int
  86. copy_ram_to_cuda(void *src_interface, unsigned src_node,
  87. void *dst_interface, unsigned dst_node)
  88. {
  89. return copy_cuda_async_sync(src_interface, src_node,
  90. dst_interface, dst_node,
  91. NULL, cudaMemcpyHostToDevice);
  92. }
  93. static int
  94. copy_cuda_to_ram(void *src_interface, unsigned src_node,
  95. void *dst_interface, unsigned dst_node)
  96. {
  97. return copy_cuda_async_sync(src_interface, src_node,
  98. dst_interface, dst_node,
  99. NULL, cudaMemcpyDeviceToHost);
  100. }
  101. static int
  102. copy_ram_to_cuda_async(void *src_interface, unsigned src_node,
  103. void *dst_interface, unsigned dst_node,
  104. cudaStream_t stream)
  105. {
  106. return copy_cuda_async_sync(src_interface, src_node,
  107. dst_interface, dst_node,
  108. stream, cudaMemcpyHostToDevice);
  109. }
  110. static int
  111. copy_cuda_to_ram_async(void *src_interface, unsigned src_node,
  112. void *dst_interface, unsigned dst_node,
  113. cudaStream_t stream)
  114. {
  115. return copy_cuda_async_sync(src_interface, src_node,
  116. dst_interface, dst_node,
  117. stream, cudaMemcpyDeviceToHost);
  118. }
  119. static int
  120. copy_cuda_to_cuda(void *src_interface, unsigned src_node,
  121. void *dst_interface, unsigned dst_node)
  122. {
  123. return copy_cuda_async_sync(src_interface, src_node,
  124. dst_interface, dst_node,
  125. NULL, cudaMemcpyDeviceToDevice);
  126. }
  127. #ifdef NO_STRIDE
  128. static int
  129. copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  130. void *dst_interface, unsigned dst_node,
  131. cudaStream_t stream)
  132. {
  133. return copy_cuda_async_sync(src_interface, src_node,
  134. dst_interface, dst_node,
  135. stream, cudaMemcpyDeviceToDevice);
  136. }
  137. #endif /* !NO_STRIDE */
  138. #endif /* !STARPU_USE_CUDA */
  139. #ifdef STARPU_USE_OPENCL
  140. static int
  141. copy_opencl_common(void *src_interface, unsigned src_node,
  142. void *dst_interface, unsigned dst_node,
  143. cl_event *event)
  144. {
  145. int ret = 0;
  146. size_t size = 0;
  147. struct starpu_coo_interface *src_coo, *dst_coo;
  148. src_coo = (struct starpu_coo_interface *) src_interface;
  149. dst_coo = (struct starpu_coo_interface *) dst_interface;
  150. size = src_coo->n_values * sizeof(src_coo->columns[0]);
  151. ret = starpu_opencl_copy_async_sync(
  152. (uintptr_t) src_coo->columns,
  153. src_node,
  154. 0,
  155. (uintptr_t) dst_coo->columns,
  156. dst_node,
  157. 0,
  158. size,
  159. event);
  160. /* sizeof(src_coo->columns[0]) == sizeof(src_coo->rows[0]) */
  161. ret = starpu_opencl_copy_async_sync(
  162. (uintptr_t) src_coo->rows,
  163. src_node,
  164. 0,
  165. (uintptr_t) dst_coo->rows,
  166. dst_node,
  167. 0,
  168. size,
  169. event);
  170. size = src_coo->n_values * src_coo->elemsize;
  171. ret = starpu_opencl_copy_async_sync(
  172. src_coo->values,
  173. src_node,
  174. 0,
  175. (uintptr_t) dst_coo->values,
  176. dst_node,
  177. 0,
  178. size,
  179. event);
  180. _STARPU_TRACE_DATA_COPY(src_node, dst_node,
  181. src_coo->n_values *
  182. (2 * sizeof(src_coo->rows[0]) + src_coo->elemsize));
  183. return ret;
  184. }
  185. static int
  186. copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  187. void *dst_interface, unsigned dst_node,
  188. cl_event *event)
  189. {
  190. return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
  191. }
  192. static int
  193. copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  194. void *dst_interface, unsigned dst_node,
  195. cl_event *event)
  196. {
  197. return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
  198. }
  199. static int
  200. copy_opencl_to_opencl_async(void *src_interface, unsigned src_node,
  201. void *dst_interface, unsigned dst_node,
  202. cl_event *event)
  203. {
  204. return copy_opencl_common(src_interface, src_node, dst_interface, dst_node, event);
  205. }
  206. static int
  207. copy_ram_to_opencl(void *src_interface, unsigned src_node,
  208. void *dst_interface, unsigned dst_node)
  209. {
  210. return copy_ram_to_opencl_async(src_interface, src_node,
  211. dst_interface, dst_node,
  212. NULL);
  213. }
  214. static int
  215. copy_opencl_to_ram(void *src_interface, unsigned src_node,
  216. void *dst_interface, unsigned dst_node)
  217. {
  218. return copy_opencl_to_ram_async(src_interface, src_node,
  219. dst_interface, dst_node,
  220. NULL);
  221. }
  222. static int
  223. copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  224. void *dst_interface, unsigned dst_node)
  225. {
  226. return copy_opencl_to_opencl_async(src_interface, src_node,
  227. dst_interface, dst_node,
  228. NULL);
  229. }
  230. #endif /* !STARPU_USE_OPENCL */
  231. static struct starpu_data_copy_methods coo_copy_data_methods =
  232. {
  233. .ram_to_ram = copy_ram_to_ram,
  234. #ifdef STARPU_USE_CUDA
  235. .ram_to_cuda = copy_ram_to_cuda,
  236. .cuda_to_ram = copy_cuda_to_ram,
  237. .ram_to_cuda_async = copy_ram_to_cuda_async,
  238. .cuda_to_ram_async = copy_cuda_to_ram_async,
  239. .cuda_to_cuda = copy_cuda_to_cuda,
  240. #ifdef NO_STRIDE
  241. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  242. #endif
  243. #else
  244. #ifdef STARPU_SIMGRID
  245. #ifdef NO_STRIDE
  246. /* Enable GPU-GPU transfers in simgrid */
  247. .cuda_to_cuda_async = 1,
  248. #endif
  249. #endif
  250. #endif /* !STARPU_USE_CUDA */
  251. #ifdef STARPU_USE_OPENCL
  252. .ram_to_opencl = copy_ram_to_opencl,
  253. .opencl_to_ram = copy_opencl_to_ram,
  254. .opencl_to_opencl = copy_opencl_to_opencl,
  255. .ram_to_opencl_async = copy_ram_to_opencl_async,
  256. .opencl_to_opencl_async = copy_opencl_to_opencl_async,
  257. #endif /* !STARPU_USE_OPENCL */
  258. };
  259. static void
  260. register_coo_handle(starpu_data_handle_t handle, unsigned home_node,
  261. void *data_interface)
  262. {
  263. struct starpu_coo_interface *coo_interface =
  264. (struct starpu_coo_interface *) data_interface;
  265. unsigned node;
  266. for (node = 0; node < STARPU_MAXNODES; node++)
  267. {
  268. struct starpu_coo_interface *local_interface;
  269. local_interface = (struct starpu_coo_interface *)
  270. starpu_data_get_interface_on_node(handle, node);
  271. if (node == home_node)
  272. {
  273. local_interface->values = coo_interface->values;
  274. local_interface->columns = coo_interface->columns;
  275. local_interface->rows = coo_interface->rows;
  276. }
  277. else
  278. {
  279. local_interface->values = 0;
  280. local_interface->columns = 0;
  281. local_interface->rows = 0;
  282. }
  283. local_interface->nx = coo_interface->nx;
  284. local_interface->ny = coo_interface->ny;
  285. local_interface->n_values = coo_interface->n_values;
  286. local_interface->elemsize = coo_interface->elemsize;
  287. }
  288. }
  289. static ssize_t
  290. allocate_coo_buffer_on_node(void *data_interface, unsigned dst_node)
  291. {
  292. uint32_t *addr_columns = NULL;
  293. uint32_t *addr_rows = NULL;
  294. uintptr_t addr_values = 0;
  295. struct starpu_coo_interface *coo_interface =
  296. (struct starpu_coo_interface *) data_interface;
  297. uint32_t n_values = coo_interface->n_values;
  298. size_t elemsize = coo_interface->elemsize;
  299. addr_columns = (void*) starpu_allocate_buffer_on_node(dst_node, n_values * sizeof(coo_interface->columns[0]));
  300. if (STARPU_UNLIKELY(addr_columns == NULL))
  301. goto fail_columns;
  302. addr_rows = (void*) starpu_allocate_buffer_on_node(dst_node, n_values * sizeof(coo_interface->rows[0]));
  303. if (STARPU_UNLIKELY(addr_rows == NULL))
  304. goto fail_rows;
  305. addr_values = starpu_allocate_buffer_on_node(dst_node, n_values * elemsize);
  306. if (STARPU_UNLIKELY(addr_values == (uintptr_t) NULL))
  307. goto fail_values;
  308. coo_interface->columns = addr_columns;
  309. coo_interface->rows = addr_rows;
  310. coo_interface->values = addr_values;
  311. return n_values * (sizeof(coo_interface->columns[0]) + sizeof(coo_interface->rows[0]) + elemsize);
  312. fail_values:
  313. starpu_free_buffer_on_node(dst_node, (uintptr_t) addr_rows, n_values * sizeof(coo_interface->rows[0]));
  314. fail_rows:
  315. starpu_free_buffer_on_node(dst_node, (uintptr_t) addr_columns, n_values * sizeof(coo_interface->columns[0]));
  316. fail_columns:
  317. return -ENOMEM;
  318. }
  319. static void
  320. free_coo_buffer_on_node(void *data_interface, unsigned node)
  321. {
  322. struct starpu_coo_interface *coo_interface = (struct starpu_coo_interface *) data_interface;
  323. uint32_t n_values = coo_interface->n_values;
  324. size_t elemsize = coo_interface->elemsize;
  325. starpu_free_buffer_on_node(node, (uintptr_t) coo_interface->columns, n_values * sizeof(coo_interface->columns[0]));
  326. starpu_free_buffer_on_node(node, (uintptr_t) coo_interface->rows, n_values * sizeof(coo_interface->rows[0]));
  327. starpu_free_buffer_on_node(node, coo_interface->values, n_values * elemsize);
  328. }
  329. static size_t
  330. coo_interface_get_size(starpu_data_handle_t handle)
  331. {
  332. struct starpu_coo_interface *coo_interface;
  333. coo_interface = (struct starpu_coo_interface *)
  334. starpu_data_get_interface_on_node(handle, 0);
  335. return coo_interface->nx * coo_interface->ny * coo_interface->elemsize;
  336. }
  337. static uint32_t
  338. coo_interface_footprint(starpu_data_handle_t handle)
  339. {
  340. struct starpu_coo_interface *coo_interface;
  341. coo_interface = (struct starpu_coo_interface *)
  342. starpu_data_get_interface_on_node(handle, 0);
  343. return starpu_crc32_be(coo_interface->nx * coo_interface->ny, 0);
  344. }
  345. static int
  346. coo_compare(void *a, void *b)
  347. {
  348. struct starpu_coo_interface *coo_a, *coo_b;
  349. coo_a = (struct starpu_coo_interface *) a;
  350. coo_b = (struct starpu_coo_interface *) b;
  351. return (coo_a->nx == coo_b->nx &&
  352. coo_a->ny == coo_b->ny &&
  353. coo_a->n_values == coo_b->n_values &&
  354. coo_a->elemsize == coo_b->elemsize);
  355. }
  356. static void
  357. display_coo_interface(starpu_data_handle_t handle, FILE *f)
  358. {
  359. struct starpu_coo_interface *coo_interface =
  360. coo_interface = (struct starpu_coo_interface *)
  361. starpu_data_get_interface_on_node(handle, 0);
  362. fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
  363. }
  364. struct starpu_data_interface_ops _starpu_interface_coo_ops =
  365. {
  366. .register_data_handle = register_coo_handle,
  367. .allocate_data_on_node = allocate_coo_buffer_on_node,
  368. .handle_to_pointer = NULL,
  369. .free_data_on_node = free_coo_buffer_on_node,
  370. .copy_methods = &coo_copy_data_methods,
  371. .get_size = coo_interface_get_size,
  372. .footprint = coo_interface_footprint,
  373. .compare = coo_compare,
  374. .interfaceid = STARPU_COO_INTERFACE_ID,
  375. .interface_size = sizeof(struct starpu_coo_interface),
  376. .display = display_coo_interface
  377. };
  378. void
  379. starpu_coo_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
  380. uint32_t nx, uint32_t ny, uint32_t n_values,
  381. uint32_t *columns, uint32_t *rows,
  382. uintptr_t values, size_t elemsize)
  383. {
  384. struct starpu_coo_interface coo_interface =
  385. {
  386. .values = values,
  387. .columns = columns,
  388. .rows = rows,
  389. .nx = nx,
  390. .ny = ny,
  391. .n_values = n_values,
  392. .elemsize = elemsize,
  393. };
  394. starpu_data_register(handleptr, home_node, &coo_interface,
  395. &_starpu_interface_coo_ops);
  396. }