multiformat_interface.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2012 Institut National de Recherche en Informatique et Automatique
  4. * Copyright (C) 2012, 2013 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <starpu.h>
  18. #include <common/config.h>
  19. #include <datawizard/coherency.h>
  20. #include <datawizard/copy_driver.h>
  21. #include <datawizard/filters.h>
  22. #include <starpu_hash.h>
  23. #include <starpu_cuda.h>
  24. #include <starpu_opencl.h>
  25. #include <drivers/opencl/driver_opencl.h>
  26. #include <core/task.h>
  27. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  28. #ifdef STARPU_USE_CUDA
  29. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  30. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  31. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  32. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  33. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  34. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  35. #endif
  36. #ifdef STARPU_USE_OPENCL
  37. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  39. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  40. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  41. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  42. #endif
  43. static struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  44. {
  45. .ram_to_ram = copy_ram_to_ram,
  46. .ram_to_spu = NULL,
  47. #ifdef STARPU_USE_CUDA
  48. .ram_to_cuda = copy_ram_to_cuda,
  49. .cuda_to_ram = copy_cuda_to_ram,
  50. .ram_to_cuda_async = copy_ram_to_cuda_async,
  51. .cuda_to_ram_async = copy_cuda_to_ram_async,
  52. .cuda_to_cuda = copy_cuda_to_cuda,
  53. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  54. #else
  55. #ifdef STARPU_SIMGRID
  56. /* Enable GPU-GPU transfers in simgrid */
  57. .cuda_to_cuda_async = 1,
  58. #endif
  59. #endif
  60. #ifdef STARPU_USE_OPENCL
  61. .ram_to_opencl = copy_ram_to_opencl,
  62. .opencl_to_ram = copy_opencl_to_ram,
  63. .opencl_to_opencl = copy_opencl_to_opencl,
  64. .ram_to_opencl_async = copy_ram_to_opencl_async,
  65. .opencl_to_ram_async = copy_opencl_to_ram_async,
  66. #endif
  67. .cuda_to_spu = NULL,
  68. .spu_to_ram = NULL,
  69. .spu_to_cuda = NULL,
  70. .spu_to_spu = NULL
  71. };
  72. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  73. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  74. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  75. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  76. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  77. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  78. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  79. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  80. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  81. #ifdef STARPU_USE_GORDON
  82. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  83. #endif
  84. static struct starpu_multiformat_data_interface_ops*
  85. get_mf_ops(void *data_interface)
  86. {
  87. struct starpu_multiformat_interface *mf;
  88. mf = (struct starpu_multiformat_interface *) data_interface;
  89. return mf->ops;
  90. }
  91. static struct starpu_data_interface_ops interface_multiformat_ops =
  92. {
  93. .register_data_handle = register_multiformat_handle,
  94. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  95. .handle_to_pointer = multiformat_handle_to_pointer,
  96. .free_data_on_node = free_multiformat_buffer_on_node,
  97. .copy_methods = &multiformat_copy_data_methods_s,
  98. .get_size = multiformat_interface_get_size,
  99. .footprint = footprint_multiformat_interface_crc32,
  100. .compare = multiformat_compare,
  101. #ifdef STARPU_USE_GORDON
  102. .convert_to_gordon = NULL,
  103. #endif
  104. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  105. .interface_size = sizeof(struct starpu_multiformat_interface),
  106. .display = display_multiformat_interface,
  107. .is_multiformat = 1,
  108. .get_mf_ops = get_mf_ops
  109. };
  110. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  111. {
  112. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  113. struct starpu_multiformat_interface *multiformat_interface =
  114. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  115. switch(starpu_node_get_kind(node))
  116. {
  117. case STARPU_CPU_RAM:
  118. return multiformat_interface->cpu_ptr;
  119. #ifdef STARPU_USE_CUDA
  120. case STARPU_CUDA_RAM:
  121. return multiformat_interface->cuda_ptr;
  122. #endif
  123. #ifdef STARPU_USE_OPENCL
  124. case STARPU_OPENCL_RAM:
  125. return multiformat_interface->opencl_ptr;
  126. #endif
  127. default:
  128. STARPU_ABORT();
  129. }
  130. return NULL;
  131. }
  132. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  133. {
  134. struct starpu_multiformat_interface *multiformat_interface;
  135. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  136. unsigned node;
  137. for (node = 0; node < STARPU_MAXNODES; node++)
  138. {
  139. struct starpu_multiformat_interface *local_interface =
  140. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  141. if (node == home_node)
  142. {
  143. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  144. #ifdef STARPU_USE_CUDA
  145. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  146. #endif
  147. #ifdef STARPU_USE_OPENCL
  148. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  149. #endif
  150. }
  151. else
  152. {
  153. local_interface->cpu_ptr = NULL;
  154. #ifdef STARPU_USE_CUDA
  155. local_interface->cuda_ptr = NULL;
  156. #endif
  157. #ifdef STARPU_USE_OPENCL
  158. local_interface->opencl_ptr = NULL;
  159. #endif
  160. }
  161. local_interface->nx = multiformat_interface->nx;
  162. local_interface->ops = multiformat_interface->ops;
  163. }
  164. }
  165. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  166. uint32_t home_node,
  167. void *ptr,
  168. uint32_t nobjects,
  169. struct starpu_multiformat_data_interface_ops *format_ops)
  170. {
  171. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_opencl_cl);
  172. _starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
  173. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
  174. _starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
  175. struct starpu_multiformat_interface multiformat =
  176. {
  177. .cpu_ptr = ptr,
  178. .cuda_ptr = NULL,
  179. .opencl_ptr = NULL,
  180. .nx = nobjects,
  181. .ops = format_ops
  182. };
  183. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  184. }
  185. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  186. {
  187. return starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  188. }
  189. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  190. {
  191. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  192. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  193. return ((multiformat_a->nx == multiformat_b->nx)
  194. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  195. #ifdef STARPU_USE_CUDA
  196. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  197. #endif
  198. #ifdef STARPU_USE_OPENCL
  199. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  200. #endif
  201. );
  202. }
  203. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  204. {
  205. struct starpu_multiformat_interface *multiformat_interface;
  206. multiformat_interface = (struct starpu_multiformat_interface *)
  207. starpu_data_get_interface_on_node(handle, 0);
  208. fprintf(f, "%u\t", multiformat_interface->nx);
  209. }
  210. /* XXX : returns CPU size */
  211. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  212. {
  213. size_t size;
  214. struct starpu_multiformat_interface *multiformat_interface;
  215. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  216. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  217. return size;
  218. }
  219. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  220. {
  221. struct starpu_multiformat_interface *multiformat_interface;
  222. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  223. return multiformat_interface->nx;
  224. }
  225. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  226. {
  227. struct starpu_multiformat_interface *multiformat_interface;
  228. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  229. uintptr_t addr = 0;
  230. ssize_t allocated_memory = 0;
  231. size_t size;
  232. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  233. allocated_memory += size;
  234. addr = starpu_allocate_buffer_on_node(dst_node, size);
  235. if (!addr)
  236. goto fail_cpu;
  237. multiformat_interface->cpu_ptr = (void *) addr;
  238. #ifdef STARPU_USE_CUDA
  239. size = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  240. allocated_memory += size;
  241. addr = starpu_allocate_buffer_on_node(dst_node, size);
  242. if (!addr)
  243. goto fail_cuda;
  244. multiformat_interface->cuda_ptr = (void *) addr;
  245. #endif
  246. #ifdef STARPU_USE_OPENCL
  247. size = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  248. allocated_memory += size;
  249. addr = starpu_allocate_buffer_on_node(dst_node, size);
  250. if (!addr)
  251. goto fail_opencl;
  252. multiformat_interface->opencl_ptr = (void *) addr;
  253. #endif
  254. return allocated_memory;
  255. #ifdef STARPU_USE_OPENCL
  256. fail_opencl:
  257. #ifdef STARPU_USE_CUDA
  258. starpu_free_buffer_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  259. #endif
  260. #endif
  261. #ifdef STARPU_USE_CUDA
  262. fail_cuda:
  263. #endif
  264. starpu_free_buffer_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  265. fail_cpu:
  266. return -ENOMEM;
  267. }
  268. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  269. {
  270. struct starpu_multiformat_interface *multiformat_interface;
  271. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  272. starpu_free_buffer_on_node(node, (uintptr_t) multiformat_interface->cpu_ptr,
  273. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  274. multiformat_interface->cpu_ptr = NULL;
  275. #ifdef STARPU_USE_CUDA
  276. starpu_free_buffer_on_node(node, (uintptr_t) multiformat_interface->cuda_ptr,
  277. multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  278. multiformat_interface->cuda_ptr = NULL;
  279. #endif
  280. #ifdef STARPU_USE_OPENCL
  281. starpu_free_buffer_on_node(node, (uintptr_t) multiformat_interface->opencl_ptr,
  282. multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  283. multiformat_interface->opencl_ptr = NULL;
  284. #endif
  285. }
  286. /*
  287. * Copy methods
  288. */
  289. static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__ ((unused)),
  290. void *dst_interface, unsigned dst_node __attribute__ ((unused)))
  291. {
  292. struct starpu_multiformat_interface *src_multiformat;
  293. struct starpu_multiformat_interface *dst_multiformat;
  294. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  295. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  296. STARPU_ASSERT(src_multiformat != NULL);
  297. STARPU_ASSERT(dst_multiformat != NULL);
  298. STARPU_ASSERT(dst_multiformat->ops != NULL);
  299. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  300. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  301. return 0;
  302. }
  303. #ifdef STARPU_USE_CUDA
  304. static int copy_cuda_common(void *src_interface, unsigned src_node __attribute__ ((unused)),
  305. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  306. enum cudaMemcpyKind kind)
  307. {
  308. struct starpu_multiformat_interface *src_multiformat;
  309. struct starpu_multiformat_interface *dst_multiformat;
  310. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  311. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  312. size_t size;
  313. cudaError_t status;
  314. switch (kind)
  315. {
  316. case cudaMemcpyHostToDevice:
  317. {
  318. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  319. if (src_multiformat->cuda_ptr == NULL)
  320. {
  321. src_multiformat->cuda_ptr = malloc(size);
  322. if (src_multiformat->cuda_ptr == NULL)
  323. return -ENOMEM;
  324. }
  325. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  326. if (STARPU_UNLIKELY(status))
  327. {
  328. STARPU_CUDA_REPORT_ERROR(status);
  329. }
  330. break;
  331. }
  332. case cudaMemcpyDeviceToHost:
  333. {
  334. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  335. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  336. if (STARPU_UNLIKELY(status))
  337. STARPU_CUDA_REPORT_ERROR(status);
  338. break;
  339. }
  340. case cudaMemcpyDeviceToDevice:
  341. {
  342. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  343. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  344. if (STARPU_UNLIKELY(status))
  345. STARPU_CUDA_REPORT_ERROR(status);
  346. break;
  347. }
  348. default:
  349. STARPU_ABORT();
  350. }
  351. return 0;
  352. }
  353. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  354. {
  355. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  356. }
  357. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  358. {
  359. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  360. }
  361. static int copy_cuda_common_async(void *src_interface, unsigned src_node __attribute__ ((unused)),
  362. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  363. cudaStream_t stream, enum cudaMemcpyKind kind)
  364. {
  365. struct starpu_multiformat_interface *src_multiformat;
  366. struct starpu_multiformat_interface *dst_multiformat;
  367. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  368. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  369. size_t size;
  370. cudaError_t status;
  371. switch (kind)
  372. {
  373. case cudaMemcpyHostToDevice:
  374. {
  375. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  376. if (src_multiformat->cuda_ptr == NULL)
  377. {
  378. src_multiformat->cuda_ptr = malloc(size);
  379. if (src_multiformat->cuda_ptr == NULL)
  380. return -ENOMEM;
  381. }
  382. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  383. if (STARPU_UNLIKELY(status))
  384. {
  385. STARPU_CUDA_REPORT_ERROR(status);
  386. }
  387. break;
  388. }
  389. case cudaMemcpyDeviceToHost:
  390. {
  391. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  392. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  393. if (STARPU_UNLIKELY(status))
  394. STARPU_CUDA_REPORT_ERROR(status);
  395. break;
  396. }
  397. case cudaMemcpyDeviceToDevice:
  398. {
  399. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  400. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  401. if (STARPU_UNLIKELY(status))
  402. STARPU_CUDA_REPORT_ERROR(status);
  403. break;
  404. }
  405. default:
  406. STARPU_ABORT();
  407. }
  408. return 0;
  409. }
  410. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  411. {
  412. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  413. }
  414. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  415. {
  416. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  417. }
  418. #ifdef HAVE_CUDA_MEMCPY_PEER
  419. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  420. void *dst_interface, unsigned dst_node,
  421. cudaStream_t stream)
  422. {
  423. struct starpu_multiformat_interface *src_multiformat;
  424. struct starpu_multiformat_interface *dst_multiformat;
  425. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  426. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  427. STARPU_ASSERT(src_multiformat != NULL);
  428. STARPU_ASSERT(dst_multiformat != NULL);
  429. STARPU_ASSERT(src_multiformat->ops != NULL);
  430. cudaError_t status;
  431. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  432. int src_dev = _starpu_memory_node_to_devid(src_node);
  433. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  434. if (stream)
  435. {
  436. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  437. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  438. src_multiformat->cuda_ptr, src_dev,
  439. size, stream);
  440. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  441. /* All good ! Still, returning -EAGAIN, because we will need to
  442. check the transfert completion later */
  443. if (status == cudaSuccess)
  444. return -EAGAIN;
  445. }
  446. /* Either a synchronous transfert was requested, or the asynchronous one
  447. failed. */
  448. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  449. src_multiformat->cuda_ptr, src_dev,
  450. size);
  451. if (STARPU_UNLIKELY(status != cudaSuccess))
  452. STARPU_CUDA_REPORT_ERROR(status);
  453. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  454. return 0;
  455. }
  456. #endif
  457. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  458. {
  459. if (src_node == dst_node)
  460. {
  461. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  462. }
  463. else
  464. {
  465. #ifdef HAVE_CUDA_MEMCPY_PEER
  466. return copy_cuda_peer_common(src_interface, src_node,
  467. dst_interface, dst_node,
  468. NULL);
  469. #else
  470. STARPU_ABORT();
  471. #endif
  472. }
  473. }
  474. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  475. void *dst_interface, unsigned dst_node,
  476. cudaStream_t stream)
  477. {
  478. if (src_node == dst_node)
  479. {
  480. return copy_cuda_common_async(src_interface, src_node,
  481. dst_interface, dst_node,
  482. stream, cudaMemcpyDeviceToDevice);
  483. }
  484. else
  485. {
  486. #ifdef HAVE_CUDA_MEMCPY_PEER
  487. return copy_cuda_peer_common(src_interface, src_node,
  488. dst_interface, dst_node,
  489. stream);
  490. #else
  491. STARPU_ABORT();
  492. #endif
  493. }
  494. }
  495. #endif /* STARPU_USE_CUDA */
  496. #ifdef STARPU_USE_OPENCL
  497. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  498. void *dst_interface, unsigned dst_node,
  499. cl_event *event)
  500. {
  501. int err, ret;
  502. size_t size;
  503. struct starpu_multiformat_interface *src_multiformat;
  504. struct starpu_multiformat_interface *dst_multiformat;
  505. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  506. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  507. STARPU_ASSERT(src_multiformat != NULL);
  508. STARPU_ASSERT(dst_multiformat != NULL);
  509. STARPU_ASSERT(src_multiformat->ops != NULL);
  510. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  511. err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
  512. src_node,
  513. (cl_mem) dst_multiformat->cpu_ptr,
  514. dst_node,
  515. size,
  516. 0,
  517. event,
  518. &ret);
  519. if (STARPU_UNLIKELY(err))
  520. STARPU_OPENCL_REPORT_ERROR(err);
  521. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  522. return ret;
  523. }
  524. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  525. void *dst_interface, unsigned dst_node,
  526. cl_event *event)
  527. {
  528. int err, ret;
  529. size_t size;
  530. struct starpu_multiformat_interface *src_multiformat;
  531. struct starpu_multiformat_interface *dst_multiformat;
  532. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  533. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  534. STARPU_ASSERT(src_multiformat != NULL);
  535. STARPU_ASSERT(dst_multiformat != NULL);
  536. STARPU_ASSERT(src_multiformat->ops != NULL);
  537. STARPU_ASSERT(dst_multiformat->ops != NULL);
  538. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  539. if (dst_multiformat->opencl_ptr == NULL)
  540. {
  541. /* XXX : it is weird that we might have to allocate memory here... */
  542. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  543. }
  544. err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
  545. src_node,
  546. dst_multiformat->opencl_ptr,
  547. dst_node,
  548. size,
  549. 0,
  550. event,
  551. &ret);
  552. if (STARPU_UNLIKELY(err))
  553. STARPU_OPENCL_REPORT_ERROR(err);
  554. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  555. return ret;
  556. }
  557. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  558. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  559. {
  560. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  561. }
  562. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  563. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  564. {
  565. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  566. }
  567. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  568. void *dst_interface, unsigned dst_node)
  569. {
  570. (void) src_interface;
  571. (void) dst_interface;
  572. (void) src_node;
  573. (void) dst_node;
  574. STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");
  575. return 0;
  576. }
  577. #endif