multiformat_interface.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #ifdef BUILDING_STARPU
  18. #include <datawizard/memory_nodes.h>
  19. #endif
  20. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  21. #ifdef STARPU_USE_CUDA
  22. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  23. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  24. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  25. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  26. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  27. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  28. #endif
  29. #ifdef STARPU_USE_OPENCL
  30. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  31. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  32. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  33. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  34. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  35. #endif
  36. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  37. {
  38. .ram_to_ram = copy_ram_to_ram,
  39. #ifdef STARPU_USE_CUDA
  40. .ram_to_cuda = copy_ram_to_cuda,
  41. .cuda_to_ram = copy_cuda_to_ram,
  42. .ram_to_cuda_async = copy_ram_to_cuda_async,
  43. .cuda_to_ram_async = copy_cuda_to_ram_async,
  44. .cuda_to_cuda = copy_cuda_to_cuda,
  45. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  46. #else
  47. #ifdef STARPU_SIMGRID
  48. /* Enable GPU-GPU transfers in simgrid */
  49. .cuda_to_cuda_async = (void *)1,
  50. #endif
  51. #endif
  52. #ifdef STARPU_USE_OPENCL
  53. .ram_to_opencl = copy_ram_to_opencl,
  54. .opencl_to_ram = copy_opencl_to_ram,
  55. .opencl_to_opencl = copy_opencl_to_opencl,
  56. .ram_to_opencl_async = copy_ram_to_opencl_async,
  57. .opencl_to_ram_async = copy_opencl_to_ram_async,
  58. #endif
  59. };
  60. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
  61. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node);
  62. static void *multiformat_to_pointer(void *data_interface, unsigned node);
  63. static int multiformat_pointer_is_inside(void *data_interface, unsigned node, void *ptr);
  64. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node);
  65. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  66. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  67. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  68. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  69. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  70. static struct starpu_multiformat_data_interface_ops*
  71. get_mf_ops(void *data_interface)
  72. {
  73. struct starpu_multiformat_interface *mf;
  74. mf = (struct starpu_multiformat_interface *) data_interface;
  75. return mf->ops;
  76. }
  77. struct starpu_data_interface_ops starpu_interface_multiformat_ops =
  78. {
  79. .register_data_handle = register_multiformat_handle,
  80. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  81. .to_pointer = multiformat_to_pointer,
  82. .pointer_is_inside = multiformat_pointer_is_inside,
  83. .free_data_on_node = free_multiformat_buffer_on_node,
  84. .copy_methods = &multiformat_copy_data_methods_s,
  85. .get_size = multiformat_interface_get_size,
  86. .footprint = footprint_multiformat_interface_crc32,
  87. .compare = multiformat_compare,
  88. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  89. .interface_size = sizeof(struct starpu_multiformat_interface),
  90. .display = display_multiformat_interface,
  91. .is_multiformat = 1,
  92. .get_mf_ops = get_mf_ops
  93. };
  94. static void *multiformat_to_pointer(void *data_interface, unsigned node)
  95. {
  96. struct starpu_multiformat_interface *multiformat_interface = data_interface;
  97. switch(starpu_node_get_kind(node))
  98. {
  99. case STARPU_CPU_RAM:
  100. return multiformat_interface->cpu_ptr;
  101. #ifdef STARPU_USE_CUDA
  102. case STARPU_CUDA_RAM:
  103. return multiformat_interface->cuda_ptr;
  104. #endif
  105. #ifdef STARPU_USE_OPENCL
  106. case STARPU_OPENCL_RAM:
  107. return multiformat_interface->opencl_ptr;
  108. #endif
  109. default:
  110. STARPU_ABORT();
  111. }
  112. return NULL;
  113. }
  114. static int multiformat_pointer_is_inside(void *data_interface, unsigned node, void *ptr)
  115. {
  116. struct starpu_multiformat_interface *multiformat_interface = data_interface;
  117. switch(starpu_node_get_kind(node))
  118. {
  119. case STARPU_CPU_RAM:
  120. return (char*) ptr >= (char*) multiformat_interface->cpu_ptr &&
  121. (char*) ptr < (char*) multiformat_interface->cpu_ptr + multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  122. #ifdef STARPU_USE_CUDA
  123. case STARPU_CUDA_RAM:
  124. return (char*) ptr >= (char*) multiformat_interface->cuda_ptr &&
  125. (char*) ptr < (char*) multiformat_interface->cuda_ptr + multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  126. #endif
  127. #ifdef STARPU_USE_OPENCL
  128. case STARPU_OPENCL_RAM:
  129. return (char*) ptr >= (char*) multiformat_interface->opencl_ptr &&
  130. (char*) ptr < (char*) multiformat_interface->opencl_ptr + multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  131. #endif
  132. default:
  133. STARPU_ABORT();
  134. }
  135. return -1;
  136. }
  137. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
  138. {
  139. struct starpu_multiformat_interface *multiformat_interface;
  140. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  141. unsigned node;
  142. for (node = 0; node < STARPU_MAXNODES; node++)
  143. {
  144. struct starpu_multiformat_interface *local_interface =
  145. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  146. if (node == home_node)
  147. {
  148. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  149. #ifdef STARPU_USE_CUDA
  150. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  151. #endif
  152. #ifdef STARPU_USE_OPENCL
  153. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  154. #endif
  155. }
  156. else
  157. {
  158. local_interface->cpu_ptr = NULL;
  159. #ifdef STARPU_USE_CUDA
  160. local_interface->cuda_ptr = NULL;
  161. #endif
  162. #ifdef STARPU_USE_OPENCL
  163. local_interface->opencl_ptr = NULL;
  164. #endif
  165. }
  166. local_interface->id = multiformat_interface->id;
  167. local_interface->nx = multiformat_interface->nx;
  168. local_interface->ops = multiformat_interface->ops;
  169. }
  170. }
  171. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  172. int home_node,
  173. void *ptr,
  174. uint32_t nobjects,
  175. struct starpu_multiformat_data_interface_ops *format_ops)
  176. {
  177. struct starpu_multiformat_interface multiformat =
  178. {
  179. .id = STARPU_MULTIFORMAT_INTERFACE_ID,
  180. .cpu_ptr = ptr,
  181. .cuda_ptr = NULL,
  182. .opencl_ptr = NULL,
  183. .nx = nobjects,
  184. .ops = format_ops
  185. };
  186. starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops);
  187. }
  188. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  189. {
  190. return starpu_hash_crc32c_be(starpu_multiformat_get_nx(handle), 0);
  191. }
  192. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  193. {
  194. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  195. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  196. return (multiformat_a->nx == multiformat_b->nx)
  197. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  198. #ifdef STARPU_USE_CUDA
  199. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  200. #endif
  201. #ifdef STARPU_USE_OPENCL
  202. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  203. #endif
  204. ;
  205. }
  206. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  207. {
  208. struct starpu_multiformat_interface *multiformat_interface;
  209. multiformat_interface = (struct starpu_multiformat_interface *)
  210. starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  211. fprintf(f, "%u\t", multiformat_interface->nx);
  212. }
  213. /* XXX : returns CPU size */
  214. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  215. {
  216. size_t size;
  217. struct starpu_multiformat_interface *multiformat_interface;
  218. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  219. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  220. return size;
  221. }
  222. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  223. {
  224. struct starpu_multiformat_interface *multiformat_interface;
  225. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  226. return multiformat_interface->nx;
  227. }
  228. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node)
  229. {
  230. struct starpu_multiformat_interface *multiformat_interface;
  231. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  232. uintptr_t addr = 0;
  233. starpu_ssize_t allocated_memory = 0;
  234. size_t size;
  235. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  236. allocated_memory += size;
  237. addr = starpu_malloc_on_node(dst_node, size);
  238. if (!addr)
  239. goto fail_cpu;
  240. multiformat_interface->cpu_ptr = (void *) addr;
  241. #ifdef STARPU_USE_CUDA
  242. size = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  243. allocated_memory += size;
  244. addr = starpu_malloc_on_node(dst_node, size);
  245. if (!addr)
  246. goto fail_cuda;
  247. multiformat_interface->cuda_ptr = (void *) addr;
  248. #endif
  249. #ifdef STARPU_USE_OPENCL
  250. size = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  251. allocated_memory += size;
  252. addr = starpu_malloc_on_node(dst_node, size);
  253. if (!addr)
  254. goto fail_opencl;
  255. multiformat_interface->opencl_ptr = (void *) addr;
  256. #endif
  257. return allocated_memory;
  258. #ifdef STARPU_USE_OPENCL
  259. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  260. fail_opencl:
  261. #endif
  262. #ifdef STARPU_USE_CUDA
  263. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  264. fail_cuda:
  265. #endif
  266. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  267. fail_cpu:
  268. return -ENOMEM;
  269. }
  270. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node)
  271. {
  272. struct starpu_multiformat_interface *multiformat_interface;
  273. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  274. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cpu_ptr,
  275. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  276. multiformat_interface->cpu_ptr = NULL;
  277. #ifdef STARPU_USE_CUDA
  278. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cuda_ptr,
  279. multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  280. multiformat_interface->cuda_ptr = NULL;
  281. #endif
  282. #ifdef STARPU_USE_OPENCL
  283. starpu_free_on_node(node, (uintptr_t) multiformat_interface->opencl_ptr,
  284. multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  285. multiformat_interface->opencl_ptr = NULL;
  286. #endif
  287. }
  288. /*
  289. * Copy methods
  290. */
  291. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  292. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  293. {
  294. struct starpu_multiformat_interface *src_multiformat;
  295. struct starpu_multiformat_interface *dst_multiformat;
  296. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  297. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  298. STARPU_ASSERT(src_multiformat != NULL);
  299. STARPU_ASSERT(dst_multiformat != NULL);
  300. STARPU_ASSERT(dst_multiformat->ops != NULL);
  301. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  302. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  303. return 0;
  304. }
  305. #ifdef STARPU_USE_CUDA
  306. static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  307. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  308. enum cudaMemcpyKind kind)
  309. {
  310. struct starpu_multiformat_interface *src_multiformat;
  311. struct starpu_multiformat_interface *dst_multiformat;
  312. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  313. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  314. size_t size;
  315. cudaError_t status;
  316. switch (kind)
  317. {
  318. case cudaMemcpyHostToDevice:
  319. {
  320. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  321. if (src_multiformat->cuda_ptr == NULL)
  322. {
  323. src_multiformat->cuda_ptr = malloc(size);
  324. if (src_multiformat->cuda_ptr == NULL)
  325. return -ENOMEM;
  326. }
  327. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  328. if (!status)
  329. status = cudaDeviceSynchronize();
  330. if (STARPU_UNLIKELY(status))
  331. STARPU_CUDA_REPORT_ERROR(status);
  332. break;
  333. }
  334. case cudaMemcpyDeviceToHost:
  335. {
  336. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  337. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  338. if (!status)
  339. status = cudaDeviceSynchronize();
  340. if (STARPU_UNLIKELY(status))
  341. STARPU_CUDA_REPORT_ERROR(status);
  342. break;
  343. }
  344. case cudaMemcpyDeviceToDevice:
  345. {
  346. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  347. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  348. if (!status)
  349. status = cudaDeviceSynchronize();
  350. if (STARPU_UNLIKELY(status))
  351. STARPU_CUDA_REPORT_ERROR(status);
  352. break;
  353. }
  354. default:
  355. STARPU_ABORT();
  356. }
  357. return 0;
  358. }
  359. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  360. {
  361. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  362. }
  363. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  364. {
  365. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  366. }
  367. static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  368. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  369. cudaStream_t stream, enum cudaMemcpyKind kind)
  370. {
  371. struct starpu_multiformat_interface *src_multiformat;
  372. struct starpu_multiformat_interface *dst_multiformat;
  373. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  374. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  375. size_t size;
  376. cudaError_t status;
  377. switch (kind)
  378. {
  379. case cudaMemcpyHostToDevice:
  380. {
  381. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  382. if (src_multiformat->cuda_ptr == NULL)
  383. {
  384. src_multiformat->cuda_ptr = malloc(size);
  385. if (src_multiformat->cuda_ptr == NULL)
  386. return -ENOMEM;
  387. }
  388. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  389. if (STARPU_UNLIKELY(status))
  390. {
  391. STARPU_CUDA_REPORT_ERROR(status);
  392. }
  393. break;
  394. }
  395. case cudaMemcpyDeviceToHost:
  396. {
  397. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  398. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  399. if (!status)
  400. status = cudaDeviceSynchronize();
  401. if (STARPU_UNLIKELY(status))
  402. STARPU_CUDA_REPORT_ERROR(status);
  403. break;
  404. }
  405. case cudaMemcpyDeviceToDevice:
  406. {
  407. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  408. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  409. if (STARPU_UNLIKELY(status))
  410. STARPU_CUDA_REPORT_ERROR(status);
  411. break;
  412. }
  413. default:
  414. STARPU_ABORT();
  415. }
  416. return 0;
  417. }
  418. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  419. {
  420. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  421. }
  422. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  423. {
  424. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  425. }
  426. #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
  427. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  428. void *dst_interface, unsigned dst_node,
  429. cudaStream_t stream)
  430. {
  431. struct starpu_multiformat_interface *src_multiformat;
  432. struct starpu_multiformat_interface *dst_multiformat;
  433. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  434. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  435. STARPU_ASSERT(src_multiformat != NULL);
  436. STARPU_ASSERT(dst_multiformat != NULL);
  437. STARPU_ASSERT(src_multiformat->ops != NULL);
  438. cudaError_t status;
  439. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  440. int src_dev = starpu_memory_node_get_devid(src_node);
  441. int dst_dev = starpu_memory_node_get_devid(dst_node);
  442. if (stream)
  443. {
  444. double start;
  445. starpu_interface_start_driver_copy_async(src_node, dst_node, &start);
  446. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  447. src_multiformat->cuda_ptr, src_dev,
  448. size, stream);
  449. starpu_interface_end_driver_copy_async(src_node, dst_node, start);
  450. /* All good ! Still, returning -EAGAIN, because we will need to
  451. check the transfert completion later */
  452. if (status == cudaSuccess)
  453. return -EAGAIN;
  454. }
  455. /* Either a synchronous transfert was requested, or the asynchronous one
  456. failed. */
  457. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  458. src_multiformat->cuda_ptr, src_dev,
  459. size);
  460. if (!status)
  461. status = cudaDeviceSynchronize();
  462. if (STARPU_UNLIKELY(status != cudaSuccess))
  463. STARPU_CUDA_REPORT_ERROR(status);
  464. starpu_interface_data_copy(src_node, dst_node, size);
  465. return 0;
  466. }
  467. #endif
  468. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  469. {
  470. if (src_node == dst_node)
  471. {
  472. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  473. }
  474. else
  475. {
  476. #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
  477. return copy_cuda_peer_common(src_interface, src_node,
  478. dst_interface, dst_node,
  479. NULL);
  480. #else
  481. STARPU_ABORT();
  482. #endif
  483. }
  484. }
  485. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  486. void *dst_interface, unsigned dst_node,
  487. cudaStream_t stream)
  488. {
  489. if (src_node == dst_node)
  490. {
  491. return copy_cuda_common_async(src_interface, src_node,
  492. dst_interface, dst_node,
  493. stream, cudaMemcpyDeviceToDevice);
  494. }
  495. else
  496. {
  497. #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
  498. return copy_cuda_peer_common(src_interface, src_node,
  499. dst_interface, dst_node,
  500. stream);
  501. #else
  502. STARPU_ABORT();
  503. #endif
  504. }
  505. }
  506. #endif /* STARPU_USE_CUDA */
  507. #ifdef STARPU_USE_OPENCL
  508. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  509. void *dst_interface, unsigned dst_node,
  510. cl_event *event)
  511. {
  512. int err, ret;
  513. size_t size;
  514. struct starpu_multiformat_interface *src_multiformat;
  515. struct starpu_multiformat_interface *dst_multiformat;
  516. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  517. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  518. STARPU_ASSERT(src_multiformat != NULL);
  519. STARPU_ASSERT(dst_multiformat != NULL);
  520. STARPU_ASSERT(src_multiformat->ops != NULL);
  521. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  522. err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
  523. src_node,
  524. (cl_mem) dst_multiformat->cpu_ptr,
  525. dst_node,
  526. size,
  527. 0,
  528. event,
  529. &ret);
  530. if (STARPU_UNLIKELY(err))
  531. STARPU_OPENCL_REPORT_ERROR(err);
  532. starpu_interface_data_copy(src_node, dst_node, size);
  533. return ret;
  534. }
  535. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  536. void *dst_interface, unsigned dst_node,
  537. cl_event *event)
  538. {
  539. int err, ret;
  540. size_t size;
  541. struct starpu_multiformat_interface *src_multiformat;
  542. struct starpu_multiformat_interface *dst_multiformat;
  543. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  544. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  545. STARPU_ASSERT(src_multiformat != NULL);
  546. STARPU_ASSERT(dst_multiformat != NULL);
  547. STARPU_ASSERT(src_multiformat->ops != NULL);
  548. STARPU_ASSERT(dst_multiformat->ops != NULL);
  549. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  550. if (dst_multiformat->opencl_ptr == NULL)
  551. {
  552. /* XXX : it is weird that we might have to allocate memory here... */
  553. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  554. STARPU_ASSERT_MSG(dst_multiformat->opencl_ptr != NULL || dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize == 0, "Cannot allocate %ld bytes\n", (long) (dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize));
  555. }
  556. err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
  557. src_node,
  558. dst_multiformat->opencl_ptr,
  559. dst_node,
  560. size,
  561. 0,
  562. event,
  563. &ret);
  564. if (STARPU_UNLIKELY(err))
  565. STARPU_OPENCL_REPORT_ERROR(err);
  566. starpu_interface_data_copy(src_node, dst_node, size);
  567. return ret;
  568. }
  569. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  570. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  571. {
  572. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  573. }
  574. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  575. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  576. {
  577. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  578. }
  579. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  580. void *dst_interface, unsigned dst_node)
  581. {
  582. (void) src_interface;
  583. (void) dst_interface;
  584. (void) src_node;
  585. (void) dst_node;
  586. STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");
  587. return 0;
  588. }
  589. #endif