multiformat_interface.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2012 Institut National de Recherche en Informatique et Automatique
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/config.h>
  18. #include <datawizard/coherency.h>
  19. #include <datawizard/copy_driver.h>
  20. #include <datawizard/filters.h>
  21. #include <starpu_hash.h>
  22. #include <starpu_cuda.h>
  23. #include <starpu_opencl.h>
  24. #include <drivers/opencl/driver_opencl.h>
  25. #include <core/task.h>
  26. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  27. #ifdef STARPU_USE_CUDA
  28. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  30. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  31. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  32. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  33. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  34. #endif
  35. #ifdef STARPU_USE_OPENCL
  36. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  37. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  39. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  40. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  41. #endif
  42. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  43. {
  44. .ram_to_ram = copy_ram_to_ram,
  45. .ram_to_spu = NULL,
  46. #ifdef STARPU_USE_CUDA
  47. .ram_to_cuda = copy_ram_to_cuda,
  48. .cuda_to_ram = copy_cuda_to_ram,
  49. .ram_to_cuda_async = copy_ram_to_cuda_async,
  50. .cuda_to_ram_async = copy_cuda_to_ram_async,
  51. .cuda_to_cuda = copy_cuda_to_cuda,
  52. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  53. #endif
  54. #ifdef STARPU_USE_OPENCL
  55. .ram_to_opencl = copy_ram_to_opencl,
  56. .opencl_to_ram = copy_opencl_to_ram,
  57. .opencl_to_opencl = copy_opencl_to_opencl,
  58. .ram_to_opencl_async = copy_ram_to_opencl_async,
  59. .opencl_to_ram_async = copy_opencl_to_ram_async,
  60. #endif
  61. .cuda_to_spu = NULL,
  62. .spu_to_ram = NULL,
  63. .spu_to_cuda = NULL,
  64. .spu_to_spu = NULL
  65. };
  66. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  67. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  68. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  69. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  70. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  71. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  72. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  73. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  74. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  75. #ifdef STARPU_USE_GORDON
  76. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  77. #endif
  78. static struct starpu_multiformat_data_interface_ops*
  79. get_mf_ops(void *data_interface)
  80. {
  81. struct starpu_multiformat_interface *mf;
  82. mf = (struct starpu_multiformat_interface *) data_interface;
  83. return mf->ops;
  84. }
  85. static struct starpu_data_interface_ops interface_multiformat_ops =
  86. {
  87. .register_data_handle = register_multiformat_handle,
  88. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  89. .handle_to_pointer = multiformat_handle_to_pointer,
  90. .free_data_on_node = free_multiformat_buffer_on_node,
  91. .copy_methods = &multiformat_copy_data_methods_s,
  92. .get_size = multiformat_interface_get_size,
  93. .footprint = footprint_multiformat_interface_crc32,
  94. .compare = multiformat_compare,
  95. #ifdef STARPU_USE_GORDON
  96. .convert_to_gordon = NULL,
  97. #endif
  98. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  99. .interface_size = sizeof(struct starpu_multiformat_interface),
  100. .display = display_multiformat_interface,
  101. .is_multiformat = 1,
  102. .get_mf_ops = get_mf_ops
  103. };
  104. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  105. {
  106. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  107. struct starpu_multiformat_interface *multiformat_interface =
  108. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  109. switch(starpu_node_get_kind(node))
  110. {
  111. case STARPU_CPU_RAM:
  112. return multiformat_interface->cpu_ptr;
  113. #ifdef STARPU_USE_CUDA
  114. case STARPU_CUDA_RAM:
  115. return multiformat_interface->cuda_ptr;
  116. #endif
  117. #ifdef STARPU_USE_OPENCL
  118. case STARPU_OPENCL_RAM:
  119. return multiformat_interface->opencl_ptr;
  120. #endif
  121. default:
  122. STARPU_ASSERT(0);
  123. }
  124. }
  125. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  126. {
  127. struct starpu_multiformat_interface *multiformat_interface;
  128. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  129. unsigned node;
  130. for (node = 0; node < STARPU_MAXNODES; node++)
  131. {
  132. struct starpu_multiformat_interface *local_interface =
  133. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  134. if (node == home_node)
  135. {
  136. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  137. #ifdef STARPU_USE_CUDA
  138. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  139. #endif
  140. #ifdef STARPU_USE_OPENCL
  141. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  142. #endif
  143. }
  144. else
  145. {
  146. local_interface->cpu_ptr = NULL;
  147. #ifdef STARPU_USE_CUDA
  148. local_interface->cuda_ptr = NULL;
  149. #endif
  150. #ifdef STARPU_USE_OPENCL
  151. local_interface->opencl_ptr = NULL;
  152. #endif
  153. }
  154. local_interface->nx = multiformat_interface->nx;
  155. local_interface->ops = multiformat_interface->ops;
  156. }
  157. }
  158. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  159. uint32_t home_node,
  160. void *ptr,
  161. uint32_t nobjects,
  162. struct starpu_multiformat_data_interface_ops *format_ops)
  163. {
  164. #ifdef STARPU_USE_OPENCL
  165. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_opencl_cl);
  166. _starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
  167. #endif
  168. #ifdef STARPU_USE_CUDA
  169. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
  170. _starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
  171. #endif
  172. struct starpu_multiformat_interface multiformat =
  173. {
  174. .cpu_ptr = ptr,
  175. #ifdef STARPU_USE_CUDA
  176. .cuda_ptr = NULL,
  177. #endif
  178. #ifdef STARPu_USE_OPENCL
  179. .opencl_ptr = NULL,
  180. #endif
  181. .nx = nobjects,
  182. .ops = format_ops
  183. };
  184. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  185. }
  186. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  187. {
  188. return starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  189. }
  190. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  191. {
  192. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  193. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  194. return ((multiformat_a->nx == multiformat_b->nx)
  195. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  196. #ifdef STARPU_USE_CUDA
  197. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  198. #endif
  199. #ifdef STARPU_USE_OPENCL
  200. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  201. #endif
  202. );
  203. }
  204. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  205. {
  206. /* TODO */
  207. struct starpu_multiformat_interface *multiformat_interface;
  208. multiformat_interface = (struct starpu_multiformat_interface *)
  209. starpu_data_get_interface_on_node(handle, 0);
  210. fprintf(f, "%u\t", multiformat_interface->nx);
  211. }
  212. /* XXX : returns CPU size */
  213. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  214. {
  215. size_t size;
  216. struct starpu_multiformat_interface *multiformat_interface;
  217. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  218. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  219. return size;
  220. }
  221. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  222. {
  223. struct starpu_multiformat_interface *multiformat_interface;
  224. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  225. return multiformat_interface->nx;
  226. }
  227. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  228. {
  229. struct starpu_multiformat_interface *multiformat_interface;
  230. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  231. enum starpu_node_kind kind = starpu_node_get_kind(node);
  232. switch(kind)
  233. {
  234. case STARPU_CPU_RAM:
  235. free(multiformat_interface->cpu_ptr);
  236. multiformat_interface->cpu_ptr = NULL;
  237. break;
  238. #ifdef STARPU_USE_CUDA
  239. case STARPU_CUDA_RAM:
  240. if (multiformat_interface->cpu_ptr)
  241. {
  242. cudaFree(multiformat_interface->cpu_ptr);
  243. multiformat_interface->cpu_ptr = NULL;
  244. }
  245. if (multiformat_interface->cuda_ptr)
  246. {
  247. cudaFree(multiformat_interface->cuda_ptr);
  248. multiformat_interface->cuda_ptr = NULL;
  249. }
  250. break;
  251. #endif
  252. #ifdef STARPU_USE_OPENCL
  253. case STARPU_OPENCL_RAM:
  254. /* TODO */
  255. break;
  256. #endif
  257. default:
  258. STARPU_ABORT();
  259. }
  260. }
  261. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  262. {
  263. struct starpu_multiformat_interface *multiformat_interface;
  264. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  265. unsigned fail = 0;
  266. uintptr_t addr = 0;
  267. ssize_t allocated_memory;
  268. enum starpu_node_kind kind = starpu_node_get_kind(dst_node);
  269. switch(kind)
  270. {
  271. case STARPU_CPU_RAM:
  272. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  273. addr = (uintptr_t)malloc(allocated_memory);
  274. if (!addr)
  275. {
  276. fail = 1;
  277. }
  278. else
  279. {
  280. multiformat_interface->cpu_ptr = (void *) addr;
  281. }
  282. #ifdef STARPU_USE_CUDA
  283. multiformat_interface->cuda_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  284. STARPU_ASSERT(multiformat_interface->cuda_ptr != NULL);
  285. #endif
  286. #ifdef STARPU_USE_OPENCL
  287. multiformat_interface->opencl_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  288. STARPU_ASSERT(multiformat_interface->opencl_ptr != NULL);
  289. #endif
  290. break;
  291. #ifdef STARPU_USE_CUDA
  292. case STARPU_CUDA_RAM:
  293. {
  294. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  295. cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
  296. if (STARPU_UNLIKELY(status))
  297. {
  298. STARPU_CUDA_REPORT_ERROR(status);
  299. }
  300. else
  301. {
  302. multiformat_interface->cuda_ptr = (void *)addr;
  303. }
  304. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  305. status = cudaMalloc((void **)&multiformat_interface->cpu_ptr, allocated_memory);
  306. if (STARPU_UNLIKELY(status != cudaSuccess))
  307. STARPU_CUDA_REPORT_ERROR(status);
  308. break;
  309. }
  310. #endif
  311. #ifdef STARPU_USE_OPENCL
  312. case STARPU_OPENCL_RAM:
  313. {
  314. int ret;
  315. cl_mem ptr;
  316. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  317. ret = starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
  318. addr = (uintptr_t)ptr;
  319. if (ret)
  320. {
  321. fail = 1;
  322. }
  323. else
  324. {
  325. multiformat_interface->opencl_ptr = (void *)addr;
  326. }
  327. ret = starpu_opencl_allocate_memory(&ptr,
  328. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize,
  329. CL_MEM_READ_WRITE);
  330. addr = (uintptr_t)ptr;
  331. if (ret)
  332. {
  333. fail = 1;
  334. }
  335. else
  336. {
  337. multiformat_interface->cpu_ptr = (void *) addr;
  338. }
  339. break;
  340. }
  341. #endif
  342. default:
  343. STARPU_ASSERT(0);
  344. }
  345. if (fail)
  346. return -ENOMEM;
  347. return allocated_memory;
  348. }
  349. /*
  350. * Copy methods
  351. */
  352. static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__ ((unused)),
  353. void *dst_interface, unsigned dst_node __attribute__ ((unused)))
  354. {
  355. struct starpu_multiformat_interface *src_multiformat;
  356. struct starpu_multiformat_interface *dst_multiformat;
  357. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  358. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  359. STARPU_ASSERT(src_multiformat != NULL);
  360. STARPU_ASSERT(dst_multiformat != NULL);
  361. STARPU_ASSERT(dst_multiformat->ops != NULL);
  362. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  363. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  364. return 0;
  365. }
  366. #ifdef STARPU_USE_CUDA
  367. static int copy_cuda_common(void *src_interface, unsigned src_node __attribute__ ((unused)),
  368. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  369. enum cudaMemcpyKind kind)
  370. {
  371. struct starpu_multiformat_interface *src_multiformat;
  372. struct starpu_multiformat_interface *dst_multiformat;
  373. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  374. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  375. size_t size;
  376. cudaError_t status;
  377. switch (kind)
  378. {
  379. case cudaMemcpyHostToDevice:
  380. {
  381. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  382. if (src_multiformat->cuda_ptr == NULL)
  383. {
  384. src_multiformat->cuda_ptr = malloc(size);
  385. if (src_multiformat->cuda_ptr == NULL)
  386. return -ENOMEM;
  387. }
  388. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  389. if (STARPU_UNLIKELY(status))
  390. {
  391. STARPU_CUDA_REPORT_ERROR(status);
  392. }
  393. break;
  394. }
  395. case cudaMemcpyDeviceToHost:
  396. {
  397. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  398. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  399. if (STARPU_UNLIKELY(status))
  400. STARPU_CUDA_REPORT_ERROR(status);
  401. break;
  402. }
  403. case cudaMemcpyDeviceToDevice:
  404. {
  405. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  406. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  407. if (STARPU_UNLIKELY(status))
  408. STARPU_CUDA_REPORT_ERROR(status);
  409. break;
  410. }
  411. default:
  412. STARPU_ASSERT(0);
  413. }
  414. return 0;
  415. }
  416. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  417. {
  418. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  419. }
  420. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  421. {
  422. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  423. }
  424. static int copy_cuda_common_async(void *src_interface, unsigned src_node __attribute__ ((unused)),
  425. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  426. cudaStream_t stream, enum cudaMemcpyKind kind)
  427. {
  428. struct starpu_multiformat_interface *src_multiformat;
  429. struct starpu_multiformat_interface *dst_multiformat;
  430. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  431. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  432. size_t size;
  433. cudaError_t status;
  434. switch (kind)
  435. {
  436. case cudaMemcpyHostToDevice:
  437. {
  438. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  439. if (src_multiformat->cuda_ptr == NULL)
  440. {
  441. src_multiformat->cuda_ptr = malloc(size);
  442. if (src_multiformat->cuda_ptr == NULL)
  443. return -ENOMEM;
  444. }
  445. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  446. if (STARPU_UNLIKELY(status))
  447. {
  448. STARPU_CUDA_REPORT_ERROR(status);
  449. }
  450. break;
  451. }
  452. case cudaMemcpyDeviceToHost:
  453. {
  454. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  455. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  456. if (STARPU_UNLIKELY(status))
  457. STARPU_CUDA_REPORT_ERROR(status);
  458. break;
  459. }
  460. case cudaMemcpyDeviceToDevice:
  461. {
  462. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  463. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  464. if (STARPU_UNLIKELY(status))
  465. STARPU_CUDA_REPORT_ERROR(status);
  466. break;
  467. }
  468. default:
  469. STARPU_ASSERT(0);
  470. }
  471. return 0;
  472. }
  473. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  474. {
  475. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  476. }
  477. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  478. {
  479. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  480. }
  481. #ifdef HAVE_CUDA_MEMCPY_PEER
  482. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  483. void *dst_interface, unsigned dst_node,
  484. cudaStream_t stream)
  485. {
  486. struct starpu_multiformat_interface *src_multiformat;
  487. struct starpu_multiformat_interface *dst_multiformat;
  488. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  489. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  490. STARPU_ASSERT(src_multiformat != NULL);
  491. STARPU_ASSERT(dst_multiformat != NULL);
  492. STARPU_ASSERT(src_multiformat->ops != NULL);
  493. cudaError_t status;
  494. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  495. int src_dev = _starpu_memory_node_to_devid(src_node);
  496. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  497. if (stream)
  498. {
  499. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  500. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  501. src_multiformat->cuda_ptr, src_dev,
  502. size, stream);
  503. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  504. /* All good ! Still, returning -EAGAIN, because we will need to
  505. check the transfert completion later */
  506. if (status == cudaSuccess)
  507. return -EAGAIN;
  508. }
  509. /* Either a synchronous transfert was requested, or the asynchronous one
  510. failed. */
  511. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  512. src_multiformat->cuda_ptr, src_dev,
  513. size);
  514. if (STARPU_UNLIKELY(status != cudaSuccess))
  515. STARPU_CUDA_REPORT_ERROR(status);
  516. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  517. return 0;
  518. }
  519. #endif
  520. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  521. {
  522. if (src_node == dst_node)
  523. {
  524. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  525. }
  526. else
  527. {
  528. #ifdef HAVE_CUDA_MEMCPY_PEER
  529. return copy_cuda_peer_common(src_interface, src_node,
  530. dst_interface, dst_node,
  531. NULL);
  532. #else
  533. STARPU_ASSERT(0);
  534. #endif
  535. }
  536. }
  537. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  538. void *dst_interface, unsigned dst_node,
  539. cudaStream_t stream)
  540. {
  541. if (src_node == dst_node)
  542. {
  543. return copy_cuda_common_async(src_interface, src_node,
  544. dst_interface, dst_node,
  545. stream, cudaMemcpyDeviceToDevice);
  546. }
  547. else
  548. {
  549. #ifdef HAVE_CUDA_MEMCPY_PEER
  550. return copy_cuda_peer_common(src_interface, src_node,
  551. dst_interface, dst_node,
  552. stream);
  553. #else
  554. STARPU_ASSERT(0);
  555. #endif
  556. }
  557. }
  558. #endif /* STARPU_USE_CUDA */
  559. #ifdef STARPU_USE_OPENCL
  560. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  561. void *dst_interface, unsigned dst_node,
  562. void *_event)
  563. {
  564. int err, ret;
  565. size_t size;
  566. struct starpu_multiformat_interface *src_multiformat;
  567. struct starpu_multiformat_interface *dst_multiformat;
  568. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  569. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  570. STARPU_ASSERT(src_multiformat != NULL);
  571. STARPU_ASSERT(dst_multiformat != NULL);
  572. STARPU_ASSERT(src_multiformat->ops != NULL);
  573. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  574. err = starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->cpu_ptr,
  575. src_node,
  576. (cl_mem) dst_multiformat->cpu_ptr,
  577. dst_node,
  578. size,
  579. 0,
  580. (cl_event *) _event,
  581. &ret);
  582. if (STARPU_UNLIKELY(err))
  583. STARPU_OPENCL_REPORT_ERROR(err);
  584. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  585. return ret;
  586. }
  587. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  588. void *dst_interface, unsigned dst_node,
  589. void *_event)
  590. {
  591. int err, ret;
  592. size_t size;
  593. struct starpu_multiformat_interface *src_multiformat;
  594. struct starpu_multiformat_interface *dst_multiformat;
  595. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  596. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  597. STARPU_ASSERT(src_multiformat != NULL);
  598. STARPU_ASSERT(dst_multiformat != NULL);
  599. STARPU_ASSERT(src_multiformat->ops != NULL);
  600. STARPU_ASSERT(dst_multiformat->ops != NULL);
  601. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  602. if (dst_multiformat->opencl_ptr == NULL) {
  603. /* XXX : it is weird that we might have to allocate memory here... */
  604. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  605. }
  606. err = starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->opencl_ptr,
  607. src_node,
  608. dst_multiformat->opencl_ptr,
  609. dst_node,
  610. size,
  611. 0,
  612. (cl_event *)_event,
  613. &ret);
  614. if (STARPU_UNLIKELY(err))
  615. STARPU_OPENCL_REPORT_ERROR(err);
  616. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  617. return ret;
  618. }
  619. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  620. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  621. {
  622. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  623. }
  624. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  625. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  626. {
  627. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  628. }
  /*
   * OpenCL -> OpenCL copy. TODO: not implemented; no data is transferred,
   * every argument is ignored and 0 is returned.
   */
  static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
                                   void *dst_interface, unsigned dst_node)
  {
      /* Silence unused-parameter warnings until the copy is implemented. */
      (void) src_node;
      (void) dst_node;
      (void) src_interface;
      (void) dst_interface;

      return 0;
  }
  639. #endif