multiformat_interface.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2012 Institut National de Recherche en Informatique et Automatique
  4. * Copyright (C) 2012 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <starpu.h>
  18. #include <common/config.h>
  19. #include <datawizard/coherency.h>
  20. #include <datawizard/copy_driver.h>
  21. #include <datawizard/filters.h>
  22. #include <starpu_hash.h>
  23. #include <starpu_cuda.h>
  24. #include <starpu_opencl.h>
  25. #include <drivers/opencl/driver_opencl.h>
  26. #include <core/task.h>
  27. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  28. #ifdef STARPU_USE_CUDA
  29. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  30. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  31. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  32. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  33. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  34. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  35. #endif
  36. #ifdef STARPU_USE_OPENCL
  37. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  39. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  40. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  41. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  42. #endif
  43. static struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  44. {
  45. .ram_to_ram = copy_ram_to_ram,
  46. .ram_to_spu = NULL,
  47. #ifdef STARPU_USE_CUDA
  48. .ram_to_cuda = copy_ram_to_cuda,
  49. .cuda_to_ram = copy_cuda_to_ram,
  50. .ram_to_cuda_async = copy_ram_to_cuda_async,
  51. .cuda_to_ram_async = copy_cuda_to_ram_async,
  52. .cuda_to_cuda = copy_cuda_to_cuda,
  53. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  54. #endif
  55. #ifdef STARPU_USE_OPENCL
  56. .ram_to_opencl = copy_ram_to_opencl,
  57. .opencl_to_ram = copy_opencl_to_ram,
  58. .opencl_to_opencl = copy_opencl_to_opencl,
  59. .ram_to_opencl_async = copy_ram_to_opencl_async,
  60. .opencl_to_ram_async = copy_opencl_to_ram_async,
  61. #endif
  62. .cuda_to_spu = NULL,
  63. .spu_to_ram = NULL,
  64. .spu_to_cuda = NULL,
  65. .spu_to_spu = NULL
  66. };
  67. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  68. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  69. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  70. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  71. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  72. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  73. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  74. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  75. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  76. #ifdef STARPU_USE_GORDON
  77. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  78. #endif
  79. static struct starpu_multiformat_data_interface_ops*
  80. get_mf_ops(void *data_interface)
  81. {
  82. struct starpu_multiformat_interface *mf;
  83. mf = (struct starpu_multiformat_interface *) data_interface;
  84. return mf->ops;
  85. }
  86. static struct starpu_data_interface_ops interface_multiformat_ops =
  87. {
  88. .register_data_handle = register_multiformat_handle,
  89. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  90. .handle_to_pointer = multiformat_handle_to_pointer,
  91. .free_data_on_node = free_multiformat_buffer_on_node,
  92. .copy_methods = &multiformat_copy_data_methods_s,
  93. .get_size = multiformat_interface_get_size,
  94. .footprint = footprint_multiformat_interface_crc32,
  95. .compare = multiformat_compare,
  96. #ifdef STARPU_USE_GORDON
  97. .convert_to_gordon = NULL,
  98. #endif
  99. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  100. .interface_size = sizeof(struct starpu_multiformat_interface),
  101. .display = display_multiformat_interface,
  102. .is_multiformat = 1,
  103. .get_mf_ops = get_mf_ops
  104. };
  105. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  106. {
  107. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  108. struct starpu_multiformat_interface *multiformat_interface =
  109. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  110. switch(starpu_node_get_kind(node))
  111. {
  112. case STARPU_CPU_RAM:
  113. return multiformat_interface->cpu_ptr;
  114. #ifdef STARPU_USE_CUDA
  115. case STARPU_CUDA_RAM:
  116. return multiformat_interface->cuda_ptr;
  117. #endif
  118. #ifdef STARPU_USE_OPENCL
  119. case STARPU_OPENCL_RAM:
  120. return multiformat_interface->opencl_ptr;
  121. #endif
  122. default:
  123. STARPU_ABORT();
  124. }
  125. return NULL;
  126. }
  127. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  128. {
  129. struct starpu_multiformat_interface *multiformat_interface;
  130. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  131. unsigned node;
  132. for (node = 0; node < STARPU_MAXNODES; node++)
  133. {
  134. struct starpu_multiformat_interface *local_interface =
  135. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  136. if (node == home_node)
  137. {
  138. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  139. #ifdef STARPU_USE_CUDA
  140. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  141. #endif
  142. #ifdef STARPU_USE_OPENCL
  143. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  144. #endif
  145. }
  146. else
  147. {
  148. local_interface->cpu_ptr = NULL;
  149. #ifdef STARPU_USE_CUDA
  150. local_interface->cuda_ptr = NULL;
  151. #endif
  152. #ifdef STARPU_USE_OPENCL
  153. local_interface->opencl_ptr = NULL;
  154. #endif
  155. }
  156. local_interface->nx = multiformat_interface->nx;
  157. local_interface->ops = multiformat_interface->ops;
  158. }
  159. }
  160. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  161. uint32_t home_node,
  162. void *ptr,
  163. uint32_t nobjects,
  164. struct starpu_multiformat_data_interface_ops *format_ops)
  165. {
  166. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_opencl_cl);
  167. _starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
  168. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
  169. _starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
  170. struct starpu_multiformat_interface multiformat =
  171. {
  172. .cpu_ptr = ptr,
  173. .cuda_ptr = NULL,
  174. .opencl_ptr = NULL,
  175. .nx = nobjects,
  176. .ops = format_ops
  177. };
  178. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  179. }
  180. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  181. {
  182. return starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  183. }
  184. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  185. {
  186. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  187. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  188. return ((multiformat_a->nx == multiformat_b->nx)
  189. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  190. #ifdef STARPU_USE_CUDA
  191. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  192. #endif
  193. #ifdef STARPU_USE_OPENCL
  194. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  195. #endif
  196. );
  197. }
  198. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  199. {
  200. struct starpu_multiformat_interface *multiformat_interface;
  201. multiformat_interface = (struct starpu_multiformat_interface *)
  202. starpu_data_get_interface_on_node(handle, 0);
  203. fprintf(f, "%u\t", multiformat_interface->nx);
  204. }
  205. /* XXX : returns CPU size */
  206. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  207. {
  208. size_t size;
  209. struct starpu_multiformat_interface *multiformat_interface;
  210. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  211. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  212. return size;
  213. }
  214. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  215. {
  216. struct starpu_multiformat_interface *multiformat_interface;
  217. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  218. return multiformat_interface->nx;
  219. }
  220. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  221. {
  222. struct starpu_multiformat_interface *multiformat_interface;
  223. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  224. enum starpu_node_kind kind = starpu_node_get_kind(node);
  225. switch(kind)
  226. {
  227. case STARPU_CPU_RAM:
  228. free(multiformat_interface->cpu_ptr);
  229. multiformat_interface->cpu_ptr = NULL;
  230. break;
  231. #ifdef STARPU_USE_CUDA
  232. case STARPU_CUDA_RAM:
  233. if (multiformat_interface->cpu_ptr)
  234. {
  235. cudaFree(multiformat_interface->cpu_ptr);
  236. multiformat_interface->cpu_ptr = NULL;
  237. }
  238. if (multiformat_interface->cuda_ptr)
  239. {
  240. cudaFree(multiformat_interface->cuda_ptr);
  241. multiformat_interface->cuda_ptr = NULL;
  242. }
  243. break;
  244. #endif
  245. #ifdef STARPU_USE_OPENCL
  246. case STARPU_OPENCL_RAM:
  247. if (multiformat_interface->cpu_ptr)
  248. {
  249. cl_int err = clReleaseMemObject(multiformat_interface->cpu_ptr);
  250. if (err != CL_SUCCESS)
  251. STARPU_OPENCL_REPORT_ERROR(err);
  252. multiformat_interface->cpu_ptr = NULL;
  253. }
  254. if (multiformat_interface->opencl_ptr)
  255. {
  256. cl_int err = clReleaseMemObject(multiformat_interface->opencl_ptr);
  257. if (err != CL_SUCCESS)
  258. STARPU_OPENCL_REPORT_ERROR(err);
  259. multiformat_interface->opencl_ptr = NULL;
  260. }
  261. break;
  262. #endif
  263. default:
  264. STARPU_ABORT();
  265. }
  266. }
  267. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  268. {
  269. struct starpu_multiformat_interface *multiformat_interface;
  270. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  271. unsigned fail = 0;
  272. uintptr_t addr = 0;
  273. ssize_t allocated_memory = 0;
  274. enum starpu_node_kind kind = starpu_node_get_kind(dst_node);
  275. switch(kind)
  276. {
  277. case STARPU_CPU_RAM:
  278. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  279. addr = (uintptr_t)malloc(allocated_memory);
  280. if (!addr)
  281. {
  282. fail = 1;
  283. }
  284. else
  285. {
  286. multiformat_interface->cpu_ptr = (void *) addr;
  287. }
  288. #ifdef STARPU_USE_CUDA
  289. multiformat_interface->cuda_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  290. STARPU_ASSERT(multiformat_interface->cuda_ptr != NULL);
  291. #endif
  292. #ifdef STARPU_USE_OPENCL
  293. multiformat_interface->opencl_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  294. STARPU_ASSERT(multiformat_interface->opencl_ptr != NULL);
  295. #endif
  296. break;
  297. #ifdef STARPU_USE_CUDA
  298. case STARPU_CUDA_RAM:
  299. {
  300. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  301. cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
  302. if (STARPU_UNLIKELY(status))
  303. {
  304. STARPU_CUDA_REPORT_ERROR(status);
  305. }
  306. else
  307. {
  308. multiformat_interface->cuda_ptr = (void *)addr;
  309. }
  310. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  311. status = cudaMalloc((void **)&multiformat_interface->cpu_ptr, allocated_memory);
  312. if (STARPU_UNLIKELY(status != cudaSuccess))
  313. STARPU_CUDA_REPORT_ERROR(status);
  314. break;
  315. }
  316. #endif
  317. #ifdef STARPU_USE_OPENCL
  318. case STARPU_OPENCL_RAM:
  319. {
  320. int ret;
  321. cl_mem ptr;
  322. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  323. ret = starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
  324. addr = (uintptr_t)ptr;
  325. if (ret)
  326. {
  327. fail = 1;
  328. }
  329. else
  330. {
  331. multiformat_interface->opencl_ptr = (void *)addr;
  332. }
  333. ret = starpu_opencl_allocate_memory(&ptr,
  334. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize,
  335. CL_MEM_READ_WRITE);
  336. addr = (uintptr_t)ptr;
  337. if (ret)
  338. {
  339. fail = 1;
  340. }
  341. else
  342. {
  343. multiformat_interface->cpu_ptr = (void *) addr;
  344. }
  345. break;
  346. }
  347. #endif
  348. default:
  349. STARPU_ABORT();
  350. }
  351. if (fail)
  352. return -ENOMEM;
  353. return allocated_memory;
  354. }
  355. /*
  356. * Copy methods
  357. */
  358. static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__ ((unused)),
  359. void *dst_interface, unsigned dst_node __attribute__ ((unused)))
  360. {
  361. struct starpu_multiformat_interface *src_multiformat;
  362. struct starpu_multiformat_interface *dst_multiformat;
  363. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  364. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  365. STARPU_ASSERT(src_multiformat != NULL);
  366. STARPU_ASSERT(dst_multiformat != NULL);
  367. STARPU_ASSERT(dst_multiformat->ops != NULL);
  368. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  369. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  370. return 0;
  371. }
  372. #ifdef STARPU_USE_CUDA
  373. static int copy_cuda_common(void *src_interface, unsigned src_node __attribute__ ((unused)),
  374. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  375. enum cudaMemcpyKind kind)
  376. {
  377. struct starpu_multiformat_interface *src_multiformat;
  378. struct starpu_multiformat_interface *dst_multiformat;
  379. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  380. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  381. size_t size;
  382. cudaError_t status;
  383. switch (kind)
  384. {
  385. case cudaMemcpyHostToDevice:
  386. {
  387. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  388. if (src_multiformat->cuda_ptr == NULL)
  389. {
  390. src_multiformat->cuda_ptr = malloc(size);
  391. if (src_multiformat->cuda_ptr == NULL)
  392. return -ENOMEM;
  393. }
  394. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  395. if (STARPU_UNLIKELY(status))
  396. {
  397. STARPU_CUDA_REPORT_ERROR(status);
  398. }
  399. break;
  400. }
  401. case cudaMemcpyDeviceToHost:
  402. {
  403. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  404. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  405. if (STARPU_UNLIKELY(status))
  406. STARPU_CUDA_REPORT_ERROR(status);
  407. break;
  408. }
  409. case cudaMemcpyDeviceToDevice:
  410. {
  411. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  412. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  413. if (STARPU_UNLIKELY(status))
  414. STARPU_CUDA_REPORT_ERROR(status);
  415. break;
  416. }
  417. default:
  418. STARPU_ABORT();
  419. }
  420. return 0;
  421. }
  422. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  423. {
  424. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  425. }
  426. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  427. {
  428. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  429. }
  430. static int copy_cuda_common_async(void *src_interface, unsigned src_node __attribute__ ((unused)),
  431. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  432. cudaStream_t stream, enum cudaMemcpyKind kind)
  433. {
  434. struct starpu_multiformat_interface *src_multiformat;
  435. struct starpu_multiformat_interface *dst_multiformat;
  436. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  437. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  438. size_t size;
  439. cudaError_t status;
  440. switch (kind)
  441. {
  442. case cudaMemcpyHostToDevice:
  443. {
  444. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  445. if (src_multiformat->cuda_ptr == NULL)
  446. {
  447. src_multiformat->cuda_ptr = malloc(size);
  448. if (src_multiformat->cuda_ptr == NULL)
  449. return -ENOMEM;
  450. }
  451. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  452. if (STARPU_UNLIKELY(status))
  453. {
  454. STARPU_CUDA_REPORT_ERROR(status);
  455. }
  456. break;
  457. }
  458. case cudaMemcpyDeviceToHost:
  459. {
  460. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  461. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  462. if (STARPU_UNLIKELY(status))
  463. STARPU_CUDA_REPORT_ERROR(status);
  464. break;
  465. }
  466. case cudaMemcpyDeviceToDevice:
  467. {
  468. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  469. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  470. if (STARPU_UNLIKELY(status))
  471. STARPU_CUDA_REPORT_ERROR(status);
  472. break;
  473. }
  474. default:
  475. STARPU_ABORT();
  476. }
  477. return 0;
  478. }
  479. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  480. {
  481. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  482. }
  483. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  484. {
  485. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  486. }
  487. #ifdef HAVE_CUDA_MEMCPY_PEER
  488. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  489. void *dst_interface, unsigned dst_node,
  490. cudaStream_t stream)
  491. {
  492. struct starpu_multiformat_interface *src_multiformat;
  493. struct starpu_multiformat_interface *dst_multiformat;
  494. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  495. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  496. STARPU_ASSERT(src_multiformat != NULL);
  497. STARPU_ASSERT(dst_multiformat != NULL);
  498. STARPU_ASSERT(src_multiformat->ops != NULL);
  499. cudaError_t status;
  500. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  501. int src_dev = _starpu_memory_node_to_devid(src_node);
  502. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  503. if (stream)
  504. {
  505. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  506. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  507. src_multiformat->cuda_ptr, src_dev,
  508. size, stream);
  509. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  510. /* All good ! Still, returning -EAGAIN, because we will need to
  511. check the transfert completion later */
  512. if (status == cudaSuccess)
  513. return -EAGAIN;
  514. }
  515. /* Either a synchronous transfert was requested, or the asynchronous one
  516. failed. */
  517. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  518. src_multiformat->cuda_ptr, src_dev,
  519. size);
  520. if (STARPU_UNLIKELY(status != cudaSuccess))
  521. STARPU_CUDA_REPORT_ERROR(status);
  522. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  523. return 0;
  524. }
  525. #endif
  526. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  527. {
  528. if (src_node == dst_node)
  529. {
  530. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  531. }
  532. else
  533. {
  534. #ifdef HAVE_CUDA_MEMCPY_PEER
  535. return copy_cuda_peer_common(src_interface, src_node,
  536. dst_interface, dst_node,
  537. NULL);
  538. #else
  539. STARPU_ABORT();
  540. #endif
  541. }
  542. }
  543. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  544. void *dst_interface, unsigned dst_node,
  545. cudaStream_t stream)
  546. {
  547. if (src_node == dst_node)
  548. {
  549. return copy_cuda_common_async(src_interface, src_node,
  550. dst_interface, dst_node,
  551. stream, cudaMemcpyDeviceToDevice);
  552. }
  553. else
  554. {
  555. #ifdef HAVE_CUDA_MEMCPY_PEER
  556. return copy_cuda_peer_common(src_interface, src_node,
  557. dst_interface, dst_node,
  558. stream);
  559. #else
  560. STARPU_ABORT();
  561. #endif
  562. }
  563. }
  564. #endif /* STARPU_USE_CUDA */
  565. #ifdef STARPU_USE_OPENCL
  566. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  567. void *dst_interface, unsigned dst_node,
  568. cl_event *event)
  569. {
  570. int err, ret;
  571. size_t size;
  572. struct starpu_multiformat_interface *src_multiformat;
  573. struct starpu_multiformat_interface *dst_multiformat;
  574. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  575. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  576. STARPU_ASSERT(src_multiformat != NULL);
  577. STARPU_ASSERT(dst_multiformat != NULL);
  578. STARPU_ASSERT(src_multiformat->ops != NULL);
  579. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  580. err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
  581. src_node,
  582. (cl_mem) dst_multiformat->cpu_ptr,
  583. dst_node,
  584. size,
  585. 0,
  586. event,
  587. &ret);
  588. if (STARPU_UNLIKELY(err))
  589. STARPU_OPENCL_REPORT_ERROR(err);
  590. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  591. return ret;
  592. }
  593. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  594. void *dst_interface, unsigned dst_node,
  595. cl_event *event)
  596. {
  597. int err, ret;
  598. size_t size;
  599. struct starpu_multiformat_interface *src_multiformat;
  600. struct starpu_multiformat_interface *dst_multiformat;
  601. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  602. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  603. STARPU_ASSERT(src_multiformat != NULL);
  604. STARPU_ASSERT(dst_multiformat != NULL);
  605. STARPU_ASSERT(src_multiformat->ops != NULL);
  606. STARPU_ASSERT(dst_multiformat->ops != NULL);
  607. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  608. if (dst_multiformat->opencl_ptr == NULL) {
  609. /* XXX : it is weird that we might have to allocate memory here... */
  610. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  611. }
  612. err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
  613. src_node,
  614. dst_multiformat->opencl_ptr,
  615. dst_node,
  616. size,
  617. 0,
  618. event,
  619. &ret);
  620. if (STARPU_UNLIKELY(err))
  621. STARPU_OPENCL_REPORT_ERROR(err);
  622. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  623. return ret;
  624. }
  625. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  626. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  627. {
  628. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  629. }
  630. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  631. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  632. {
  633. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  634. }
  /*
   * OpenCL to OpenCL transfer: not implemented. The routine only trips an
   * assertion; its arguments are unused. Returns 0 if the assertion does
   * not stop the program.
   */
  static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
                                   void *dst_interface, unsigned dst_node)
  {
      /* Silence unused-parameter warnings. */
      (void) src_node;
      (void) dst_node;
      (void) src_interface;
      (void) dst_interface;

      STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");

      return 0;
  }
  645. #endif