multiformat_interface.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2012 Institut National de Recherche en Informatique et Automatique
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/config.h>
  18. #include <datawizard/coherency.h>
  19. #include <datawizard/copy_driver.h>
  20. #include <datawizard/filters.h>
  21. #include <starpu_hash.h>
  22. #include <starpu_cuda.h>
  23. #include <starpu_opencl.h>
  24. #include <drivers/opencl/driver_opencl.h>
  25. #include <core/task.h>
  26. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  27. #ifdef STARPU_USE_CUDA
  28. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  30. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  31. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  32. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  33. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  34. #endif
  35. #ifdef STARPU_USE_OPENCL
  36. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  37. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  39. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  40. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  41. #endif
  42. static struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  43. {
  44. .ram_to_ram = copy_ram_to_ram,
  45. .ram_to_spu = NULL,
  46. #ifdef STARPU_USE_CUDA
  47. .ram_to_cuda = copy_ram_to_cuda,
  48. .cuda_to_ram = copy_cuda_to_ram,
  49. .ram_to_cuda_async = copy_ram_to_cuda_async,
  50. .cuda_to_ram_async = copy_cuda_to_ram_async,
  51. .cuda_to_cuda = copy_cuda_to_cuda,
  52. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  53. #endif
  54. #ifdef STARPU_USE_OPENCL
  55. .ram_to_opencl = copy_ram_to_opencl,
  56. .opencl_to_ram = copy_opencl_to_ram,
  57. .opencl_to_opencl = copy_opencl_to_opencl,
  58. .ram_to_opencl_async = copy_ram_to_opencl_async,
  59. .opencl_to_ram_async = copy_opencl_to_ram_async,
  60. #endif
  61. .cuda_to_spu = NULL,
  62. .spu_to_ram = NULL,
  63. .spu_to_cuda = NULL,
  64. .spu_to_spu = NULL
  65. };
  66. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  67. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  68. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  69. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  70. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  71. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  72. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  73. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  74. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  75. #ifdef STARPU_USE_GORDON
  76. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  77. #endif
  78. static struct starpu_multiformat_data_interface_ops*
  79. get_mf_ops(void *data_interface)
  80. {
  81. struct starpu_multiformat_interface *mf;
  82. mf = (struct starpu_multiformat_interface *) data_interface;
  83. return mf->ops;
  84. }
  85. static struct starpu_data_interface_ops interface_multiformat_ops =
  86. {
  87. .register_data_handle = register_multiformat_handle,
  88. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  89. .handle_to_pointer = multiformat_handle_to_pointer,
  90. .free_data_on_node = free_multiformat_buffer_on_node,
  91. .copy_methods = &multiformat_copy_data_methods_s,
  92. .get_size = multiformat_interface_get_size,
  93. .footprint = footprint_multiformat_interface_crc32,
  94. .compare = multiformat_compare,
  95. #ifdef STARPU_USE_GORDON
  96. .convert_to_gordon = NULL,
  97. #endif
  98. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  99. .interface_size = sizeof(struct starpu_multiformat_interface),
  100. .display = display_multiformat_interface,
  101. .is_multiformat = 1,
  102. .get_mf_ops = get_mf_ops
  103. };
  104. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  105. {
  106. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  107. struct starpu_multiformat_interface *multiformat_interface =
  108. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  109. switch(starpu_node_get_kind(node))
  110. {
  111. case STARPU_CPU_RAM:
  112. return multiformat_interface->cpu_ptr;
  113. #ifdef STARPU_USE_CUDA
  114. case STARPU_CUDA_RAM:
  115. return multiformat_interface->cuda_ptr;
  116. #endif
  117. #ifdef STARPU_USE_OPENCL
  118. case STARPU_OPENCL_RAM:
  119. return multiformat_interface->opencl_ptr;
  120. #endif
  121. default:
  122. STARPU_ABORT();
  123. }
  124. return NULL;
  125. }
  126. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  127. {
  128. struct starpu_multiformat_interface *multiformat_interface;
  129. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  130. unsigned node;
  131. for (node = 0; node < STARPU_MAXNODES; node++)
  132. {
  133. struct starpu_multiformat_interface *local_interface =
  134. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  135. if (node == home_node)
  136. {
  137. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  138. #ifdef STARPU_USE_CUDA
  139. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  140. #endif
  141. #ifdef STARPU_USE_OPENCL
  142. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  143. #endif
  144. }
  145. else
  146. {
  147. local_interface->cpu_ptr = NULL;
  148. #ifdef STARPU_USE_CUDA
  149. local_interface->cuda_ptr = NULL;
  150. #endif
  151. #ifdef STARPU_USE_OPENCL
  152. local_interface->opencl_ptr = NULL;
  153. #endif
  154. }
  155. local_interface->nx = multiformat_interface->nx;
  156. local_interface->ops = multiformat_interface->ops;
  157. }
  158. }
  159. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  160. uint32_t home_node,
  161. void *ptr,
  162. uint32_t nobjects,
  163. struct starpu_multiformat_data_interface_ops *format_ops)
  164. {
  165. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_opencl_cl);
  166. _starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
  167. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
  168. _starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
  169. struct starpu_multiformat_interface multiformat =
  170. {
  171. .cpu_ptr = ptr,
  172. .cuda_ptr = NULL,
  173. .opencl_ptr = NULL,
  174. .nx = nobjects,
  175. .ops = format_ops
  176. };
  177. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  178. }
  179. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  180. {
  181. return starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  182. }
  183. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  184. {
  185. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  186. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  187. return ((multiformat_a->nx == multiformat_b->nx)
  188. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  189. #ifdef STARPU_USE_CUDA
  190. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  191. #endif
  192. #ifdef STARPU_USE_OPENCL
  193. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  194. #endif
  195. );
  196. }
  197. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  198. {
  199. /* TODO */
  200. struct starpu_multiformat_interface *multiformat_interface;
  201. multiformat_interface = (struct starpu_multiformat_interface *)
  202. starpu_data_get_interface_on_node(handle, 0);
  203. fprintf(f, "%u\t", multiformat_interface->nx);
  204. }
  205. /* XXX : returns CPU size */
  206. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  207. {
  208. size_t size;
  209. struct starpu_multiformat_interface *multiformat_interface;
  210. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  211. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  212. return size;
  213. }
  214. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  215. {
  216. struct starpu_multiformat_interface *multiformat_interface;
  217. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
  218. return multiformat_interface->nx;
  219. }
  220. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  221. {
  222. struct starpu_multiformat_interface *multiformat_interface;
  223. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  224. enum starpu_node_kind kind = starpu_node_get_kind(node);
  225. switch(kind)
  226. {
  227. case STARPU_CPU_RAM:
  228. free(multiformat_interface->cpu_ptr);
  229. multiformat_interface->cpu_ptr = NULL;
  230. break;
  231. #ifdef STARPU_USE_CUDA
  232. case STARPU_CUDA_RAM:
  233. if (multiformat_interface->cpu_ptr)
  234. {
  235. cudaFree(multiformat_interface->cpu_ptr);
  236. multiformat_interface->cpu_ptr = NULL;
  237. }
  238. if (multiformat_interface->cuda_ptr)
  239. {
  240. cudaFree(multiformat_interface->cuda_ptr);
  241. multiformat_interface->cuda_ptr = NULL;
  242. }
  243. break;
  244. #endif
  245. #ifdef STARPU_USE_OPENCL
  246. case STARPU_OPENCL_RAM:
  247. STARPU_ASSERT_MSG(0, "XXX multiformat not supported on OpenCL yet (TODO)");
  248. break;
  249. #endif
  250. default:
  251. STARPU_ABORT();
  252. }
  253. }
  254. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  255. {
  256. struct starpu_multiformat_interface *multiformat_interface;
  257. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  258. unsigned fail = 0;
  259. uintptr_t addr = 0;
  260. ssize_t allocated_memory = 0;
  261. enum starpu_node_kind kind = starpu_node_get_kind(dst_node);
  262. switch(kind)
  263. {
  264. case STARPU_CPU_RAM:
  265. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  266. addr = (uintptr_t)malloc(allocated_memory);
  267. if (!addr)
  268. {
  269. fail = 1;
  270. }
  271. else
  272. {
  273. multiformat_interface->cpu_ptr = (void *) addr;
  274. }
  275. #ifdef STARPU_USE_CUDA
  276. multiformat_interface->cuda_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  277. STARPU_ASSERT(multiformat_interface->cuda_ptr != NULL);
  278. #endif
  279. #ifdef STARPU_USE_OPENCL
  280. multiformat_interface->opencl_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  281. STARPU_ASSERT(multiformat_interface->opencl_ptr != NULL);
  282. #endif
  283. break;
  284. #ifdef STARPU_USE_CUDA
  285. case STARPU_CUDA_RAM:
  286. {
  287. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  288. cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
  289. if (STARPU_UNLIKELY(status))
  290. {
  291. STARPU_CUDA_REPORT_ERROR(status);
  292. }
  293. else
  294. {
  295. multiformat_interface->cuda_ptr = (void *)addr;
  296. }
  297. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  298. status = cudaMalloc((void **)&multiformat_interface->cpu_ptr, allocated_memory);
  299. if (STARPU_UNLIKELY(status != cudaSuccess))
  300. STARPU_CUDA_REPORT_ERROR(status);
  301. break;
  302. }
  303. #endif
  304. #ifdef STARPU_USE_OPENCL
  305. case STARPU_OPENCL_RAM:
  306. {
  307. int ret;
  308. cl_mem ptr;
  309. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  310. ret = starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
  311. addr = (uintptr_t)ptr;
  312. if (ret)
  313. {
  314. fail = 1;
  315. }
  316. else
  317. {
  318. multiformat_interface->opencl_ptr = (void *)addr;
  319. }
  320. ret = starpu_opencl_allocate_memory(&ptr,
  321. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize,
  322. CL_MEM_READ_WRITE);
  323. addr = (uintptr_t)ptr;
  324. if (ret)
  325. {
  326. fail = 1;
  327. }
  328. else
  329. {
  330. multiformat_interface->cpu_ptr = (void *) addr;
  331. }
  332. break;
  333. }
  334. #endif
  335. default:
  336. STARPU_ABORT();
  337. }
  338. if (fail)
  339. return -ENOMEM;
  340. return allocated_memory;
  341. }
  342. /*
  343. * Copy methods
  344. */
  345. static int copy_ram_to_ram(void *src_interface, unsigned src_node __attribute__ ((unused)),
  346. void *dst_interface, unsigned dst_node __attribute__ ((unused)))
  347. {
  348. struct starpu_multiformat_interface *src_multiformat;
  349. struct starpu_multiformat_interface *dst_multiformat;
  350. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  351. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  352. STARPU_ASSERT(src_multiformat != NULL);
  353. STARPU_ASSERT(dst_multiformat != NULL);
  354. STARPU_ASSERT(dst_multiformat->ops != NULL);
  355. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  356. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  357. return 0;
  358. }
  359. #ifdef STARPU_USE_CUDA
  360. static int copy_cuda_common(void *src_interface, unsigned src_node __attribute__ ((unused)),
  361. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  362. enum cudaMemcpyKind kind)
  363. {
  364. struct starpu_multiformat_interface *src_multiformat;
  365. struct starpu_multiformat_interface *dst_multiformat;
  366. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  367. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  368. size_t size;
  369. cudaError_t status;
  370. switch (kind)
  371. {
  372. case cudaMemcpyHostToDevice:
  373. {
  374. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  375. if (src_multiformat->cuda_ptr == NULL)
  376. {
  377. src_multiformat->cuda_ptr = malloc(size);
  378. if (src_multiformat->cuda_ptr == NULL)
  379. return -ENOMEM;
  380. }
  381. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  382. if (STARPU_UNLIKELY(status))
  383. {
  384. STARPU_CUDA_REPORT_ERROR(status);
  385. }
  386. break;
  387. }
  388. case cudaMemcpyDeviceToHost:
  389. {
  390. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  391. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  392. if (STARPU_UNLIKELY(status))
  393. STARPU_CUDA_REPORT_ERROR(status);
  394. break;
  395. }
  396. case cudaMemcpyDeviceToDevice:
  397. {
  398. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  399. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  400. if (STARPU_UNLIKELY(status))
  401. STARPU_CUDA_REPORT_ERROR(status);
  402. break;
  403. }
  404. default:
  405. STARPU_ABORT();
  406. }
  407. return 0;
  408. }
  409. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  410. {
  411. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  412. }
  413. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  414. {
  415. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  416. }
  417. static int copy_cuda_common_async(void *src_interface, unsigned src_node __attribute__ ((unused)),
  418. void *dst_interface, unsigned dst_node __attribute__ ((unused)),
  419. cudaStream_t stream, enum cudaMemcpyKind kind)
  420. {
  421. struct starpu_multiformat_interface *src_multiformat;
  422. struct starpu_multiformat_interface *dst_multiformat;
  423. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  424. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  425. size_t size;
  426. cudaError_t status;
  427. switch (kind)
  428. {
  429. case cudaMemcpyHostToDevice:
  430. {
  431. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  432. if (src_multiformat->cuda_ptr == NULL)
  433. {
  434. src_multiformat->cuda_ptr = malloc(size);
  435. if (src_multiformat->cuda_ptr == NULL)
  436. return -ENOMEM;
  437. }
  438. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  439. if (STARPU_UNLIKELY(status))
  440. {
  441. STARPU_CUDA_REPORT_ERROR(status);
  442. }
  443. break;
  444. }
  445. case cudaMemcpyDeviceToHost:
  446. {
  447. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  448. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  449. if (STARPU_UNLIKELY(status))
  450. STARPU_CUDA_REPORT_ERROR(status);
  451. break;
  452. }
  453. case cudaMemcpyDeviceToDevice:
  454. {
  455. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  456. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  457. if (STARPU_UNLIKELY(status))
  458. STARPU_CUDA_REPORT_ERROR(status);
  459. break;
  460. }
  461. default:
  462. STARPU_ABORT();
  463. }
  464. return 0;
  465. }
  466. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  467. {
  468. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  469. }
  470. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  471. {
  472. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  473. }
  474. #ifdef HAVE_CUDA_MEMCPY_PEER
  475. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  476. void *dst_interface, unsigned dst_node,
  477. cudaStream_t stream)
  478. {
  479. struct starpu_multiformat_interface *src_multiformat;
  480. struct starpu_multiformat_interface *dst_multiformat;
  481. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  482. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  483. STARPU_ASSERT(src_multiformat != NULL);
  484. STARPU_ASSERT(dst_multiformat != NULL);
  485. STARPU_ASSERT(src_multiformat->ops != NULL);
  486. cudaError_t status;
  487. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  488. int src_dev = _starpu_memory_node_to_devid(src_node);
  489. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  490. if (stream)
  491. {
  492. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  493. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  494. src_multiformat->cuda_ptr, src_dev,
  495. size, stream);
  496. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  497. /* All good ! Still, returning -EAGAIN, because we will need to
  498. check the transfert completion later */
  499. if (status == cudaSuccess)
  500. return -EAGAIN;
  501. }
  502. /* Either a synchronous transfert was requested, or the asynchronous one
  503. failed. */
  504. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  505. src_multiformat->cuda_ptr, src_dev,
  506. size);
  507. if (STARPU_UNLIKELY(status != cudaSuccess))
  508. STARPU_CUDA_REPORT_ERROR(status);
  509. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  510. return 0;
  511. }
  512. #endif
  513. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  514. {
  515. if (src_node == dst_node)
  516. {
  517. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  518. }
  519. else
  520. {
  521. #ifdef HAVE_CUDA_MEMCPY_PEER
  522. return copy_cuda_peer_common(src_interface, src_node,
  523. dst_interface, dst_node,
  524. NULL);
  525. #else
  526. STARPU_ABORT();
  527. #endif
  528. }
  529. }
  530. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  531. void *dst_interface, unsigned dst_node,
  532. cudaStream_t stream)
  533. {
  534. if (src_node == dst_node)
  535. {
  536. return copy_cuda_common_async(src_interface, src_node,
  537. dst_interface, dst_node,
  538. stream, cudaMemcpyDeviceToDevice);
  539. }
  540. else
  541. {
  542. #ifdef HAVE_CUDA_MEMCPY_PEER
  543. return copy_cuda_peer_common(src_interface, src_node,
  544. dst_interface, dst_node,
  545. stream);
  546. #else
  547. STARPU_ABORT();
  548. #endif
  549. }
  550. }
  551. #endif /* STARPU_USE_CUDA */
  552. #ifdef STARPU_USE_OPENCL
  553. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  554. void *dst_interface, unsigned dst_node,
  555. void *_event)
  556. {
  557. int err, ret;
  558. size_t size;
  559. struct starpu_multiformat_interface *src_multiformat;
  560. struct starpu_multiformat_interface *dst_multiformat;
  561. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  562. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  563. STARPU_ASSERT(src_multiformat != NULL);
  564. STARPU_ASSERT(dst_multiformat != NULL);
  565. STARPU_ASSERT(src_multiformat->ops != NULL);
  566. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  567. err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
  568. src_node,
  569. (cl_mem) dst_multiformat->cpu_ptr,
  570. dst_node,
  571. size,
  572. 0,
  573. (cl_event *) _event,
  574. &ret);
  575. if (STARPU_UNLIKELY(err))
  576. STARPU_OPENCL_REPORT_ERROR(err);
  577. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  578. return ret;
  579. }
  580. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  581. void *dst_interface, unsigned dst_node,
  582. void *_event)
  583. {
  584. int err, ret;
  585. size_t size;
  586. struct starpu_multiformat_interface *src_multiformat;
  587. struct starpu_multiformat_interface *dst_multiformat;
  588. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  589. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  590. STARPU_ASSERT(src_multiformat != NULL);
  591. STARPU_ASSERT(dst_multiformat != NULL);
  592. STARPU_ASSERT(src_multiformat->ops != NULL);
  593. STARPU_ASSERT(dst_multiformat->ops != NULL);
  594. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  595. if (dst_multiformat->opencl_ptr == NULL) {
  596. /* XXX : it is weird that we might have to allocate memory here... */
  597. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  598. }
  599. err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
  600. src_node,
  601. dst_multiformat->opencl_ptr,
  602. dst_node,
  603. size,
  604. 0,
  605. (cl_event *)_event,
  606. &ret);
  607. if (STARPU_UNLIKELY(err))
  608. STARPU_OPENCL_REPORT_ERROR(err);
  609. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  610. return ret;
  611. }
  612. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  613. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  614. {
  615. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  616. }
  617. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  618. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  619. {
  620. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  621. }
  /*
   * OpenCL -> OpenCL copy: not implemented yet.  The function only trips an
   * assertion; all arguments are ignored.  Returns 0 if the assertion does
   * not stop the program.
   */
  static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
                                   void *dst_interface, unsigned dst_node)
  {
      /* Silence unused-parameter warnings. */
      (void) src_node;
      (void) dst_node;
      (void) src_interface;
      (void) dst_interface;

      STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");

      return 0;
  }
  632. #endif