multiformat_interface.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/config.h>
  18. #include <datawizard/coherency.h>
  19. #include <datawizard/copy_driver.h>
  20. #include <datawizard/filters.h>
  21. #include <common/hash.h>
  22. #include <starpu_cuda.h>
  23. #include <starpu_opencl.h>
  24. #include <drivers/opencl/driver_opencl.h>
  25. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  26. #ifdef STARPU_USE_CUDA
  27. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  28. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  30. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  31. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  32. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  33. #endif
  34. #ifdef STARPU_USE_OPENCL
  35. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  36. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  37. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  39. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  40. #endif
  41. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  42. {
  43. .ram_to_ram = copy_ram_to_ram,
  44. .ram_to_spu = NULL,
  45. #ifdef STARPU_USE_CUDA
  46. .ram_to_cuda = copy_ram_to_cuda,
  47. .cuda_to_ram = copy_cuda_to_ram,
  48. .ram_to_cuda_async = copy_ram_to_cuda_async,
  49. .cuda_to_ram_async = copy_cuda_to_ram_async,
  50. .cuda_to_cuda = copy_cuda_to_cuda,
  51. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  52. #endif
  53. #ifdef STARPU_USE_OPENCL
  54. .ram_to_opencl = copy_ram_to_opencl,
  55. .opencl_to_ram = copy_opencl_to_ram,
  56. .opencl_to_opencl = copy_opencl_to_opencl,
  57. .ram_to_opencl_async = copy_ram_to_opencl_async,
  58. .opencl_to_ram_async = copy_opencl_to_ram_async,
  59. #endif
  60. .cuda_to_spu = NULL,
  61. .spu_to_ram = NULL,
  62. .spu_to_cuda = NULL,
  63. .spu_to_spu = NULL
  64. };
  65. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  66. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  67. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  68. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  69. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  70. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  71. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  72. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  73. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  74. #ifdef STARPU_USE_GORDON
  75. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  76. #endif
  77. static struct starpu_data_interface_ops interface_multiformat_ops =
  78. {
  79. .register_data_handle = register_multiformat_handle,
  80. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  81. .handle_to_pointer = multiformat_handle_to_pointer,
  82. .free_data_on_node = free_multiformat_buffer_on_node,
  83. .copy_methods = &multiformat_copy_data_methods_s,
  84. .get_size = multiformat_interface_get_size,
  85. .footprint = footprint_multiformat_interface_crc32,
  86. .compare = multiformat_compare,
  87. #ifdef STARPU_USE_GORDON
  88. .convert_to_gordon = NULL,
  89. #endif
  90. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  91. .interface_size = sizeof(struct starpu_multiformat_interface),
  92. .display = display_multiformat_interface
  93. };
  94. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  95. {
  96. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  97. struct starpu_multiformat_interface *multiformat_interface =
  98. starpu_data_get_interface_on_node(handle, node);
  99. switch(_starpu_get_node_kind(node))
  100. {
  101. case STARPU_CPU_RAM:
  102. return multiformat_interface->cpu_ptr;
  103. #ifdef STARPU_USE_CUDA
  104. case STARPU_CUDA_RAM:
  105. return multiformat_interface->cuda_ptr;
  106. #endif
  107. #ifdef STARPU_USE_OPENCL
  108. case STARPU_OPENCL_RAM:
  109. return multiformat_interface->opencl_ptr;
  110. #endif
  111. default:
  112. STARPU_ASSERT(0);
  113. }
  114. }
  115. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  116. {
  117. struct starpu_multiformat_interface *multiformat_interface;
  118. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  119. unsigned node;
  120. for (node = 0; node < STARPU_MAXNODES; node++)
  121. {
  122. struct starpu_multiformat_interface *local_interface =
  123. starpu_data_get_interface_on_node(handle, node);
  124. if (node == home_node)
  125. {
  126. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  127. #ifdef STARPU_USE_CUDA
  128. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  129. #endif
  130. #ifdef STARPU_USE_OPENCL
  131. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  132. #endif
  133. local_interface->dev_handle = multiformat_interface->dev_handle;
  134. local_interface->offset = multiformat_interface->offset;
  135. }
  136. else
  137. {
  138. local_interface->cpu_ptr = NULL;
  139. #ifdef STARPU_USE_CUDA
  140. local_interface->cuda_ptr = NULL;
  141. #endif
  142. #ifdef STARPU_USE_OPENCL
  143. local_interface->opencl_ptr = NULL;
  144. #endif
  145. local_interface->dev_handle = 0;
  146. local_interface->offset = 0;
  147. }
  148. local_interface->nx = multiformat_interface->nx;
  149. local_interface->ops = multiformat_interface->ops;
  150. local_interface->conversion_time = 0;
  151. }
  152. }
  153. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  154. uint32_t home_node,
  155. void *ptr,
  156. uint32_t nobjects,
  157. struct starpu_multiformat_data_interface_ops *format_ops)
  158. {
  159. struct starpu_multiformat_interface multiformat =
  160. {
  161. .cpu_ptr = ptr,
  162. #ifdef STARPU_USE_CUDA
  163. .cuda_ptr = NULL,
  164. #endif
  165. #ifdef STARPu_USE_OPENCL
  166. .opencl_ptr = NULL,
  167. #endif
  168. .nx = nobjects,
  169. .dev_handle = (uintptr_t) ptr,
  170. .offset = 0,
  171. .ops = format_ops
  172. };
  173. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  174. }
  175. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  176. {
  177. return _starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  178. }
  179. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  180. {
  181. struct starpu_multiformat_interface *multiformat_a = data_interface_a;
  182. struct starpu_multiformat_interface *multiformat_b = data_interface_b;
  183. return ((multiformat_a->nx == multiformat_b->nx)
  184. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  185. #ifdef STARPU_USE_CUDA
  186. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  187. #endif
  188. #if STARPU_USE_OPENCL
  189. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  190. #endif
  191. );
  192. }
  193. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  194. {
  195. /* TODO */
  196. struct starpu_multiformat_interface *multiformat_interface;
  197. multiformat_interface = (struct starpu_multiformat_interface *)
  198. starpu_data_get_interface_on_node(handle, 0);
  199. fprintf(f, "%u\t", multiformat_interface->nx);
  200. }
  201. /* XXX : returns CPU size */
  202. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  203. {
  204. size_t size;
  205. struct starpu_multiformat_interface *multiformat_interface;
  206. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  207. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  208. return size;
  209. }
  210. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  211. {
  212. struct starpu_multiformat_interface *multiformat_interface;
  213. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  214. return multiformat_interface->nx;
  215. }
  216. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  217. {
  218. struct starpu_multiformat_interface *multiformat_interface;
  219. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  220. enum _starpu_node_kind kind = _starpu_get_node_kind(node);
  221. switch(kind)
  222. {
  223. case STARPU_CPU_RAM:
  224. free(multiformat_interface->cpu_ptr);
  225. multiformat_interface->cpu_ptr = NULL;
  226. break;
  227. #ifdef STARPU_USE_CUDA
  228. case STARPU_CUDA_RAM:
  229. cudaFree(multiformat_interface->cuda_ptr);
  230. multiformat_interface->cuda_ptr = NULL;
  231. break;
  232. #endif
  233. #ifdef STARPU_USE_OPENCL
  234. case STARPU_OPENCL_RAM:
  235. /* TODO */
  236. break;
  237. #endif
  238. default:
  239. STARPU_ABORT();
  240. }
  241. }
  242. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  243. {
  244. struct starpu_multiformat_interface *multiformat_interface;
  245. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  246. unsigned fail = 0;
  247. uintptr_t addr = 0;
  248. ssize_t allocated_memory;
  249. enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
  250. switch(kind)
  251. {
  252. case STARPU_CPU_RAM:
  253. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  254. addr = (uintptr_t)malloc(allocated_memory);
  255. if (!addr)
  256. {
  257. fail = 1;
  258. }
  259. else
  260. {
  261. multiformat_interface->cpu_ptr = (void *) addr;
  262. multiformat_interface->dev_handle = addr;
  263. }
  264. break;
  265. #ifdef STARPU_USE_CUDA
  266. case STARPU_CUDA_RAM:
  267. {
  268. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  269. cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
  270. if (STARPU_UNLIKELY(status))
  271. {
  272. STARPU_CUDA_REPORT_ERROR(status);
  273. }
  274. else
  275. {
  276. multiformat_interface->cuda_ptr = (void *)addr;
  277. multiformat_interface->dev_handle = addr;
  278. }
  279. break;
  280. }
  281. #endif
  282. #ifdef STARPU_USE_OPENCL
  283. case STARPU_OPENCL_RAM:
  284. {
  285. int ret;
  286. void *ptr;
  287. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  288. ret = _starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
  289. addr = (uintptr_t)ptr;
  290. if (ret)
  291. {
  292. fail = 1;
  293. }
  294. else
  295. {
  296. multiformat_interface->opencl_ptr = (void *)addr;
  297. multiformat_interface->dev_handle = addr;
  298. }
  299. break;
  300. }
  301. #endif
  302. default:
  303. STARPU_ASSERT(0);
  304. }
  305. if (fail)
  306. return -ENOMEM;
  307. multiformat_interface->offset = 0;
  308. return allocated_memory;
  309. }
  310. /*
  311. * Copy methods
  312. */
  313. static int copy_ram_to_ram(void *src_interface, unsigned src_node,
  314. void *dst_interface, unsigned dst_node)
  315. {
  316. struct starpu_multiformat_interface *src_multiformat;
  317. struct starpu_multiformat_interface *dst_multiformat;
  318. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  319. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  320. STARPU_ASSERT(src_multiformat != NULL);
  321. STARPU_ASSERT(dst_multiformat != NULL);
  322. STARPU_ASSERT(dst_multiformat->ops != NULL);
  323. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  324. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  325. return 0;
  326. }
  327. #ifdef STARPU_USE_CUDA
  328. static int copy_cuda_common(void *src_interface, unsigned src_node,
  329. void *dst_interface, unsigned dst_node,
  330. enum cudaMemcpyKind kind)
  331. {
  332. struct starpu_multiformat_interface *src_multiformat;
  333. struct starpu_multiformat_interface *dst_multiformat;
  334. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  335. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  336. size_t size;
  337. cudaError_t status;
  338. switch (kind)
  339. {
  340. case cudaMemcpyHostToDevice:
  341. {
  342. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  343. if (src_multiformat->cuda_ptr == NULL)
  344. {
  345. src_multiformat->cuda_ptr = malloc(size);
  346. if (src_multiformat->cuda_ptr == NULL)
  347. return -ENOMEM;
  348. }
  349. /* Converting data , from host to host */
  350. double tmp = starpu_timing_now();
  351. void *buffers[1];
  352. buffers[0] = src_interface;
  353. struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
  354. cl->cpu_func(buffers, NULL);
  355. dst_multiformat->conversion_time = starpu_timing_now() - tmp;
  356. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  357. if (STARPU_UNLIKELY(status))
  358. {
  359. STARPU_CUDA_REPORT_ERROR(status);
  360. }
  361. break;
  362. }
  363. case cudaMemcpyDeviceToHost:
  364. {
  365. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  366. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  367. if (STARPU_UNLIKELY(status))
  368. STARPU_CUDA_REPORT_ERROR(status);
  369. void *buffers[1];
  370. struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
  371. buffers[0] = dst_interface;
  372. cl->cpu_func(buffers, NULL);
  373. break;
  374. }
  375. case cudaMemcpyDeviceToDevice:
  376. {
  377. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  378. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  379. if (STARPU_UNLIKELY(status))
  380. STARPU_CUDA_REPORT_ERROR(status);
  381. break;
  382. }
  383. default:
  384. STARPU_ASSERT(0);
  385. }
  386. return 0;
  387. }
  388. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  389. {
  390. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  391. }
  392. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  393. {
  394. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  395. }
  396. static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind)
  397. {
  398. struct starpu_multiformat_interface *src_multiformat;
  399. struct starpu_multiformat_interface *dst_multiformat;
  400. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  401. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  402. size_t size;
  403. cudaError_t status;
  404. switch (kind)
  405. {
  406. case cudaMemcpyHostToDevice:
  407. {
  408. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  409. if (src_multiformat->cuda_ptr == NULL)
  410. {
  411. src_multiformat->cuda_ptr = malloc(size);
  412. if (src_multiformat->cuda_ptr == NULL)
  413. return -ENOMEM;
  414. }
  415. /* Converting data , from host to host */
  416. double tmp = starpu_timing_now();
  417. void *buffers[1]; // XXX
  418. buffers[0] = src_interface;
  419. struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
  420. cl->cpu_func(buffers, NULL);
  421. dst_multiformat->conversion_time = starpu_timing_now() - tmp;
  422. /* Actual copy from host to device */
  423. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  424. if (STARPU_UNLIKELY(status))
  425. {
  426. STARPU_CUDA_REPORT_ERROR(status);
  427. }
  428. break;
  429. }
  430. case cudaMemcpyDeviceToHost:
  431. {
  432. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  433. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  434. if (STARPU_UNLIKELY(status))
  435. STARPU_CUDA_REPORT_ERROR(status);
  436. /* Converting data */
  437. void *buffers[1];
  438. struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
  439. buffers[0] = dst_interface;
  440. cl->cpu_func(buffers, NULL);
  441. break;
  442. }
  443. case cudaMemcpyDeviceToDevice:
  444. {
  445. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  446. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  447. if (STARPU_UNLIKELY(status))
  448. STARPU_CUDA_REPORT_ERROR(status);
  449. break;
  450. }
  451. default:
  452. STARPU_ASSERT(0);
  453. }
  454. return 0;
  455. }
  456. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  457. {
  458. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  459. }
  460. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  461. {
  462. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  463. }
  464. #ifdef HAVE_CUDA_MEMCPY_PEER
  465. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  466. void *dst_interface, unsigned dst_node,
  467. cudaStream_t stream)
  468. {
  469. struct starpu_multiformat_interface *src_multiformat;
  470. struct starpu_multiformat_interface *dst_multiformat;
  471. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  472. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  473. STARPU_ASSERT(src_multiformat != NULL);
  474. STARPU_ASSERT(dst_multiformat != NULL);
  475. STARPU_ASSERT(src_multiformat->ops != NULL);
  476. cudaError_t status;
  477. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  478. int src_dev = _starpu_memory_node_to_devid(src_node);
  479. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  480. if (stream)
  481. {
  482. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  483. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  484. src_multiformat->cuda_ptr, src_dev,
  485. size, stream);
  486. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  487. /* All good ! Still, returning -EAGAIN, because we will need to
  488. check the transfert completion later */
  489. if (status == cudaSuccess)
  490. return -EAGAIN;
  491. }
  492. /* Either a synchronous transfert was requested, or the asynchronous one
  493. failed. */
  494. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  495. src_multiformat->cuda_ptr, src_dev,
  496. size);
  497. if (STARPU_UNLIKELY(status != cudaSuccess))
  498. STARPU_CUDA_REPORT_ERROR(status);
  499. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  500. return 0;
  501. }
  502. #endif
  503. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  504. {
  505. if (src_node == dst_node)
  506. {
  507. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  508. }
  509. else
  510. {
  511. #ifdef HAVE_CUDA_MEMCPY_PEER
  512. return copy_cuda_peer_common(src_interface, src_node,
  513. dst_interface, dst_node,
  514. NULL);
  515. #else
  516. STARPU_ASSERT(0);
  517. #endif
  518. }
  519. }
  520. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  521. void *dst_interface, unsigned dst_node,
  522. cudaStream_t stream)
  523. {
  524. if (src_node == dst_node)
  525. {
  526. return copy_cuda_common_async(src_interface, src_node,
  527. dst_interface, dst_node,
  528. stream, cudaMemcpyDeviceToDevice);
  529. }
  530. else
  531. {
  532. #ifdef HAVE_CUDA_MEMCPY_PEER
  533. return copy_cuda_peer_common(src_interface, src_node,
  534. dst_interface, dst_node,
  535. stream);
  536. #else
  537. STARPU_ASSERT(0);
  538. #endif
  539. }
  540. }
  541. #endif /* STARPU_USE_CUDA */
  542. #ifdef STARPU_USE_OPENCL
  543. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  544. void *dst_interface, unsigned dst_node,
  545. void *_event)
  546. {
  547. int err, ret;
  548. size_t size;
  549. struct starpu_multiformat_interface *src_multiformat;
  550. struct starpu_multiformat_interface *dst_multiformat;
  551. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  552. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  553. STARPU_ASSERT(src_multiformat != NULL);
  554. STARPU_ASSERT(dst_multiformat != NULL);
  555. STARPU_ASSERT(src_multiformat->ops != NULL);
  556. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  557. if (src_multiformat->opencl_ptr == NULL)
  558. {
  559. src_multiformat->opencl_ptr = malloc(src_multiformat->nx * src_multiformat->ops->opencl_elemsize);
  560. if (src_multiformat->opencl_ptr == NULL)
  561. {
  562. return -ENOMEM;
  563. }
  564. }
  565. double tmp = starpu_timing_now();
  566. void *buffers[1];
  567. struct starpu_codelet *cl = src_multiformat->ops->cpu_to_opencl_cl;
  568. buffers[0] = src_interface;
  569. cl->cpu_func(buffers, NULL);
  570. dst_multiformat->conversion_time = starpu_timing_now() - tmp;
  571. err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->opencl_ptr,
  572. src_node,
  573. (cl_mem) dst_multiformat->dev_handle,
  574. dst_node,
  575. size,
  576. dst_multiformat->offset,
  577. (cl_event *) _event,
  578. &ret);
  579. if (STARPU_UNLIKELY(err))
  580. STARPU_OPENCL_REPORT_ERROR(err);
  581. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  582. return ret;
  583. }
  584. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  585. void *dst_interface, unsigned dst_node,
  586. void *_event)
  587. {
  588. int err, ret;
  589. size_t size;
  590. struct starpu_multiformat_interface *src_multiformat;
  591. struct starpu_multiformat_interface *dst_multiformat;
  592. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  593. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  594. STARPU_ASSERT(src_multiformat != NULL);
  595. STARPU_ASSERT(dst_multiformat != NULL);
  596. STARPU_ASSERT(src_multiformat->ops != NULL);
  597. STARPU_ASSERT(dst_multiformat->ops != NULL);
  598. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize,
  599. err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->dev_handle,
  600. src_node,
  601. dst_multiformat->opencl_ptr,
  602. dst_node,
  603. size,
  604. src_multiformat->offset,
  605. (cl_event *)_event,
  606. &ret);
  607. if (STARPU_UNLIKELY(err))
  608. STARPU_OPENCL_REPORT_ERROR(err);
  609. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  610. /* XXX So much for asynchronicity */
  611. clWaitForEvents(1, _event);
  612. void *buffers[1];
  613. struct starpu_codelet *cl = src_multiformat->ops->opencl_to_cpu_cl;
  614. buffers[0] = dst_interface;
  615. cl->cpu_func(buffers, NULL);
  616. return ret;
  617. }
  618. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  619. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  620. {
  621. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  622. }
  623. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  624. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  625. {
  626. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  627. }
  628. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  629. void *dst_interface, unsigned dst_node)
  630. {
  631. /* TODO */
  632. }
  633. #endif