multiformat_interface.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/config.h>
  18. #include <datawizard/coherency.h>
  19. #include <datawizard/copy_driver.h>
  20. #include <datawizard/filters.h>
  21. #include <common/hash.h>
  22. #include <starpu_cuda.h>
  23. #include <starpu_opencl.h>
  24. #include <drivers/opencl/driver_opencl.h>
  25. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  26. #ifdef STARPU_USE_CUDA
  27. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  28. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  30. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  31. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  32. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  33. #endif
  34. #ifdef STARPU_USE_OPENCL
  35. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  36. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  37. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  39. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  40. #endif
  41. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  42. {
  43. .ram_to_ram = copy_ram_to_ram,
  44. .ram_to_spu = NULL,
  45. #ifdef STARPU_USE_CUDA
  46. .ram_to_cuda = copy_ram_to_cuda,
  47. .cuda_to_ram = copy_cuda_to_ram,
  48. .ram_to_cuda_async = copy_ram_to_cuda_async,
  49. .cuda_to_ram_async = copy_cuda_to_ram_async,
  50. .cuda_to_cuda = copy_cuda_to_cuda,
  51. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  52. #endif
  53. #ifdef STARPU_USE_OPENCL
  54. .ram_to_opencl = copy_ram_to_opencl,
  55. .opencl_to_ram = copy_opencl_to_ram,
  56. .opencl_to_opencl = copy_opencl_to_opencl,
  57. .ram_to_opencl_async = copy_ram_to_opencl_async,
  58. .opencl_to_ram_async = copy_opencl_to_ram_async,
  59. #endif
  60. .cuda_to_spu = NULL,
  61. .spu_to_ram = NULL,
  62. .spu_to_cuda = NULL,
  63. .spu_to_spu = NULL
  64. };
  65. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  66. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  67. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  68. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  69. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  70. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  71. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  72. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  73. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  74. #ifdef STARPU_USE_GORDON
  75. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  76. #endif
  77. static struct starpu_data_interface_ops interface_multiformat_ops =
  78. {
  79. .register_data_handle = register_multiformat_handle,
  80. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  81. .handle_to_pointer = multiformat_handle_to_pointer,
  82. .free_data_on_node = free_multiformat_buffer_on_node,
  83. .copy_methods = &multiformat_copy_data_methods_s,
  84. .get_size = multiformat_interface_get_size,
  85. .footprint = footprint_multiformat_interface_crc32,
  86. .compare = multiformat_compare,
  87. #ifdef STARPU_USE_GORDON
  88. .convert_to_gordon = NULL,
  89. #endif
  90. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  91. .interface_size = sizeof(struct starpu_multiformat_interface),
  92. .display = display_multiformat_interface
  93. };
  94. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  95. {
  96. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  97. struct starpu_multiformat_interface *multiformat_interface =
  98. starpu_data_get_interface_on_node(handle, node);
  99. switch(_starpu_get_node_kind(node))
  100. {
  101. case STARPU_CPU_RAM:
  102. return multiformat_interface->cpu_ptr;
  103. #ifdef STARPU_USE_CUDA
  104. case STARPU_CUDA_RAM:
  105. return multiformat_interface->cuda_ptr;
  106. #endif
  107. #ifdef STARPU_USE_OPENCL
  108. case STARPU_OPENCL_RAM:
  109. return multiformat_interface->opencl_ptr;
  110. #endif
  111. default:
  112. STARPU_ASSERT(0);
  113. }
  114. }
  115. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  116. {
  117. struct starpu_multiformat_interface *multiformat_interface;
  118. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  119. unsigned node;
  120. for (node = 0; node < STARPU_MAXNODES; node++)
  121. {
  122. struct starpu_multiformat_interface *local_interface =
  123. starpu_data_get_interface_on_node(handle, node);
  124. if (node == home_node)
  125. {
  126. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  127. #ifdef STARPU_USE_CUDA
  128. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  129. #endif
  130. #ifdef STARPU_USE_OPENCL
  131. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  132. #endif
  133. local_interface->dev_handle = multiformat_interface->dev_handle;
  134. local_interface->offset = multiformat_interface->offset;
  135. }
  136. else
  137. {
  138. local_interface->cpu_ptr = NULL;
  139. #ifdef STARPU_USE_CUDA
  140. local_interface->cuda_ptr = NULL;
  141. #endif
  142. #ifdef STARPU_USE_OPENCL
  143. local_interface->opencl_ptr = NULL;
  144. #endif
  145. local_interface->dev_handle = 0;
  146. local_interface->offset = 0;
  147. }
  148. local_interface->nx = multiformat_interface->nx;
  149. local_interface->ops = multiformat_interface->ops;
  150. }
  151. }
  152. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  153. uint32_t home_node,
  154. void *ptr,
  155. uint32_t nobjects,
  156. struct starpu_multiformat_data_interface_ops *format_ops)
  157. {
  158. struct starpu_multiformat_interface multiformat =
  159. {
  160. .cpu_ptr = ptr,
  161. #ifdef STARPU_USE_CUDA
  162. .cuda_ptr = NULL,
  163. #endif
  164. #ifdef STARPu_USE_OPENCL
  165. .opencl_ptr = NULL,
  166. #endif
  167. .nx = nobjects,
  168. .dev_handle = (uintptr_t) ptr,
  169. .offset = 0,
  170. .ops = format_ops
  171. };
  172. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  173. }
  174. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  175. {
  176. return _starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  177. }
  178. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  179. {
  180. struct starpu_multiformat_interface *multiformat_a = data_interface_a;
  181. struct starpu_multiformat_interface *multiformat_b = data_interface_b;
  182. return ((multiformat_a->nx == multiformat_b->nx)
  183. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  184. #ifdef STARPU_USE_CUDA
  185. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  186. #endif
  187. #if STARPU_USE_OPENCL
  188. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  189. #endif
  190. );
  191. }
  192. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  193. {
  194. /* TODO */
  195. struct starpu_multiformat_interface *multiformat_interface;
  196. multiformat_interface = (struct starpu_multiformat_interface *)
  197. starpu_data_get_interface_on_node(handle, 0);
  198. fprintf(f, "%u\t", multiformat_interface->nx);
  199. }
  200. /* XXX : returns CPU size */
  201. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  202. {
  203. size_t size;
  204. struct starpu_multiformat_interface *multiformat_interface;
  205. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  206. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  207. return size;
  208. }
  209. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  210. {
  211. struct starpu_multiformat_interface *multiformat_interface;
  212. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  213. return multiformat_interface->nx;
  214. }
  215. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  216. {
  217. struct starpu_multiformat_interface *multiformat_interface;
  218. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  219. enum _starpu_node_kind kind = _starpu_get_node_kind(node);
  220. switch(kind)
  221. {
  222. case STARPU_CPU_RAM:
  223. free(multiformat_interface->cpu_ptr);
  224. multiformat_interface->cpu_ptr = NULL;
  225. break;
  226. #ifdef STARPU_USE_CUDA
  227. case STARPU_CUDA_RAM:
  228. if (multiformat_interface->cpu_ptr)
  229. {
  230. cudaFree(multiformat_interface->cpu_ptr);
  231. multiformat_interface->cpu_ptr = NULL;
  232. }
  233. if (multiformat_interface->cuda_ptr)
  234. {
  235. cudaFree(multiformat_interface->cuda_ptr);
  236. multiformat_interface->cuda_ptr = NULL;
  237. }
  238. break;
  239. #endif
  240. #ifdef STARPU_USE_OPENCL
  241. case STARPU_OPENCL_RAM:
  242. /* TODO */
  243. break;
  244. #endif
  245. default:
  246. STARPU_ABORT();
  247. }
  248. }
  249. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  250. {
  251. struct starpu_multiformat_interface *multiformat_interface;
  252. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  253. unsigned fail = 0;
  254. uintptr_t addr = 0;
  255. ssize_t allocated_memory;
  256. enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
  257. switch(kind)
  258. {
  259. case STARPU_CPU_RAM:
  260. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  261. addr = (uintptr_t)malloc(allocated_memory);
  262. if (!addr)
  263. {
  264. fail = 1;
  265. }
  266. else
  267. {
  268. multiformat_interface->cpu_ptr = (void *) addr;
  269. multiformat_interface->dev_handle = addr;
  270. }
  271. #ifdef STARPU_USE_CUDA
  272. multiformat_interface->cuda_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  273. STARPU_ASSERT(multiformat_interface->cuda_ptr != NULL);
  274. #endif
  275. #ifdef STARPU_USE_OPENCL
  276. multiformat_interface->opencl_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  277. STARPU_ASSERT(multiformat_interface->opencl_ptr != NULL);
  278. #endif
  279. break;
  280. #ifdef STARPU_USE_CUDA
  281. case STARPU_CUDA_RAM:
  282. {
  283. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  284. cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
  285. if (STARPU_UNLIKELY(status))
  286. {
  287. STARPU_CUDA_REPORT_ERROR(status);
  288. }
  289. else
  290. {
  291. multiformat_interface->cuda_ptr = (void *)addr;
  292. multiformat_interface->dev_handle = addr;
  293. }
  294. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  295. status = cudaMalloc((void **)&multiformat_interface->cpu_ptr, allocated_memory);
  296. break;
  297. }
  298. #endif
  299. #ifdef STARPU_USE_OPENCL
  300. case STARPU_OPENCL_RAM:
  301. {
  302. int ret;
  303. void *ptr;
  304. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  305. ret = _starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
  306. addr = (uintptr_t)ptr;
  307. if (ret)
  308. {
  309. fail = 1;
  310. }
  311. else
  312. {
  313. multiformat_interface->opencl_ptr = (void *)addr;
  314. multiformat_interface->dev_handle = addr;
  315. }
  316. _starpu_opencl_allocate_memory(&multiformat_interface->cpu_ptr,
  317. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize,
  318. CL_MEM_READ_WRITE);
  319. break;
  320. }
  321. #endif
  322. default:
  323. STARPU_ASSERT(0);
  324. }
  325. if (fail)
  326. return -ENOMEM;
  327. multiformat_interface->offset = 0;
  328. return allocated_memory;
  329. }
  330. /*
  331. * Copy methods
  332. */
  333. static int copy_ram_to_ram(void *src_interface, unsigned src_node,
  334. void *dst_interface, unsigned dst_node)
  335. {
  336. struct starpu_multiformat_interface *src_multiformat;
  337. struct starpu_multiformat_interface *dst_multiformat;
  338. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  339. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  340. STARPU_ASSERT(src_multiformat != NULL);
  341. STARPU_ASSERT(dst_multiformat != NULL);
  342. STARPU_ASSERT(dst_multiformat->ops != NULL);
  343. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  344. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  345. return 0;
  346. }
  347. #ifdef STARPU_USE_CUDA
  348. static int copy_cuda_common(void *src_interface, unsigned src_node,
  349. void *dst_interface, unsigned dst_node,
  350. enum cudaMemcpyKind kind)
  351. {
  352. struct starpu_multiformat_interface *src_multiformat;
  353. struct starpu_multiformat_interface *dst_multiformat;
  354. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  355. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  356. size_t size;
  357. cudaError_t status;
  358. switch (kind)
  359. {
  360. case cudaMemcpyHostToDevice:
  361. {
  362. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  363. if (src_multiformat->cuda_ptr == NULL)
  364. {
  365. src_multiformat->cuda_ptr = malloc(size);
  366. if (src_multiformat->cuda_ptr == NULL)
  367. return -ENOMEM;
  368. }
  369. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  370. if (STARPU_UNLIKELY(status))
  371. {
  372. STARPU_CUDA_REPORT_ERROR(status);
  373. }
  374. break;
  375. }
  376. case cudaMemcpyDeviceToHost:
  377. {
  378. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  379. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  380. if (STARPU_UNLIKELY(status))
  381. STARPU_CUDA_REPORT_ERROR(status);
  382. break;
  383. }
  384. case cudaMemcpyDeviceToDevice:
  385. {
  386. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  387. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  388. if (STARPU_UNLIKELY(status))
  389. STARPU_CUDA_REPORT_ERROR(status);
  390. break;
  391. }
  392. default:
  393. STARPU_ASSERT(0);
  394. }
  395. return 0;
  396. }
  397. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  398. {
  399. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  400. }
  401. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  402. {
  403. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  404. }
  405. static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind)
  406. {
  407. struct starpu_multiformat_interface *src_multiformat;
  408. struct starpu_multiformat_interface *dst_multiformat;
  409. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  410. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  411. size_t size;
  412. cudaError_t status;
  413. switch (kind)
  414. {
  415. case cudaMemcpyHostToDevice:
  416. {
  417. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  418. if (src_multiformat->cuda_ptr == NULL)
  419. {
  420. src_multiformat->cuda_ptr = malloc(size);
  421. if (src_multiformat->cuda_ptr == NULL)
  422. return -ENOMEM;
  423. }
  424. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  425. if (STARPU_UNLIKELY(status))
  426. {
  427. STARPU_CUDA_REPORT_ERROR(status);
  428. }
  429. break;
  430. }
  431. case cudaMemcpyDeviceToHost:
  432. {
  433. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  434. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  435. if (STARPU_UNLIKELY(status))
  436. STARPU_CUDA_REPORT_ERROR(status);
  437. break;
  438. }
  439. case cudaMemcpyDeviceToDevice:
  440. {
  441. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  442. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  443. if (STARPU_UNLIKELY(status))
  444. STARPU_CUDA_REPORT_ERROR(status);
  445. break;
  446. }
  447. default:
  448. STARPU_ASSERT(0);
  449. }
  450. return 0;
  451. }
  452. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  453. {
  454. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  455. }
  456. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  457. {
  458. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  459. }
  460. #ifdef HAVE_CUDA_MEMCPY_PEER
  461. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  462. void *dst_interface, unsigned dst_node,
  463. cudaStream_t stream)
  464. {
  465. struct starpu_multiformat_interface *src_multiformat;
  466. struct starpu_multiformat_interface *dst_multiformat;
  467. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  468. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  469. STARPU_ASSERT(src_multiformat != NULL);
  470. STARPU_ASSERT(dst_multiformat != NULL);
  471. STARPU_ASSERT(src_multiformat->ops != NULL);
  472. cudaError_t status;
  473. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  474. int src_dev = _starpu_memory_node_to_devid(src_node);
  475. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  476. if (stream)
  477. {
  478. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  479. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  480. src_multiformat->cuda_ptr, src_dev,
  481. size, stream);
  482. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  483. /* All good ! Still, returning -EAGAIN, because we will need to
  484. check the transfert completion later */
  485. if (status == cudaSuccess)
  486. return -EAGAIN;
  487. }
  488. /* Either a synchronous transfert was requested, or the asynchronous one
  489. failed. */
  490. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  491. src_multiformat->cuda_ptr, src_dev,
  492. size);
  493. if (STARPU_UNLIKELY(status != cudaSuccess))
  494. STARPU_CUDA_REPORT_ERROR(status);
  495. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  496. return 0;
  497. }
  498. #endif
  499. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  500. {
  501. if (src_node == dst_node)
  502. {
  503. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  504. }
  505. else
  506. {
  507. #ifdef HAVE_CUDA_MEMCPY_PEER
  508. return copy_cuda_peer_common(src_interface, src_node,
  509. dst_interface, dst_node,
  510. NULL);
  511. #else
  512. STARPU_ASSERT(0);
  513. #endif
  514. }
  515. }
  516. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  517. void *dst_interface, unsigned dst_node,
  518. cudaStream_t stream)
  519. {
  520. if (src_node == dst_node)
  521. {
  522. return copy_cuda_common_async(src_interface, src_node,
  523. dst_interface, dst_node,
  524. stream, cudaMemcpyDeviceToDevice);
  525. }
  526. else
  527. {
  528. #ifdef HAVE_CUDA_MEMCPY_PEER
  529. return copy_cuda_peer_common(src_interface, src_node,
  530. dst_interface, dst_node,
  531. stream);
  532. #else
  533. STARPU_ASSERT(0);
  534. #endif
  535. }
  536. }
  537. #endif /* STARPU_USE_CUDA */
  538. #ifdef STARPU_USE_OPENCL
  539. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  540. void *dst_interface, unsigned dst_node,
  541. void *_event)
  542. {
  543. int err, ret;
  544. size_t size;
  545. struct starpu_multiformat_interface *src_multiformat;
  546. struct starpu_multiformat_interface *dst_multiformat;
  547. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  548. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  549. STARPU_ASSERT(src_multiformat != NULL);
  550. STARPU_ASSERT(dst_multiformat != NULL);
  551. STARPU_ASSERT(src_multiformat->ops != NULL);
  552. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  553. err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->cpu_ptr,
  554. src_node,
  555. (cl_mem) dst_multiformat->cpu_ptr,
  556. dst_node,
  557. size,
  558. dst_multiformat->offset,
  559. (cl_event *) _event,
  560. &ret);
  561. if (STARPU_UNLIKELY(err))
  562. STARPU_OPENCL_REPORT_ERROR(err);
  563. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  564. return ret;
  565. }
  566. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  567. void *dst_interface, unsigned dst_node,
  568. void *_event)
  569. {
  570. int err, ret;
  571. size_t size;
  572. struct starpu_multiformat_interface *src_multiformat;
  573. struct starpu_multiformat_interface *dst_multiformat;
  574. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  575. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  576. STARPU_ASSERT(src_multiformat != NULL);
  577. STARPU_ASSERT(dst_multiformat != NULL);
  578. STARPU_ASSERT(src_multiformat->ops != NULL);
  579. STARPU_ASSERT(dst_multiformat->ops != NULL);
  580. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  581. if (dst_multiformat->opencl_ptr == NULL) {
  582. /* XXX : it is weird that we might have to allocate memory here... */
  583. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  584. }
  585. err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->opencl_ptr,
  586. src_node,
  587. dst_multiformat->opencl_ptr,
  588. dst_node,
  589. size,
  590. src_multiformat->offset,
  591. (cl_event *)_event,
  592. &ret);
  593. if (STARPU_UNLIKELY(err))
  594. STARPU_OPENCL_REPORT_ERROR(err);
  595. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  596. return ret;
  597. }
  598. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  599. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  600. {
  601. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  602. }
  603. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  604. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  605. {
  606. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  607. }
  608. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  609. void *dst_interface, unsigned dst_node)
  610. {
  611. /* TODO */
  612. }
  613. #endif