multiformat_interface.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/config.h>
  18. #include <datawizard/coherency.h>
  19. #include <datawizard/copy_driver.h>
  20. #include <datawizard/filters.h>
  21. #include <common/hash.h>
  22. #include <starpu_cuda.h>
  23. #include <starpu_opencl.h>
  24. #include <drivers/opencl/driver_opencl.h>
  25. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  26. #ifdef STARPU_USE_CUDA
  27. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  28. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  30. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  31. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  32. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  33. #endif
  34. #ifdef STARPU_USE_OPENCL
  35. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  36. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  37. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  39. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  40. #endif
  41. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s = {
  42. .ram_to_ram = copy_ram_to_ram,
  43. .ram_to_spu = NULL,
  44. #ifdef STARPU_USE_CUDA
  45. .ram_to_cuda = copy_ram_to_cuda,
  46. .cuda_to_ram = copy_cuda_to_ram,
  47. .ram_to_cuda_async = copy_ram_to_cuda_async,
  48. .cuda_to_ram_async = copy_cuda_to_ram_async,
  49. .cuda_to_cuda = copy_cuda_to_cuda,
  50. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  51. #endif
  52. #ifdef STARPU_USE_OPENCL
  53. .ram_to_opencl = copy_ram_to_opencl,
  54. .opencl_to_ram = copy_opencl_to_ram,
  55. .opencl_to_opencl = copy_opencl_to_opencl,
  56. .ram_to_opencl_async = copy_ram_to_opencl_async,
  57. .opencl_to_ram_async = copy_opencl_to_ram_async,
  58. #endif
  59. .cuda_to_spu = NULL,
  60. .spu_to_ram = NULL,
  61. .spu_to_cuda = NULL,
  62. .spu_to_spu = NULL
  63. };
  64. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  65. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  66. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  67. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  68. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  69. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  70. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  71. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  72. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  73. #ifdef STARPU_USE_GORDON
  74. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  75. #endif
  76. static struct starpu_data_interface_ops interface_multiformat_ops = {
  77. .register_data_handle = register_multiformat_handle,
  78. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  79. .handle_to_pointer = multiformat_handle_to_pointer,
  80. .free_data_on_node = free_multiformat_buffer_on_node,
  81. .copy_methods = &multiformat_copy_data_methods_s,
  82. .get_size = multiformat_interface_get_size,
  83. .footprint = footprint_multiformat_interface_crc32,
  84. .compare = multiformat_compare,
  85. #ifdef STARPU_USE_GORDON
  86. .convert_to_gordon = NULL,
  87. #endif
  88. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  89. .interface_size = sizeof(struct starpu_multiformat_interface),
  90. .display = display_multiformat_interface
  91. };
  92. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  93. {
  94. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  95. struct starpu_multiformat_interface *multiformat_interface =
  96. starpu_data_get_interface_on_node(handle, node);
  97. switch(_starpu_get_node_kind(node)) {
  98. case STARPU_CPU_RAM:
  99. return multiformat_interface->cpu_ptr;
  100. #ifdef STARPU_USE_CUDA
  101. case STARPU_CUDA_RAM:
  102. return multiformat_interface->cuda_ptr;
  103. #endif
  104. #ifdef STARPU_USE_OPENCL
  105. case STARPU_OPENCL_RAM:
  106. return multiformat_interface->opencl_ptr;
  107. #endif
  108. default:
  109. STARPU_ASSERT(0);
  110. }
  111. }
  112. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  113. {
  114. struct starpu_multiformat_interface *multiformat_interface;
  115. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  116. unsigned node;
  117. for (node = 0; node < STARPU_MAXNODES; node++) {
  118. struct starpu_multiformat_interface *local_interface =
  119. starpu_data_get_interface_on_node(handle, node);
  120. if (node == home_node) {
  121. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  122. #ifdef STARPU_USE_CUDA
  123. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  124. #endif
  125. #ifdef STARPU_USE_OPENCL
  126. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  127. #endif
  128. local_interface->dev_handle = multiformat_interface->dev_handle;
  129. local_interface->offset = multiformat_interface->offset;
  130. }
  131. else {
  132. local_interface->cpu_ptr = NULL;
  133. #ifdef STARPU_USE_CUDA
  134. local_interface->cuda_ptr = NULL;
  135. #endif
  136. #ifdef STARPU_USE_OPENCL
  137. local_interface->opencl_ptr = NULL;
  138. #endif
  139. local_interface->dev_handle = 0;
  140. local_interface->offset = 0;
  141. }
  142. local_interface->nx = multiformat_interface->nx;
  143. local_interface->ops = multiformat_interface->ops;
  144. local_interface->conversion_time = 0;
  145. }
  146. }
  147. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  148. uint32_t home_node,
  149. void *ptr,
  150. uint32_t nobjects,
  151. struct starpu_multiformat_data_interface_ops *format_ops)
  152. {
  153. struct starpu_multiformat_interface multiformat = {
  154. .cpu_ptr = ptr,
  155. #ifdef STARPU_USE_CUDA
  156. .cuda_ptr = NULL,
  157. #endif
  158. #ifdef STARPu_USE_OPENCL
  159. .opencl_ptr = NULL,
  160. #endif
  161. .nx = nobjects,
  162. .dev_handle = (uintptr_t) ptr,
  163. .offset = 0,
  164. .ops = format_ops
  165. };
  166. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  167. }
  168. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  169. {
  170. return _starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  171. }
  172. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  173. {
  174. struct starpu_multiformat_interface *multiformat_a = data_interface_a;
  175. struct starpu_multiformat_interface *multiformat_b = data_interface_b;
  176. return ((multiformat_a->nx == multiformat_b->nx)
  177. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  178. #ifdef STARPU_USE_CUDA
  179. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  180. #endif
  181. #if STARPU_USE_OPENCL
  182. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  183. #endif
  184. );
  185. }
  186. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  187. {
  188. /* TODO */
  189. struct starpu_multiformat_interface *multiformat_interface;
  190. multiformat_interface = (struct starpu_multiformat_interface *)
  191. starpu_data_get_interface_on_node(handle, 0);
  192. fprintf(f, "%u\t", multiformat_interface->nx);
  193. }
  194. /* XXX : returns CPU size */
  195. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  196. {
  197. size_t size;
  198. struct starpu_multiformat_interface *multiformat_interface;
  199. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  200. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  201. return size;
  202. }
  203. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  204. {
  205. struct starpu_multiformat_interface *multiformat_interface;
  206. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  207. return multiformat_interface->nx;
  208. }
  209. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  210. {
  211. struct starpu_multiformat_interface *multiformat_interface;
  212. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  213. enum _starpu_node_kind kind = _starpu_get_node_kind(node);
  214. switch(kind) {
  215. case STARPU_CPU_RAM:
  216. free(multiformat_interface->cpu_ptr);
  217. multiformat_interface->cpu_ptr = NULL;
  218. break;
  219. #ifdef STARPU_USE_CUDA
  220. case STARPU_CUDA_RAM:
  221. cudaFree(multiformat_interface->cuda_ptr);
  222. multiformat_interface->cuda_ptr = NULL;
  223. break;
  224. #endif
  225. #ifdef STARPU_USE_OPENCL
  226. case STARPU_OPENCL_RAM:
  227. /* TODO */
  228. break;
  229. #endif
  230. default:
  231. STARPU_ABORT();
  232. }
  233. }
  234. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  235. {
  236. struct starpu_multiformat_interface *multiformat_interface;
  237. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  238. unsigned fail = 0;
  239. uintptr_t addr = 0;
  240. ssize_t allocated_memory;
  241. enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
  242. switch(kind) {
  243. case STARPU_CPU_RAM:
  244. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  245. addr = (uintptr_t)malloc(allocated_memory);
  246. if (!addr) {
  247. fail = 1;
  248. }
  249. else {
  250. multiformat_interface->cpu_ptr = (void *) addr;
  251. multiformat_interface->dev_handle = addr;
  252. }
  253. break;
  254. #ifdef STARPU_USE_CUDA
  255. case STARPU_CUDA_RAM:
  256. {
  257. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  258. cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
  259. if (STARPU_UNLIKELY(status)) {
  260. STARPU_CUDA_REPORT_ERROR(status);
  261. }
  262. else {
  263. multiformat_interface->cuda_ptr = (void *)addr;
  264. multiformat_interface->dev_handle = addr;
  265. }
  266. break;
  267. }
  268. #endif
  269. #ifdef STARPU_USE_OPENCL
  270. case STARPU_OPENCL_RAM:
  271. {
  272. int ret;
  273. void *ptr;
  274. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  275. ret = _starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
  276. addr = (uintptr_t)ptr;
  277. if (ret) {
  278. fail = 1;
  279. }
  280. else {
  281. multiformat_interface->opencl_ptr = (void *)addr;
  282. multiformat_interface->dev_handle = addr;
  283. }
  284. break;
  285. }
  286. break;
  287. #endif
  288. default:
  289. STARPU_ASSERT(0);
  290. }
  291. if (fail)
  292. return -ENOMEM;
  293. multiformat_interface->offset = 0;
  294. return allocated_memory;
  295. }
  296. /*
  297. * Copy methods
  298. */
  299. static int copy_ram_to_ram(void *src_interface, unsigned src_node,
  300. void *dst_interface, unsigned dst_node)
  301. {
  302. struct starpu_multiformat_interface *src_multiformat;
  303. struct starpu_multiformat_interface *dst_multiformat;
  304. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  305. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  306. STARPU_ASSERT(src_multiformat != NULL);
  307. STARPU_ASSERT(dst_multiformat != NULL);
  308. STARPU_ASSERT(dst_multiformat->ops != NULL);
  309. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  310. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  311. return 0;
  312. }
  313. #ifdef STARPU_USE_CUDA
  314. static int copy_cuda_common(void *src_interface, unsigned src_node,
  315. void *dst_interface, unsigned dst_node,
  316. enum cudaMemcpyKind kind)
  317. {
  318. struct starpu_multiformat_interface *src_multiformat;
  319. struct starpu_multiformat_interface *dst_multiformat;
  320. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  321. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  322. size_t size;
  323. cudaError_t status;
  324. switch (kind) {
  325. case cudaMemcpyHostToDevice:
  326. {
  327. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  328. if (src_multiformat->cuda_ptr == NULL) {
  329. src_multiformat->cuda_ptr = malloc(size);
  330. if (src_multiformat->cuda_ptr == NULL)
  331. return -ENOMEM;
  332. }
  333. /* Converting data , from host to host */
  334. double tmp = starpu_timing_now();
  335. void *buffers[1];
  336. buffers[0] = src_interface;
  337. struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
  338. cl->cpu_func(buffers, NULL);
  339. dst_multiformat->conversion_time = starpu_timing_now() - tmp;
  340. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  341. if (STARPU_UNLIKELY(status)) {
  342. STARPU_CUDA_REPORT_ERROR(status);
  343. }
  344. break;
  345. }
  346. case cudaMemcpyDeviceToHost:
  347. {
  348. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  349. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  350. if (STARPU_UNLIKELY(status))
  351. STARPU_CUDA_REPORT_ERROR(status);
  352. void *buffers[1];
  353. struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
  354. buffers[0] = dst_interface;
  355. cl->cpu_func(buffers, NULL);
  356. break;
  357. }
  358. case cudaMemcpyDeviceToDevice:
  359. {
  360. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  361. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  362. if (STARPU_UNLIKELY(status))
  363. STARPU_CUDA_REPORT_ERROR(status);
  364. break;
  365. }
  366. default:
  367. STARPU_ASSERT(0);
  368. }
  369. return 0;
  370. }
  371. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  372. {
  373. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  374. }
  375. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  376. {
  377. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  378. }
  379. static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind)
  380. {
  381. struct starpu_multiformat_interface *src_multiformat;
  382. struct starpu_multiformat_interface *dst_multiformat;
  383. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  384. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  385. size_t size;
  386. cudaError_t status;
  387. switch (kind) {
  388. case cudaMemcpyHostToDevice:
  389. {
  390. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  391. if (src_multiformat->cuda_ptr == NULL) {
  392. src_multiformat->cuda_ptr = malloc(size);
  393. if (src_multiformat->cuda_ptr == NULL)
  394. return -ENOMEM;
  395. }
  396. /* Converting data , from host to host */
  397. double tmp = starpu_timing_now();
  398. void *buffers[1]; // XXX
  399. buffers[0] = src_interface;
  400. struct starpu_codelet *cl = src_multiformat->ops->cpu_to_cuda_cl;
  401. cl->cpu_func(buffers, NULL);
  402. dst_multiformat->conversion_time = starpu_timing_now() - tmp;
  403. /* Actual copy from host to device */
  404. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  405. if (STARPU_UNLIKELY(status)) {
  406. STARPU_CUDA_REPORT_ERROR(status);
  407. }
  408. break;
  409. }
  410. case cudaMemcpyDeviceToHost:
  411. {
  412. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  413. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  414. if (STARPU_UNLIKELY(status))
  415. STARPU_CUDA_REPORT_ERROR(status);
  416. /* Converting data */
  417. void *buffers[1];
  418. struct starpu_codelet *cl = src_multiformat->ops->cuda_to_cpu_cl;
  419. buffers[0] = dst_interface;
  420. cl->cpu_func(buffers, NULL);
  421. break;
  422. }
  423. case cudaMemcpyDeviceToDevice:
  424. {
  425. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  426. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  427. if (STARPU_UNLIKELY(status))
  428. STARPU_CUDA_REPORT_ERROR(status);
  429. break;
  430. }
  431. default:
  432. STARPU_ASSERT(0);
  433. }
  434. return 0;
  435. }
  436. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  437. {
  438. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  439. }
  440. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  441. {
  442. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  443. }
  444. #ifdef HAVE_CUDA_MEMCPY_PEER
  445. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  446. void *dst_interface, unsigned dst_node,
  447. cudaStream_t stream)
  448. {
  449. struct starpu_multiformat_interface *src_multiformat;
  450. struct starpu_multiformat_interface *dst_multiformat;
  451. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  452. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  453. STARPU_ASSERT(src_multiformat != NULL);
  454. STARPU_ASSERT(dst_multiformat != NULL);
  455. STARPU_ASSERT(src_multiformat->ops != NULL);
  456. cudaError_t status;
  457. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  458. int src_dev = _starpu_memory_node_to_devid(src_node);
  459. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  460. if (stream)
  461. {
  462. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  463. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  464. src_multiformat->cuda_ptr, src_dev,
  465. size, stream);
  466. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  467. /* All good ! Still, returning -EAGAIN, because we will need to
  468. check the transfert completion later */
  469. if (status == cudaSuccess)
  470. return -EAGAIN;
  471. }
  472. /* Either a synchronous transfert was requested, or the asynchronous one
  473. failed. */
  474. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  475. src_multiformat->cuda_ptr, src_dev,
  476. size);
  477. if (STARPU_UNLIKELY(status != cudaSuccess))
  478. STARPU_CUDA_REPORT_ERROR(status);
  479. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  480. return 0;
  481. }
  482. #endif
  483. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  484. {
  485. if (src_node == dst_node)
  486. {
  487. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  488. }
  489. else
  490. {
  491. #ifdef HAVE_CUDA_MEMCPY_PEER
  492. return copy_cuda_peer_common(src_interface, src_node,
  493. dst_interface, dst_node,
  494. NULL);
  495. #else
  496. STARPU_ASSERT(0);
  497. #endif
  498. }
  499. }
  500. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  501. void *dst_interface, unsigned dst_node,
  502. cudaStream_t stream)
  503. {
  504. if (src_node == dst_node)
  505. {
  506. return copy_cuda_common_async(src_interface, src_node,
  507. dst_interface, dst_node,
  508. stream, cudaMemcpyDeviceToDevice);
  509. }
  510. else
  511. {
  512. #ifdef HAVE_CUDA_MEMCPY_PEER
  513. return copy_cuda_peer_common(src_interface, src_node,
  514. dst_interface, dst_node,
  515. stream);
  516. #else
  517. STARPU_ASSERT(0);
  518. #endif
  519. }
  520. }
  521. #endif /* STARPU_USE_CUDA */
  522. #ifdef STARPU_USE_OPENCL
  523. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  524. void *dst_interface, unsigned dst_node,
  525. void *_event)
  526. {
  527. int err, ret;
  528. size_t size;
  529. struct starpu_multiformat_interface *src_multiformat;
  530. struct starpu_multiformat_interface *dst_multiformat;
  531. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  532. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  533. STARPU_ASSERT(src_multiformat != NULL);
  534. STARPU_ASSERT(dst_multiformat != NULL);
  535. STARPU_ASSERT(src_multiformat->ops != NULL);
  536. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  537. if (src_multiformat->opencl_ptr == NULL) {
  538. src_multiformat->opencl_ptr = malloc(src_multiformat->nx * src_multiformat->ops->opencl_elemsize);
  539. if (src_multiformat->opencl_ptr == NULL) {
  540. return -ENOMEM;
  541. }
  542. double tmp = starpu_timing_now();
  543. void *buffers[1];
  544. struct starpu_codelet *cl = src_multiformat->ops->cpu_to_opencl_cl;
  545. buffers[0] = src_interface;
  546. cl->cpu_func(buffers, NULL);
  547. dst_multiformat->conversion_time = starpu_timing_now() - tmp;
  548. if (src_multiformat->opencl_ptr == NULL)
  549. return -ENOMEM; // XXX
  550. }
  551. err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->opencl_ptr,
  552. src_node,
  553. (cl_mem) dst_multiformat->dev_handle,
  554. dst_node,
  555. size,
  556. dst_multiformat->offset,
  557. (cl_event *) _event,
  558. &ret);
  559. if (STARPU_UNLIKELY(err))
  560. STARPU_OPENCL_REPORT_ERROR(err);
  561. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  562. return ret;
  563. }
  564. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  565. void *dst_interface, unsigned dst_node,
  566. void *_event)
  567. {
  568. int err, ret;
  569. size_t size;
  570. struct starpu_multiformat_interface *src_multiformat;
  571. struct starpu_multiformat_interface *dst_multiformat;
  572. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  573. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  574. STARPU_ASSERT(src_multiformat != NULL);
  575. STARPU_ASSERT(dst_multiformat != NULL);
  576. STARPU_ASSERT(src_multiformat->ops != NULL);
  577. STARPU_ASSERT(dst_multiformat->ops != NULL);
  578. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize,
  579. err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->dev_handle,
  580. src_node,
  581. dst_multiformat->opencl_ptr,
  582. dst_node,
  583. size,
  584. src_multiformat->offset,
  585. (cl_event *)_event,
  586. &ret);
  587. if (STARPU_UNLIKELY(err))
  588. STARPU_OPENCL_REPORT_ERROR(err);
  589. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  590. /* XXX So much for asynchronicity */
  591. clWaitForEvents(1, _event);
  592. void *buffers[1];
  593. struct starpu_codelet *cl = src_multiformat->ops->opencl_to_cpu_cl;
  594. buffers[0] = dst_interface;
  595. cl->cpu_func(buffers, NULL);
  596. return ret;
  597. }
  598. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  599. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  600. {
  601. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  602. }
  603. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  604. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  605. {
  606. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  607. }
  608. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  609. void *dst_interface, unsigned dst_node)
  610. {
  611. /* TODO */
  612. }
  613. #endif