multiformat_interface.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011 Institut National de Recherche en Informatique et Automatique
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/config.h>
  18. #include <datawizard/coherency.h>
  19. #include <datawizard/copy_driver.h>
  20. #include <datawizard/filters.h>
  21. #include <common/hash.h>
  22. #include <starpu_cuda.h>
  23. #include <starpu_opencl.h>
  24. #include <drivers/opencl/driver_opencl.h>
  25. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  26. #ifdef STARPU_USE_CUDA
  27. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  28. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  30. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  31. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  32. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  33. #endif
  34. #ifdef STARPU_USE_OPENCL
  35. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  36. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  37. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  38. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  39. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, void *_event);
  40. #endif
  41. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  42. {
  43. .ram_to_ram = copy_ram_to_ram,
  44. .ram_to_spu = NULL,
  45. #ifdef STARPU_USE_CUDA
  46. .ram_to_cuda = copy_ram_to_cuda,
  47. .cuda_to_ram = copy_cuda_to_ram,
  48. .ram_to_cuda_async = copy_ram_to_cuda_async,
  49. .cuda_to_ram_async = copy_cuda_to_ram_async,
  50. .cuda_to_cuda = copy_cuda_to_cuda,
  51. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  52. #endif
  53. #ifdef STARPU_USE_OPENCL
  54. .ram_to_opencl = copy_ram_to_opencl,
  55. .opencl_to_ram = copy_opencl_to_ram,
  56. .opencl_to_opencl = copy_opencl_to_opencl,
  57. .ram_to_opencl_async = copy_ram_to_opencl_async,
  58. .opencl_to_ram_async = copy_opencl_to_ram_async,
  59. #endif
  60. .cuda_to_spu = NULL,
  61. .spu_to_ram = NULL,
  62. .spu_to_cuda = NULL,
  63. .spu_to_spu = NULL
  64. };
  65. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface);
  66. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node);
  67. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, uint32_t node);
  68. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node);
  69. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  70. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  71. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  72. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  73. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  74. #ifdef STARPU_USE_GORDON
  75. static int convert_multiformat_to_gordon(void *data_interface, uint64_t *ptr, gordon_strideSize_t *ss);
  76. #endif
  77. static struct starpu_data_interface_ops interface_multiformat_ops =
  78. {
  79. .register_data_handle = register_multiformat_handle,
  80. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  81. .handle_to_pointer = multiformat_handle_to_pointer,
  82. .free_data_on_node = free_multiformat_buffer_on_node,
  83. .copy_methods = &multiformat_copy_data_methods_s,
  84. .get_size = multiformat_interface_get_size,
  85. .footprint = footprint_multiformat_interface_crc32,
  86. .compare = multiformat_compare,
  87. #ifdef STARPU_USE_GORDON
  88. .convert_to_gordon = NULL,
  89. #endif
  90. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  91. .interface_size = sizeof(struct starpu_multiformat_interface),
  92. .display = display_multiformat_interface
  93. };
  94. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, uint32_t node)
  95. {
  96. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  97. struct starpu_multiformat_interface *multiformat_interface =
  98. starpu_data_get_interface_on_node(handle, node);
  99. switch(_starpu_get_node_kind(node))
  100. {
  101. case STARPU_CPU_RAM:
  102. return multiformat_interface->cpu_ptr;
  103. #ifdef STARPU_USE_CUDA
  104. case STARPU_CUDA_RAM:
  105. return multiformat_interface->cuda_ptr;
  106. #endif
  107. #ifdef STARPU_USE_OPENCL
  108. case STARPU_OPENCL_RAM:
  109. return multiformat_interface->opencl_ptr;
  110. #endif
  111. default:
  112. STARPU_ASSERT(0);
  113. }
  114. }
  115. static void register_multiformat_handle(starpu_data_handle_t handle, uint32_t home_node, void *data_interface)
  116. {
  117. struct starpu_multiformat_interface *multiformat_interface;
  118. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  119. unsigned node;
  120. for (node = 0; node < STARPU_MAXNODES; node++)
  121. {
  122. struct starpu_multiformat_interface *local_interface =
  123. starpu_data_get_interface_on_node(handle, node);
  124. if (node == home_node)
  125. {
  126. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  127. #ifdef STARPU_USE_CUDA
  128. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  129. #endif
  130. #ifdef STARPU_USE_OPENCL
  131. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  132. #endif
  133. local_interface->dev_handle = multiformat_interface->dev_handle;
  134. local_interface->offset = multiformat_interface->offset;
  135. }
  136. else
  137. {
  138. local_interface->cpu_ptr = NULL;
  139. #ifdef STARPU_USE_CUDA
  140. local_interface->cuda_ptr = NULL;
  141. #endif
  142. #ifdef STARPU_USE_OPENCL
  143. local_interface->opencl_ptr = NULL;
  144. #endif
  145. local_interface->dev_handle = 0;
  146. local_interface->offset = 0;
  147. }
  148. local_interface->nx = multiformat_interface->nx;
  149. local_interface->ops = multiformat_interface->ops;
  150. }
  151. }
  152. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  153. uint32_t home_node,
  154. void *ptr,
  155. uint32_t nobjects,
  156. struct starpu_multiformat_data_interface_ops *format_ops)
  157. {
  158. struct starpu_multiformat_interface multiformat =
  159. {
  160. .cpu_ptr = ptr,
  161. #ifdef STARPU_USE_CUDA
  162. .cuda_ptr = NULL,
  163. #endif
  164. #ifdef STARPu_USE_OPENCL
  165. .opencl_ptr = NULL,
  166. #endif
  167. .nx = nobjects,
  168. .dev_handle = (uintptr_t) ptr,
  169. .offset = 0,
  170. .ops = format_ops
  171. };
  172. starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
  173. }
  174. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  175. {
  176. return _starpu_crc32_be(starpu_multiformat_get_nx(handle), 0);
  177. }
  178. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  179. {
  180. struct starpu_multiformat_interface *multiformat_a = data_interface_a;
  181. struct starpu_multiformat_interface *multiformat_b = data_interface_b;
  182. return ((multiformat_a->nx == multiformat_b->nx)
  183. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  184. #ifdef STARPU_USE_CUDA
  185. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  186. #endif
  187. #if STARPU_USE_OPENCL
  188. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  189. #endif
  190. );
  191. }
  192. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  193. {
  194. /* TODO */
  195. struct starpu_multiformat_interface *multiformat_interface;
  196. multiformat_interface = (struct starpu_multiformat_interface *)
  197. starpu_data_get_interface_on_node(handle, 0);
  198. fprintf(f, "%u\t", multiformat_interface->nx);
  199. }
  200. /* XXX : returns CPU size */
  201. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  202. {
  203. size_t size;
  204. struct starpu_multiformat_interface *multiformat_interface;
  205. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  206. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  207. return size;
  208. }
  209. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  210. {
  211. struct starpu_multiformat_interface *multiformat_interface;
  212. multiformat_interface = starpu_data_get_interface_on_node(handle, 0);
  213. return multiformat_interface->nx;
  214. }
  215. static void free_multiformat_buffer_on_node(void *data_interface, uint32_t node)
  216. {
  217. struct starpu_multiformat_interface *multiformat_interface;
  218. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  219. enum _starpu_node_kind kind = _starpu_get_node_kind(node);
  220. switch(kind)
  221. {
  222. case STARPU_CPU_RAM:
  223. free(multiformat_interface->cpu_ptr);
  224. multiformat_interface->cpu_ptr = NULL;
  225. break;
  226. #ifdef STARPU_USE_CUDA
  227. case STARPU_CUDA_RAM:
  228. if (multiformat_interface->cpu_ptr)
  229. {
  230. cudaFree(multiformat_interface->cpu_ptr);
  231. multiformat_interface->cpu_ptr = NULL;
  232. }
  233. if (multiformat_interface->cuda_ptr)
  234. {
  235. cudaFree(multiformat_interface->cuda_ptr);
  236. multiformat_interface->cuda_ptr = NULL;
  237. }
  238. break;
  239. #endif
  240. #ifdef STARPU_USE_OPENCL
  241. case STARPU_OPENCL_RAM:
  242. /* TODO */
  243. break;
  244. #endif
  245. default:
  246. STARPU_ABORT();
  247. }
  248. }
  249. static ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, uint32_t dst_node)
  250. {
  251. struct starpu_multiformat_interface *multiformat_interface;
  252. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  253. unsigned fail = 0;
  254. uintptr_t addr = 0;
  255. ssize_t allocated_memory;
  256. enum _starpu_node_kind kind = _starpu_get_node_kind(dst_node);
  257. switch(kind)
  258. {
  259. case STARPU_CPU_RAM:
  260. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  261. addr = (uintptr_t)malloc(allocated_memory);
  262. if (!addr)
  263. {
  264. fail = 1;
  265. }
  266. else
  267. {
  268. multiformat_interface->cpu_ptr = (void *) addr;
  269. multiformat_interface->dev_handle = addr;
  270. }
  271. #ifdef STARPU_USE_CUDA
  272. multiformat_interface->cuda_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  273. STARPU_ASSERT(multiformat_interface->cuda_ptr != NULL);
  274. #endif
  275. #ifdef STARPU_USE_OPENCL
  276. multiformat_interface->opencl_ptr = malloc(multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  277. STARPU_ASSERT(multiformat_interface->opencl_ptr != NULL);
  278. #endif
  279. break;
  280. #ifdef STARPU_USE_CUDA
  281. case STARPU_CUDA_RAM:
  282. {
  283. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  284. cudaError_t status = cudaMalloc((void **)&addr, allocated_memory);
  285. if (STARPU_UNLIKELY(status))
  286. {
  287. STARPU_CUDA_REPORT_ERROR(status);
  288. }
  289. else
  290. {
  291. multiformat_interface->cuda_ptr = (void *)addr;
  292. multiformat_interface->dev_handle = addr;
  293. }
  294. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  295. status = cudaMalloc((void **)&multiformat_interface->cpu_ptr, allocated_memory);
  296. if (STARPU_UNLIKELY(status != cudaSuccess))
  297. STARPU_CUDA_REPORT_ERROR(status);
  298. break;
  299. }
  300. #endif
  301. #ifdef STARPU_USE_OPENCL
  302. case STARPU_OPENCL_RAM:
  303. {
  304. int ret;
  305. void *ptr;
  306. allocated_memory = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  307. ret = _starpu_opencl_allocate_memory(&ptr, allocated_memory, CL_MEM_READ_WRITE);
  308. addr = (uintptr_t)ptr;
  309. if (ret)
  310. {
  311. fail = 1;
  312. }
  313. else
  314. {
  315. multiformat_interface->opencl_ptr = (void *)addr;
  316. multiformat_interface->dev_handle = addr;
  317. }
  318. _starpu_opencl_allocate_memory(&multiformat_interface->cpu_ptr,
  319. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize,
  320. CL_MEM_READ_WRITE);
  321. break;
  322. }
  323. #endif
  324. default:
  325. STARPU_ASSERT(0);
  326. }
  327. if (fail)
  328. return -ENOMEM;
  329. multiformat_interface->offset = 0;
  330. return allocated_memory;
  331. }
  332. /*
  333. * Copy methods
  334. */
  335. static int copy_ram_to_ram(void *src_interface, unsigned src_node,
  336. void *dst_interface, unsigned dst_node)
  337. {
  338. struct starpu_multiformat_interface *src_multiformat;
  339. struct starpu_multiformat_interface *dst_multiformat;
  340. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  341. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  342. STARPU_ASSERT(src_multiformat != NULL);
  343. STARPU_ASSERT(dst_multiformat != NULL);
  344. STARPU_ASSERT(dst_multiformat->ops != NULL);
  345. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  346. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  347. return 0;
  348. }
  349. #ifdef STARPU_USE_CUDA
  350. static int copy_cuda_common(void *src_interface, unsigned src_node,
  351. void *dst_interface, unsigned dst_node,
  352. enum cudaMemcpyKind kind)
  353. {
  354. struct starpu_multiformat_interface *src_multiformat;
  355. struct starpu_multiformat_interface *dst_multiformat;
  356. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  357. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  358. size_t size;
  359. cudaError_t status;
  360. switch (kind)
  361. {
  362. case cudaMemcpyHostToDevice:
  363. {
  364. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  365. if (src_multiformat->cuda_ptr == NULL)
  366. {
  367. src_multiformat->cuda_ptr = malloc(size);
  368. if (src_multiformat->cuda_ptr == NULL)
  369. return -ENOMEM;
  370. }
  371. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  372. if (STARPU_UNLIKELY(status))
  373. {
  374. STARPU_CUDA_REPORT_ERROR(status);
  375. }
  376. break;
  377. }
  378. case cudaMemcpyDeviceToHost:
  379. {
  380. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  381. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  382. if (STARPU_UNLIKELY(status))
  383. STARPU_CUDA_REPORT_ERROR(status);
  384. break;
  385. }
  386. case cudaMemcpyDeviceToDevice:
  387. {
  388. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  389. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  390. if (STARPU_UNLIKELY(status))
  391. STARPU_CUDA_REPORT_ERROR(status);
  392. break;
  393. }
  394. default:
  395. STARPU_ASSERT(0);
  396. }
  397. return 0;
  398. }
  399. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  400. {
  401. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  402. }
  403. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  404. {
  405. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  406. }
  407. static int copy_cuda_common_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream, enum cudaMemcpyKind kind)
  408. {
  409. struct starpu_multiformat_interface *src_multiformat;
  410. struct starpu_multiformat_interface *dst_multiformat;
  411. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  412. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  413. size_t size;
  414. cudaError_t status;
  415. switch (kind)
  416. {
  417. case cudaMemcpyHostToDevice:
  418. {
  419. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  420. if (src_multiformat->cuda_ptr == NULL)
  421. {
  422. src_multiformat->cuda_ptr = malloc(size);
  423. if (src_multiformat->cuda_ptr == NULL)
  424. return -ENOMEM;
  425. }
  426. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  427. if (STARPU_UNLIKELY(status))
  428. {
  429. STARPU_CUDA_REPORT_ERROR(status);
  430. }
  431. break;
  432. }
  433. case cudaMemcpyDeviceToHost:
  434. {
  435. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  436. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  437. if (STARPU_UNLIKELY(status))
  438. STARPU_CUDA_REPORT_ERROR(status);
  439. break;
  440. }
  441. case cudaMemcpyDeviceToDevice:
  442. {
  443. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  444. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  445. if (STARPU_UNLIKELY(status))
  446. STARPU_CUDA_REPORT_ERROR(status);
  447. break;
  448. }
  449. default:
  450. STARPU_ASSERT(0);
  451. }
  452. return 0;
  453. }
  454. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  455. {
  456. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  457. }
  458. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  459. {
  460. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  461. }
  462. #ifdef HAVE_CUDA_MEMCPY_PEER
  463. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  464. void *dst_interface, unsigned dst_node,
  465. cudaStream_t stream)
  466. {
  467. struct starpu_multiformat_interface *src_multiformat;
  468. struct starpu_multiformat_interface *dst_multiformat;
  469. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  470. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  471. STARPU_ASSERT(src_multiformat != NULL);
  472. STARPU_ASSERT(dst_multiformat != NULL);
  473. STARPU_ASSERT(src_multiformat->ops != NULL);
  474. cudaError_t status;
  475. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  476. int src_dev = _starpu_memory_node_to_devid(src_node);
  477. int dst_dev = _starpu_memory_node_to_devid(dst_node);
  478. if (stream)
  479. {
  480. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  481. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  482. src_multiformat->cuda_ptr, src_dev,
  483. size, stream);
  484. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  485. /* All good ! Still, returning -EAGAIN, because we will need to
  486. check the transfert completion later */
  487. if (status == cudaSuccess)
  488. return -EAGAIN;
  489. }
  490. /* Either a synchronous transfert was requested, or the asynchronous one
  491. failed. */
  492. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  493. src_multiformat->cuda_ptr, src_dev,
  494. size);
  495. if (STARPU_UNLIKELY(status != cudaSuccess))
  496. STARPU_CUDA_REPORT_ERROR(status);
  497. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  498. return 0;
  499. }
  500. #endif
  501. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  502. {
  503. if (src_node == dst_node)
  504. {
  505. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  506. }
  507. else
  508. {
  509. #ifdef HAVE_CUDA_MEMCPY_PEER
  510. return copy_cuda_peer_common(src_interface, src_node,
  511. dst_interface, dst_node,
  512. NULL);
  513. #else
  514. STARPU_ASSERT(0);
  515. #endif
  516. }
  517. }
  518. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  519. void *dst_interface, unsigned dst_node,
  520. cudaStream_t stream)
  521. {
  522. if (src_node == dst_node)
  523. {
  524. return copy_cuda_common_async(src_interface, src_node,
  525. dst_interface, dst_node,
  526. stream, cudaMemcpyDeviceToDevice);
  527. }
  528. else
  529. {
  530. #ifdef HAVE_CUDA_MEMCPY_PEER
  531. return copy_cuda_peer_common(src_interface, src_node,
  532. dst_interface, dst_node,
  533. stream);
  534. #else
  535. STARPU_ASSERT(0);
  536. #endif
  537. }
  538. }
  539. #endif /* STARPU_USE_CUDA */
  540. #ifdef STARPU_USE_OPENCL
  541. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  542. void *dst_interface, unsigned dst_node,
  543. void *_event)
  544. {
  545. int err, ret;
  546. size_t size;
  547. struct starpu_multiformat_interface *src_multiformat;
  548. struct starpu_multiformat_interface *dst_multiformat;
  549. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  550. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  551. STARPU_ASSERT(src_multiformat != NULL);
  552. STARPU_ASSERT(dst_multiformat != NULL);
  553. STARPU_ASSERT(src_multiformat->ops != NULL);
  554. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  555. err = _starpu_opencl_copy_ram_to_opencl_async_sync(src_multiformat->cpu_ptr,
  556. src_node,
  557. (cl_mem) dst_multiformat->cpu_ptr,
  558. dst_node,
  559. size,
  560. dst_multiformat->offset,
  561. (cl_event *) _event,
  562. &ret);
  563. if (STARPU_UNLIKELY(err))
  564. STARPU_OPENCL_REPORT_ERROR(err);
  565. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  566. return ret;
  567. }
  568. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  569. void *dst_interface, unsigned dst_node,
  570. void *_event)
  571. {
  572. int err, ret;
  573. size_t size;
  574. struct starpu_multiformat_interface *src_multiformat;
  575. struct starpu_multiformat_interface *dst_multiformat;
  576. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  577. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  578. STARPU_ASSERT(src_multiformat != NULL);
  579. STARPU_ASSERT(dst_multiformat != NULL);
  580. STARPU_ASSERT(src_multiformat->ops != NULL);
  581. STARPU_ASSERT(dst_multiformat->ops != NULL);
  582. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  583. if (dst_multiformat->opencl_ptr == NULL) {
  584. /* XXX : it is weird that we might have to allocate memory here... */
  585. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  586. }
  587. err = _starpu_opencl_copy_opencl_to_ram_async_sync((cl_mem)src_multiformat->opencl_ptr,
  588. src_node,
  589. dst_multiformat->opencl_ptr,
  590. dst_node,
  591. size,
  592. src_multiformat->offset,
  593. (cl_event *)_event,
  594. &ret);
  595. if (STARPU_UNLIKELY(err))
  596. STARPU_OPENCL_REPORT_ERROR(err);
  597. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  598. return ret;
  599. }
  600. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  601. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  602. {
  603. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  604. }
  605. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  606. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  607. {
  608. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  609. }
  610. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  611. void *dst_interface, unsigned dst_node)
  612. {
  613. /* TODO */
  614. }
  615. #endif