multiformat_interface.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2012 Institut National de Recherche en Informatique et Automatique
  4. * Copyright (C) 2012, 2013 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <starpu.h>
  18. #include <common/config.h>
  19. #include <datawizard/coherency.h>
  20. #include <datawizard/copy_driver.h>
  21. #include <datawizard/filters.h>
  22. #include <starpu_hash.h>
  23. #include <starpu_cuda.h>
  24. #include <starpu_opencl.h>
  25. #include <drivers/opencl/driver_opencl.h>
  26. #include <drivers/mic/driver_mic_source.h>
  27. #include <core/task.h>
  28. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. #ifdef STARPU_USE_CUDA
  30. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  31. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  32. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  33. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  34. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  35. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  36. #endif
  37. #ifdef STARPU_USE_OPENCL
  38. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  39. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  40. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  41. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  42. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  43. #endif
  44. #ifdef STARPU_USE_MIC
  45. static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  46. static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  47. static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  48. static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  49. #endif
  50. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  51. {
  52. .ram_to_ram = copy_ram_to_ram,
  53. #ifdef STARPU_USE_CUDA
  54. .ram_to_cuda = copy_ram_to_cuda,
  55. .cuda_to_ram = copy_cuda_to_ram,
  56. .ram_to_cuda_async = copy_ram_to_cuda_async,
  57. .cuda_to_ram_async = copy_cuda_to_ram_async,
  58. .cuda_to_cuda = copy_cuda_to_cuda,
  59. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  60. #else
  61. #ifdef STARPU_SIMGRID
  62. /* Enable GPU-GPU transfers in simgrid */
  63. .cuda_to_cuda_async = 1,
  64. #endif
  65. #endif
  66. #ifdef STARPU_USE_OPENCL
  67. .ram_to_opencl = copy_ram_to_opencl,
  68. .opencl_to_ram = copy_opencl_to_ram,
  69. .opencl_to_opencl = copy_opencl_to_opencl,
  70. .ram_to_opencl_async = copy_ram_to_opencl_async,
  71. .opencl_to_ram_async = copy_opencl_to_ram_async,
  72. #endif
  73. #ifdef STARPU_USE_MIC
  74. .ram_to_mic = copy_ram_to_mic,
  75. .mic_to_ram = copy_mic_to_ram,
  76. .ram_to_mic_async = copy_ram_to_mic_async,
  77. .mic_to_ram_async = copy_mic_to_ram_async,
  78. #endif
  79. };
  80. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
  81. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node);
  82. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, unsigned node);
  83. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node);
  84. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  85. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  86. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  87. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  88. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  89. static struct starpu_multiformat_data_interface_ops*
  90. get_mf_ops(void *data_interface)
  91. {
  92. struct starpu_multiformat_interface *mf;
  93. mf = (struct starpu_multiformat_interface *) data_interface;
  94. return mf->ops;
  95. }
  96. struct starpu_data_interface_ops starpu_interface_multiformat_ops =
  97. {
  98. .register_data_handle = register_multiformat_handle,
  99. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  100. .handle_to_pointer = multiformat_handle_to_pointer,
  101. .free_data_on_node = free_multiformat_buffer_on_node,
  102. .copy_methods = &multiformat_copy_data_methods_s,
  103. .get_size = multiformat_interface_get_size,
  104. .footprint = footprint_multiformat_interface_crc32,
  105. .compare = multiformat_compare,
  106. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  107. .interface_size = sizeof(struct starpu_multiformat_interface),
  108. .display = display_multiformat_interface,
  109. .is_multiformat = 1,
  110. .get_mf_ops = get_mf_ops
  111. };
  112. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
  113. {
  114. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  115. struct starpu_multiformat_interface *multiformat_interface =
  116. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  117. switch(starpu_node_get_kind(node))
  118. {
  119. case STARPU_CPU_RAM:
  120. return multiformat_interface->cpu_ptr;
  121. #ifdef STARPU_USE_CUDA
  122. case STARPU_CUDA_RAM:
  123. return multiformat_interface->cuda_ptr;
  124. #endif
  125. #ifdef STARPU_USE_OPENCL
  126. case STARPU_OPENCL_RAM:
  127. return multiformat_interface->opencl_ptr;
  128. #endif
  129. #ifdef STARPU_USE_MIC
  130. case STARPU_MIC_RAM:
  131. return multiformat_interface->mic_ptr;
  132. #endif
  133. default:
  134. STARPU_ABORT();
  135. }
  136. return NULL;
  137. }
  138. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
  139. {
  140. struct starpu_multiformat_interface *multiformat_interface;
  141. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  142. unsigned node;
  143. for (node = 0; node < STARPU_MAXNODES; node++)
  144. {
  145. struct starpu_multiformat_interface *local_interface =
  146. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  147. if (node == home_node)
  148. {
  149. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  150. #ifdef STARPU_USE_CUDA
  151. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  152. #endif
  153. #ifdef STARPU_USE_OPENCL
  154. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  155. #endif
  156. #ifdef STARPU_USE_MIC
  157. local_interface->mic_ptr = multiformat_interface->mic_ptr;
  158. #endif
  159. }
  160. else
  161. {
  162. local_interface->cpu_ptr = NULL;
  163. #ifdef STARPU_USE_CUDA
  164. local_interface->cuda_ptr = NULL;
  165. #endif
  166. #ifdef STARPU_USE_OPENCL
  167. local_interface->opencl_ptr = NULL;
  168. #endif
  169. #ifdef STARPU_USE_MIC
  170. local_interface->mic_ptr = NULL;
  171. #endif
  172. }
  173. local_interface->id = multiformat_interface->id;
  174. local_interface->nx = multiformat_interface->nx;
  175. local_interface->ops = multiformat_interface->ops;
  176. }
  177. }
  178. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  179. unsigned home_node,
  180. void *ptr,
  181. uint32_t nobjects,
  182. struct starpu_multiformat_data_interface_ops *format_ops)
  183. {
  184. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_opencl_cl);
  185. _starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
  186. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
  187. _starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
  188. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_mic_cl);
  189. _starpu_codelet_check_deprecated_fields(format_ops->mic_to_cpu_cl);
  190. struct starpu_multiformat_interface multiformat =
  191. {
  192. .id = STARPU_MULTIFORMAT_INTERFACE_ID,
  193. .cpu_ptr = ptr,
  194. .cuda_ptr = NULL,
  195. .opencl_ptr = NULL,
  196. .mic_ptr = NULL,
  197. .nx = nobjects,
  198. .ops = format_ops
  199. };
  200. starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops);
  201. }
  202. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  203. {
  204. return starpu_hash_crc32c_be(starpu_multiformat_get_nx(handle), 0);
  205. }
  206. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  207. {
  208. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  209. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  210. return ((multiformat_a->nx == multiformat_b->nx)
  211. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  212. #ifdef STARPU_USE_CUDA
  213. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  214. #endif
  215. #ifdef STARPU_USE_OPENCL
  216. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  217. #endif
  218. #ifdef STARPU_USE_MIC
  219. && (multiformat_a->ops->mic_elemsize == multiformat_b->ops->mic_elemsize)
  220. #endif
  221. );
  222. }
  223. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  224. {
  225. struct starpu_multiformat_interface *multiformat_interface;
  226. multiformat_interface = (struct starpu_multiformat_interface *)
  227. starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  228. fprintf(f, "%u\t", multiformat_interface->nx);
  229. }
  230. /* XXX : returns CPU size */
  231. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  232. {
  233. size_t size;
  234. struct starpu_multiformat_interface *multiformat_interface;
  235. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  236. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  237. return size;
  238. }
  239. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  240. {
  241. struct starpu_multiformat_interface *multiformat_interface;
  242. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  243. return multiformat_interface->nx;
  244. }
  245. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node)
  246. {
  247. struct starpu_multiformat_interface *multiformat_interface;
  248. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  249. uintptr_t addr = 0;
  250. starpu_ssize_t allocated_memory = 0;
  251. size_t size;
  252. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  253. allocated_memory += size;
  254. addr = starpu_malloc_on_node(dst_node, size);
  255. if (!addr)
  256. goto fail_cpu;
  257. multiformat_interface->cpu_ptr = (void *) addr;
  258. #ifdef STARPU_USE_CUDA
  259. size = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  260. allocated_memory += size;
  261. addr = starpu_malloc_on_node(dst_node, size);
  262. if (!addr)
  263. goto fail_cuda;
  264. multiformat_interface->cuda_ptr = (void *) addr;
  265. #endif
  266. #ifdef STARPU_USE_OPENCL
  267. size = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  268. allocated_memory += size;
  269. addr = starpu_malloc_on_node(dst_node, size);
  270. if (!addr)
  271. goto fail_opencl;
  272. multiformat_interface->opencl_ptr = (void *) addr;
  273. #endif
  274. #ifdef STARPU_USE_MIC
  275. size = multiformat_interface->nx * multiformat_interface->ops->mic_elemsize;
  276. allocated_memory += size;
  277. addr = starpu_malloc_on_node(dst_node, size);
  278. if (!addr)
  279. goto fail_mic;
  280. multiformat_interface->mic_ptr = (void *) addr;
  281. #endif
  282. return allocated_memory;
  283. #ifdef STARPU_USE_MIC
  284. fail_mic:
  285. #endif
  286. #ifdef STARPU_USE_OPENCL
  287. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  288. fail_opencl:
  289. #endif
  290. #ifdef STARPU_USE_CUDA
  291. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  292. fail_cuda:
  293. #endif
  294. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  295. fail_cpu:
  296. return -ENOMEM;
  297. }
  298. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node)
  299. {
  300. struct starpu_multiformat_interface *multiformat_interface;
  301. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  302. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cpu_ptr,
  303. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  304. multiformat_interface->cpu_ptr = NULL;
  305. #ifdef STARPU_USE_CUDA
  306. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cuda_ptr,
  307. multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  308. multiformat_interface->cuda_ptr = NULL;
  309. #endif
  310. #ifdef STARPU_USE_OPENCL
  311. starpu_free_on_node(node, (uintptr_t) multiformat_interface->opencl_ptr,
  312. multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  313. multiformat_interface->opencl_ptr = NULL;
  314. #endif
  315. #ifdef STARPU_USE_MIC
  316. starpu_free_on_node(node, (uintptr_t) multiformat_interface->mic_ptr,
  317. multiformat_interface->nx * multiformat_interface->ops->mic_elemsize);
  318. multiformat_interface->mic_ptr = NULL;
  319. #endif
  320. }
  321. /*
  322. * Copy methods
  323. */
  324. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  325. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  326. {
  327. struct starpu_multiformat_interface *src_multiformat;
  328. struct starpu_multiformat_interface *dst_multiformat;
  329. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  330. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  331. STARPU_ASSERT(src_multiformat != NULL);
  332. STARPU_ASSERT(dst_multiformat != NULL);
  333. STARPU_ASSERT(dst_multiformat->ops != NULL);
  334. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  335. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  336. return 0;
  337. }
  338. #ifdef STARPU_USE_CUDA
  339. static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  340. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  341. enum cudaMemcpyKind kind)
  342. {
  343. struct starpu_multiformat_interface *src_multiformat;
  344. struct starpu_multiformat_interface *dst_multiformat;
  345. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  346. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  347. size_t size;
  348. cudaError_t status;
  349. switch (kind)
  350. {
  351. case cudaMemcpyHostToDevice:
  352. {
  353. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  354. if (src_multiformat->cuda_ptr == NULL)
  355. {
  356. src_multiformat->cuda_ptr = malloc(size);
  357. if (src_multiformat->cuda_ptr == NULL)
  358. return -ENOMEM;
  359. }
  360. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  361. if (STARPU_UNLIKELY(status))
  362. {
  363. STARPU_CUDA_REPORT_ERROR(status);
  364. }
  365. break;
  366. }
  367. case cudaMemcpyDeviceToHost:
  368. {
  369. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  370. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  371. if (STARPU_UNLIKELY(status))
  372. STARPU_CUDA_REPORT_ERROR(status);
  373. break;
  374. }
  375. case cudaMemcpyDeviceToDevice:
  376. {
  377. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  378. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  379. if (STARPU_UNLIKELY(status))
  380. STARPU_CUDA_REPORT_ERROR(status);
  381. break;
  382. }
  383. default:
  384. STARPU_ABORT();
  385. }
  386. return 0;
  387. }
  388. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  389. {
  390. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  391. }
  392. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  393. {
  394. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  395. }
  396. static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  397. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  398. cudaStream_t stream, enum cudaMemcpyKind kind)
  399. {
  400. struct starpu_multiformat_interface *src_multiformat;
  401. struct starpu_multiformat_interface *dst_multiformat;
  402. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  403. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  404. size_t size;
  405. cudaError_t status;
  406. switch (kind)
  407. {
  408. case cudaMemcpyHostToDevice:
  409. {
  410. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  411. if (src_multiformat->cuda_ptr == NULL)
  412. {
  413. src_multiformat->cuda_ptr = malloc(size);
  414. if (src_multiformat->cuda_ptr == NULL)
  415. return -ENOMEM;
  416. }
  417. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  418. if (STARPU_UNLIKELY(status))
  419. {
  420. STARPU_CUDA_REPORT_ERROR(status);
  421. }
  422. break;
  423. }
  424. case cudaMemcpyDeviceToHost:
  425. {
  426. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  427. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  428. if (STARPU_UNLIKELY(status))
  429. STARPU_CUDA_REPORT_ERROR(status);
  430. break;
  431. }
  432. case cudaMemcpyDeviceToDevice:
  433. {
  434. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  435. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  436. if (STARPU_UNLIKELY(status))
  437. STARPU_CUDA_REPORT_ERROR(status);
  438. break;
  439. }
  440. default:
  441. STARPU_ABORT();
  442. }
  443. return 0;
  444. }
  445. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  446. {
  447. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  448. }
  449. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  450. {
  451. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  452. }
  453. #ifdef HAVE_CUDA_MEMCPY_PEER
  454. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  455. void *dst_interface, unsigned dst_node,
  456. cudaStream_t stream)
  457. {
  458. struct starpu_multiformat_interface *src_multiformat;
  459. struct starpu_multiformat_interface *dst_multiformat;
  460. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  461. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  462. STARPU_ASSERT(src_multiformat != NULL);
  463. STARPU_ASSERT(dst_multiformat != NULL);
  464. STARPU_ASSERT(src_multiformat->ops != NULL);
  465. cudaError_t status;
  466. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  467. int src_dev = _starpu_memory_node_get_devid(src_node);
  468. int dst_dev = _starpu_memory_node_get_devid(dst_node);
  469. if (stream)
  470. {
  471. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  472. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  473. src_multiformat->cuda_ptr, src_dev,
  474. size, stream);
  475. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  476. /* All good ! Still, returning -EAGAIN, because we will need to
  477. check the transfert completion later */
  478. if (status == cudaSuccess)
  479. return -EAGAIN;
  480. }
  481. /* Either a synchronous transfert was requested, or the asynchronous one
  482. failed. */
  483. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  484. src_multiformat->cuda_ptr, src_dev,
  485. size);
  486. if (STARPU_UNLIKELY(status != cudaSuccess))
  487. STARPU_CUDA_REPORT_ERROR(status);
  488. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  489. return 0;
  490. }
  491. #endif
  492. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  493. {
  494. if (src_node == dst_node)
  495. {
  496. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  497. }
  498. else
  499. {
  500. #ifdef HAVE_CUDA_MEMCPY_PEER
  501. return copy_cuda_peer_common(src_interface, src_node,
  502. dst_interface, dst_node,
  503. NULL);
  504. #else
  505. STARPU_ABORT();
  506. #endif
  507. }
  508. }
  509. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  510. void *dst_interface, unsigned dst_node,
  511. cudaStream_t stream)
  512. {
  513. if (src_node == dst_node)
  514. {
  515. return copy_cuda_common_async(src_interface, src_node,
  516. dst_interface, dst_node,
  517. stream, cudaMemcpyDeviceToDevice);
  518. }
  519. else
  520. {
  521. #ifdef HAVE_CUDA_MEMCPY_PEER
  522. return copy_cuda_peer_common(src_interface, src_node,
  523. dst_interface, dst_node,
  524. stream);
  525. #else
  526. STARPU_ABORT();
  527. #endif
  528. }
  529. }
  530. #endif /* STARPU_USE_CUDA */
  531. #ifdef STARPU_USE_OPENCL
  532. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  533. void *dst_interface, unsigned dst_node,
  534. cl_event *event)
  535. {
  536. int err, ret;
  537. size_t size;
  538. struct starpu_multiformat_interface *src_multiformat;
  539. struct starpu_multiformat_interface *dst_multiformat;
  540. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  541. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  542. STARPU_ASSERT(src_multiformat != NULL);
  543. STARPU_ASSERT(dst_multiformat != NULL);
  544. STARPU_ASSERT(src_multiformat->ops != NULL);
  545. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  546. err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
  547. src_node,
  548. (cl_mem) dst_multiformat->cpu_ptr,
  549. dst_node,
  550. size,
  551. 0,
  552. event,
  553. &ret);
  554. if (STARPU_UNLIKELY(err))
  555. STARPU_OPENCL_REPORT_ERROR(err);
  556. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  557. return ret;
  558. }
  559. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  560. void *dst_interface, unsigned dst_node,
  561. cl_event *event)
  562. {
  563. int err, ret;
  564. size_t size;
  565. struct starpu_multiformat_interface *src_multiformat;
  566. struct starpu_multiformat_interface *dst_multiformat;
  567. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  568. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  569. STARPU_ASSERT(src_multiformat != NULL);
  570. STARPU_ASSERT(dst_multiformat != NULL);
  571. STARPU_ASSERT(src_multiformat->ops != NULL);
  572. STARPU_ASSERT(dst_multiformat->ops != NULL);
  573. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  574. if (dst_multiformat->opencl_ptr == NULL)
  575. {
  576. /* XXX : it is weird that we might have to allocate memory here... */
  577. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  578. }
  579. err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
  580. src_node,
  581. dst_multiformat->opencl_ptr,
  582. dst_node,
  583. size,
  584. 0,
  585. event,
  586. &ret);
  587. if (STARPU_UNLIKELY(err))
  588. STARPU_OPENCL_REPORT_ERROR(err);
  589. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  590. return ret;
  591. }
  592. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  593. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  594. {
  595. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  596. }
  597. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  598. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  599. {
  600. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  601. }
  602. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  603. void *dst_interface, unsigned dst_node)
  604. {
  605. (void) src_interface;
  606. (void) dst_interface;
  607. (void) src_node;
  608. (void) dst_node;
  609. STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");
  610. return 0;
  611. }
  612. #endif
  613. #ifdef STARPU_USE_MIC
  614. static int copy_mic_common_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
  615. int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
  616. {
  617. struct starpu_multiformat_interface *src_multiformat = src_interface;
  618. struct starpu_multiformat_interface *dst_multiformat = dst_interface;
  619. STARPU_ASSERT(src_multiformat != NULL);
  620. STARPU_ASSERT(dst_multiformat != NULL);
  621. STARPU_ASSERT(dst_multiformat->ops != NULL);
  622. size_t size = dst_multiformat->nx * dst_multiformat->ops->mic_elemsize;
  623. if (src_multiformat->mic_ptr == NULL)
  624. {
  625. src_multiformat->mic_ptr = malloc(size);
  626. if (src_multiformat->mic_ptr == NULL)
  627. return -ENOMEM;
  628. }
  629. copy_func(src_multiformat->cpu_ptr, src_node, dst_multiformat->cpu_ptr, dst_node, size);
  630. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  631. return 0;
  632. }
  633. static int copy_mic_common_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
  634. int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
  635. {
  636. struct starpu_multiformat_interface *src_multiformat = src_interface;
  637. struct starpu_multiformat_interface *dst_multiformat = dst_interface;
  638. STARPU_ASSERT(src_multiformat != NULL);
  639. STARPU_ASSERT(dst_multiformat != NULL);
  640. STARPU_ASSERT(dst_multiformat->ops != NULL);
  641. size_t size = src_multiformat->nx * src_multiformat->ops->mic_elemsize;
  642. copy_func(src_multiformat->mic_ptr, src_node, dst_multiformat->mic_ptr, dst_node, size);
  643. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  644. return 0;
  645. }
  646. static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  647. {
  648. return copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
  649. }
  650. static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  651. {
  652. return copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
  653. }
  654. static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  655. {
  656. copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
  657. return -EAGAIN;
  658. }
  659. static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  660. {
  661. copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
  662. return -EAGAIN;
  663. }
  664. #endif