multiformat_interface.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2012 INRIA
  4. * Copyright (C) 2012, 2013, 2014 CNRS
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <starpu.h>
  18. #include <common/config.h>
  19. #include <datawizard/coherency.h>
  20. #include <datawizard/copy_driver.h>
  21. #include <datawizard/filters.h>
  22. #include <datawizard/memory_nodes.h>
  23. #include <starpu_hash.h>
  24. #include <starpu_cuda.h>
  25. #include <starpu_opencl.h>
  26. #include <drivers/opencl/driver_opencl.h>
  27. #include <drivers/mic/driver_mic_source.h>
  28. #include <core/task.h>
  29. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  30. #ifdef STARPU_USE_CUDA
  31. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  32. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  33. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  34. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  35. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  36. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  37. #endif
  38. #ifdef STARPU_USE_OPENCL
  39. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  40. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  41. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  42. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  43. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  44. #endif
  45. #ifdef STARPU_USE_MIC
  46. static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  47. static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  48. static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  49. static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  50. #endif
  51. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  52. {
  53. .ram_to_ram = copy_ram_to_ram,
  54. #ifdef STARPU_USE_CUDA
  55. .ram_to_cuda = copy_ram_to_cuda,
  56. .cuda_to_ram = copy_cuda_to_ram,
  57. .ram_to_cuda_async = copy_ram_to_cuda_async,
  58. .cuda_to_ram_async = copy_cuda_to_ram_async,
  59. .cuda_to_cuda = copy_cuda_to_cuda,
  60. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  61. #else
  62. #ifdef STARPU_SIMGRID
  63. /* Enable GPU-GPU transfers in simgrid */
  64. .cuda_to_cuda_async = (void *)1,
  65. #endif
  66. #endif
  67. #ifdef STARPU_USE_OPENCL
  68. .ram_to_opencl = copy_ram_to_opencl,
  69. .opencl_to_ram = copy_opencl_to_ram,
  70. .opencl_to_opencl = copy_opencl_to_opencl,
  71. .ram_to_opencl_async = copy_ram_to_opencl_async,
  72. .opencl_to_ram_async = copy_opencl_to_ram_async,
  73. #endif
  74. #ifdef STARPU_USE_MIC
  75. .ram_to_mic = copy_ram_to_mic,
  76. .mic_to_ram = copy_mic_to_ram,
  77. .ram_to_mic_async = copy_ram_to_mic_async,
  78. .mic_to_ram_async = copy_mic_to_ram_async,
  79. #endif
  80. };
  81. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
  82. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node);
  83. static void *multiformat_handle_to_pointer(starpu_data_handle_t data_handle, unsigned node);
  84. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node);
  85. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  86. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  87. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  88. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  89. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  90. static struct starpu_multiformat_data_interface_ops*
  91. get_mf_ops(void *data_interface)
  92. {
  93. struct starpu_multiformat_interface *mf;
  94. mf = (struct starpu_multiformat_interface *) data_interface;
  95. return mf->ops;
  96. }
  97. struct starpu_data_interface_ops starpu_interface_multiformat_ops =
  98. {
  99. .register_data_handle = register_multiformat_handle,
  100. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  101. .handle_to_pointer = multiformat_handle_to_pointer,
  102. .free_data_on_node = free_multiformat_buffer_on_node,
  103. .copy_methods = &multiformat_copy_data_methods_s,
  104. .get_size = multiformat_interface_get_size,
  105. .footprint = footprint_multiformat_interface_crc32,
  106. .compare = multiformat_compare,
  107. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  108. .interface_size = sizeof(struct starpu_multiformat_interface),
  109. .display = display_multiformat_interface,
  110. .is_multiformat = 1,
  111. .get_mf_ops = get_mf_ops
  112. };
  113. static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, unsigned node)
  114. {
  115. STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
  116. struct starpu_multiformat_interface *multiformat_interface =
  117. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  118. switch(starpu_node_get_kind(node))
  119. {
  120. case STARPU_CPU_RAM:
  121. return multiformat_interface->cpu_ptr;
  122. #ifdef STARPU_USE_CUDA
  123. case STARPU_CUDA_RAM:
  124. return multiformat_interface->cuda_ptr;
  125. #endif
  126. #ifdef STARPU_USE_OPENCL
  127. case STARPU_OPENCL_RAM:
  128. return multiformat_interface->opencl_ptr;
  129. #endif
  130. #ifdef STARPU_USE_MIC
  131. case STARPU_MIC_RAM:
  132. return multiformat_interface->mic_ptr;
  133. #endif
  134. default:
  135. STARPU_ABORT();
  136. }
  137. return NULL;
  138. }
  139. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
  140. {
  141. struct starpu_multiformat_interface *multiformat_interface;
  142. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  143. unsigned node;
  144. for (node = 0; node < STARPU_MAXNODES; node++)
  145. {
  146. struct starpu_multiformat_interface *local_interface =
  147. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  148. if (node == home_node)
  149. {
  150. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  151. #ifdef STARPU_USE_CUDA
  152. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  153. #endif
  154. #ifdef STARPU_USE_OPENCL
  155. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  156. #endif
  157. #ifdef STARPU_USE_MIC
  158. local_interface->mic_ptr = multiformat_interface->mic_ptr;
  159. #endif
  160. }
  161. else
  162. {
  163. local_interface->cpu_ptr = NULL;
  164. #ifdef STARPU_USE_CUDA
  165. local_interface->cuda_ptr = NULL;
  166. #endif
  167. #ifdef STARPU_USE_OPENCL
  168. local_interface->opencl_ptr = NULL;
  169. #endif
  170. #ifdef STARPU_USE_MIC
  171. local_interface->mic_ptr = NULL;
  172. #endif
  173. }
  174. local_interface->id = multiformat_interface->id;
  175. local_interface->nx = multiformat_interface->nx;
  176. local_interface->ops = multiformat_interface->ops;
  177. }
  178. }
  179. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  180. unsigned home_node,
  181. void *ptr,
  182. uint32_t nobjects,
  183. struct starpu_multiformat_data_interface_ops *format_ops)
  184. {
  185. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_opencl_cl);
  186. _starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
  187. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
  188. _starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
  189. _starpu_codelet_check_deprecated_fields(format_ops->cpu_to_mic_cl);
  190. _starpu_codelet_check_deprecated_fields(format_ops->mic_to_cpu_cl);
  191. struct starpu_multiformat_interface multiformat =
  192. {
  193. .id = STARPU_MULTIFORMAT_INTERFACE_ID,
  194. .cpu_ptr = ptr,
  195. .cuda_ptr = NULL,
  196. .opencl_ptr = NULL,
  197. .mic_ptr = NULL,
  198. .nx = nobjects,
  199. .ops = format_ops
  200. };
  201. starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops);
  202. }
  203. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  204. {
  205. return starpu_hash_crc32c_be(starpu_multiformat_get_nx(handle), 0);
  206. }
  207. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  208. {
  209. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  210. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  211. return ((multiformat_a->nx == multiformat_b->nx)
  212. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  213. #ifdef STARPU_USE_CUDA
  214. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  215. #endif
  216. #ifdef STARPU_USE_OPENCL
  217. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  218. #endif
  219. #ifdef STARPU_USE_MIC
  220. && (multiformat_a->ops->mic_elemsize == multiformat_b->ops->mic_elemsize)
  221. #endif
  222. );
  223. }
  224. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  225. {
  226. struct starpu_multiformat_interface *multiformat_interface;
  227. multiformat_interface = (struct starpu_multiformat_interface *)
  228. starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  229. fprintf(f, "%u\t", multiformat_interface->nx);
  230. }
  231. /* XXX : returns CPU size */
  232. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  233. {
  234. size_t size;
  235. struct starpu_multiformat_interface *multiformat_interface;
  236. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  237. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  238. return size;
  239. }
  240. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  241. {
  242. struct starpu_multiformat_interface *multiformat_interface;
  243. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  244. return multiformat_interface->nx;
  245. }
  246. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node)
  247. {
  248. struct starpu_multiformat_interface *multiformat_interface;
  249. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  250. uintptr_t addr = 0;
  251. starpu_ssize_t allocated_memory = 0;
  252. size_t size;
  253. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  254. allocated_memory += size;
  255. addr = starpu_malloc_on_node(dst_node, size);
  256. if (!addr)
  257. goto fail_cpu;
  258. multiformat_interface->cpu_ptr = (void *) addr;
  259. #ifdef STARPU_USE_CUDA
  260. size = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  261. allocated_memory += size;
  262. addr = starpu_malloc_on_node(dst_node, size);
  263. if (!addr)
  264. goto fail_cuda;
  265. multiformat_interface->cuda_ptr = (void *) addr;
  266. #endif
  267. #ifdef STARPU_USE_OPENCL
  268. size = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  269. allocated_memory += size;
  270. addr = starpu_malloc_on_node(dst_node, size);
  271. if (!addr)
  272. goto fail_opencl;
  273. multiformat_interface->opencl_ptr = (void *) addr;
  274. #endif
  275. #ifdef STARPU_USE_MIC
  276. size = multiformat_interface->nx * multiformat_interface->ops->mic_elemsize;
  277. allocated_memory += size;
  278. addr = starpu_malloc_on_node(dst_node, size);
  279. if (!addr)
  280. goto fail_mic;
  281. multiformat_interface->mic_ptr = (void *) addr;
  282. #endif
  283. return allocated_memory;
  284. #ifdef STARPU_USE_MIC
  285. fail_mic:
  286. #endif
  287. #ifdef STARPU_USE_OPENCL
  288. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  289. fail_opencl:
  290. #endif
  291. #ifdef STARPU_USE_CUDA
  292. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  293. fail_cuda:
  294. #endif
  295. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  296. fail_cpu:
  297. return -ENOMEM;
  298. }
  299. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node)
  300. {
  301. struct starpu_multiformat_interface *multiformat_interface;
  302. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  303. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cpu_ptr,
  304. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  305. multiformat_interface->cpu_ptr = NULL;
  306. #ifdef STARPU_USE_CUDA
  307. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cuda_ptr,
  308. multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  309. multiformat_interface->cuda_ptr = NULL;
  310. #endif
  311. #ifdef STARPU_USE_OPENCL
  312. starpu_free_on_node(node, (uintptr_t) multiformat_interface->opencl_ptr,
  313. multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  314. multiformat_interface->opencl_ptr = NULL;
  315. #endif
  316. #ifdef STARPU_USE_MIC
  317. starpu_free_on_node(node, (uintptr_t) multiformat_interface->mic_ptr,
  318. multiformat_interface->nx * multiformat_interface->ops->mic_elemsize);
  319. multiformat_interface->mic_ptr = NULL;
  320. #endif
  321. }
  322. /*
  323. * Copy methods
  324. */
  325. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  326. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  327. {
  328. struct starpu_multiformat_interface *src_multiformat;
  329. struct starpu_multiformat_interface *dst_multiformat;
  330. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  331. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  332. STARPU_ASSERT(src_multiformat != NULL);
  333. STARPU_ASSERT(dst_multiformat != NULL);
  334. STARPU_ASSERT(dst_multiformat->ops != NULL);
  335. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  336. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  337. return 0;
  338. }
  339. #ifdef STARPU_USE_CUDA
  340. static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  341. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  342. enum cudaMemcpyKind kind)
  343. {
  344. struct starpu_multiformat_interface *src_multiformat;
  345. struct starpu_multiformat_interface *dst_multiformat;
  346. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  347. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  348. size_t size;
  349. cudaError_t status;
  350. switch (kind)
  351. {
  352. case cudaMemcpyHostToDevice:
  353. {
  354. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  355. if (src_multiformat->cuda_ptr == NULL)
  356. {
  357. src_multiformat->cuda_ptr = malloc(size);
  358. if (src_multiformat->cuda_ptr == NULL)
  359. return -ENOMEM;
  360. }
  361. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  362. if (STARPU_UNLIKELY(status))
  363. {
  364. STARPU_CUDA_REPORT_ERROR(status);
  365. }
  366. break;
  367. }
  368. case cudaMemcpyDeviceToHost:
  369. {
  370. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  371. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  372. if (STARPU_UNLIKELY(status))
  373. STARPU_CUDA_REPORT_ERROR(status);
  374. break;
  375. }
  376. case cudaMemcpyDeviceToDevice:
  377. {
  378. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  379. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  380. if (STARPU_UNLIKELY(status))
  381. STARPU_CUDA_REPORT_ERROR(status);
  382. break;
  383. }
  384. default:
  385. STARPU_ABORT();
  386. }
  387. return 0;
  388. }
  389. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  390. {
  391. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  392. }
  393. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  394. {
  395. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  396. }
  397. static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  398. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  399. cudaStream_t stream, enum cudaMemcpyKind kind)
  400. {
  401. struct starpu_multiformat_interface *src_multiformat;
  402. struct starpu_multiformat_interface *dst_multiformat;
  403. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  404. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  405. size_t size;
  406. cudaError_t status;
  407. switch (kind)
  408. {
  409. case cudaMemcpyHostToDevice:
  410. {
  411. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  412. if (src_multiformat->cuda_ptr == NULL)
  413. {
  414. src_multiformat->cuda_ptr = malloc(size);
  415. if (src_multiformat->cuda_ptr == NULL)
  416. return -ENOMEM;
  417. }
  418. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  419. if (STARPU_UNLIKELY(status))
  420. {
  421. STARPU_CUDA_REPORT_ERROR(status);
  422. }
  423. break;
  424. }
  425. case cudaMemcpyDeviceToHost:
  426. {
  427. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  428. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  429. if (STARPU_UNLIKELY(status))
  430. STARPU_CUDA_REPORT_ERROR(status);
  431. break;
  432. }
  433. case cudaMemcpyDeviceToDevice:
  434. {
  435. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  436. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  437. if (STARPU_UNLIKELY(status))
  438. STARPU_CUDA_REPORT_ERROR(status);
  439. break;
  440. }
  441. default:
  442. STARPU_ABORT();
  443. }
  444. return 0;
  445. }
  446. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  447. {
  448. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  449. }
  450. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  451. {
  452. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  453. }
  454. #ifdef HAVE_CUDA_MEMCPY_PEER
  455. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  456. void *dst_interface, unsigned dst_node,
  457. cudaStream_t stream)
  458. {
  459. struct starpu_multiformat_interface *src_multiformat;
  460. struct starpu_multiformat_interface *dst_multiformat;
  461. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  462. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  463. STARPU_ASSERT(src_multiformat != NULL);
  464. STARPU_ASSERT(dst_multiformat != NULL);
  465. STARPU_ASSERT(src_multiformat->ops != NULL);
  466. cudaError_t status;
  467. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  468. int src_dev = _starpu_memory_node_get_devid(src_node);
  469. int dst_dev = _starpu_memory_node_get_devid(dst_node);
  470. if (stream)
  471. {
  472. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  473. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  474. src_multiformat->cuda_ptr, src_dev,
  475. size, stream);
  476. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  477. /* All good ! Still, returning -EAGAIN, because we will need to
  478. check the transfert completion later */
  479. if (status == cudaSuccess)
  480. return -EAGAIN;
  481. }
  482. /* Either a synchronous transfert was requested, or the asynchronous one
  483. failed. */
  484. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  485. src_multiformat->cuda_ptr, src_dev,
  486. size);
  487. if (STARPU_UNLIKELY(status != cudaSuccess))
  488. STARPU_CUDA_REPORT_ERROR(status);
  489. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  490. return 0;
  491. }
  492. #endif
  493. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  494. {
  495. if (src_node == dst_node)
  496. {
  497. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  498. }
  499. else
  500. {
  501. #ifdef HAVE_CUDA_MEMCPY_PEER
  502. return copy_cuda_peer_common(src_interface, src_node,
  503. dst_interface, dst_node,
  504. NULL);
  505. #else
  506. STARPU_ABORT();
  507. #endif
  508. }
  509. }
  510. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  511. void *dst_interface, unsigned dst_node,
  512. cudaStream_t stream)
  513. {
  514. if (src_node == dst_node)
  515. {
  516. return copy_cuda_common_async(src_interface, src_node,
  517. dst_interface, dst_node,
  518. stream, cudaMemcpyDeviceToDevice);
  519. }
  520. else
  521. {
  522. #ifdef HAVE_CUDA_MEMCPY_PEER
  523. return copy_cuda_peer_common(src_interface, src_node,
  524. dst_interface, dst_node,
  525. stream);
  526. #else
  527. STARPU_ABORT();
  528. #endif
  529. }
  530. }
  531. #endif /* STARPU_USE_CUDA */
  532. #ifdef STARPU_USE_OPENCL
  533. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  534. void *dst_interface, unsigned dst_node,
  535. cl_event *event)
  536. {
  537. int err, ret;
  538. size_t size;
  539. struct starpu_multiformat_interface *src_multiformat;
  540. struct starpu_multiformat_interface *dst_multiformat;
  541. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  542. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  543. STARPU_ASSERT(src_multiformat != NULL);
  544. STARPU_ASSERT(dst_multiformat != NULL);
  545. STARPU_ASSERT(src_multiformat->ops != NULL);
  546. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  547. err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
  548. src_node,
  549. (cl_mem) dst_multiformat->cpu_ptr,
  550. dst_node,
  551. size,
  552. 0,
  553. event,
  554. &ret);
  555. if (STARPU_UNLIKELY(err))
  556. STARPU_OPENCL_REPORT_ERROR(err);
  557. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  558. return ret;
  559. }
  560. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  561. void *dst_interface, unsigned dst_node,
  562. cl_event *event)
  563. {
  564. int err, ret;
  565. size_t size;
  566. struct starpu_multiformat_interface *src_multiformat;
  567. struct starpu_multiformat_interface *dst_multiformat;
  568. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  569. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  570. STARPU_ASSERT(src_multiformat != NULL);
  571. STARPU_ASSERT(dst_multiformat != NULL);
  572. STARPU_ASSERT(src_multiformat->ops != NULL);
  573. STARPU_ASSERT(dst_multiformat->ops != NULL);
  574. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  575. if (dst_multiformat->opencl_ptr == NULL)
  576. {
  577. /* XXX : it is weird that we might have to allocate memory here... */
  578. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  579. }
  580. err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
  581. src_node,
  582. dst_multiformat->opencl_ptr,
  583. dst_node,
  584. size,
  585. 0,
  586. event,
  587. &ret);
  588. if (STARPU_UNLIKELY(err))
  589. STARPU_OPENCL_REPORT_ERROR(err);
  590. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  591. return ret;
  592. }
  593. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  594. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  595. {
  596. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  597. }
  598. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  599. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  600. {
  601. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  602. }
  603. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  604. void *dst_interface, unsigned dst_node)
  605. {
  606. (void) src_interface;
  607. (void) dst_interface;
  608. (void) src_node;
  609. (void) dst_node;
  610. STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");
  611. return 0;
  612. }
  613. #endif
  614. #ifdef STARPU_USE_MIC
  615. static int copy_mic_common_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
  616. int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
  617. {
  618. struct starpu_multiformat_interface *src_multiformat = src_interface;
  619. struct starpu_multiformat_interface *dst_multiformat = dst_interface;
  620. STARPU_ASSERT(src_multiformat != NULL);
  621. STARPU_ASSERT(dst_multiformat != NULL);
  622. STARPU_ASSERT(dst_multiformat->ops != NULL);
  623. size_t size = dst_multiformat->nx * dst_multiformat->ops->mic_elemsize;
  624. if (src_multiformat->mic_ptr == NULL)
  625. {
  626. src_multiformat->mic_ptr = malloc(size);
  627. if (src_multiformat->mic_ptr == NULL)
  628. return -ENOMEM;
  629. }
  630. copy_func(src_multiformat->cpu_ptr, src_node, dst_multiformat->cpu_ptr, dst_node, size);
  631. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  632. return 0;
  633. }
  634. static int copy_mic_common_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
  635. int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
  636. {
  637. struct starpu_multiformat_interface *src_multiformat = src_interface;
  638. struct starpu_multiformat_interface *dst_multiformat = dst_interface;
  639. STARPU_ASSERT(src_multiformat != NULL);
  640. STARPU_ASSERT(dst_multiformat != NULL);
  641. STARPU_ASSERT(dst_multiformat->ops != NULL);
  642. size_t size = src_multiformat->nx * src_multiformat->ops->mic_elemsize;
  643. copy_func(src_multiformat->mic_ptr, src_node, dst_multiformat->mic_ptr, dst_node, size);
  644. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  645. return 0;
  646. }
  647. static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  648. {
  649. return copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
  650. }
  651. static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  652. {
  653. return copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
  654. }
  655. static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  656. {
  657. copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
  658. return -EAGAIN;
  659. }
  660. static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  661. {
  662. copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
  663. return -EAGAIN;
  664. }
  665. #endif