multiformat_interface.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  18. #ifdef STARPU_USE_CUDA
  19. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  20. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  21. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  22. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  23. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
  24. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
  25. #endif
  26. #ifdef STARPU_USE_OPENCL
  27. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  28. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  29. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
  30. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  31. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
  32. #endif
  33. #ifdef STARPU_USE_MIC
  34. static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  35. static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  36. static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  37. static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
  38. #endif
  39. static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
  40. {
  41. .ram_to_ram = copy_ram_to_ram,
  42. #ifdef STARPU_USE_CUDA
  43. .ram_to_cuda = copy_ram_to_cuda,
  44. .cuda_to_ram = copy_cuda_to_ram,
  45. .ram_to_cuda_async = copy_ram_to_cuda_async,
  46. .cuda_to_ram_async = copy_cuda_to_ram_async,
  47. .cuda_to_cuda = copy_cuda_to_cuda,
  48. .cuda_to_cuda_async = copy_cuda_to_cuda_async,
  49. #else
  50. #ifdef STARPU_SIMGRID
  51. /* Enable GPU-GPU transfers in simgrid */
  52. .cuda_to_cuda_async = (void *)1,
  53. #endif
  54. #endif
  55. #ifdef STARPU_USE_OPENCL
  56. .ram_to_opencl = copy_ram_to_opencl,
  57. .opencl_to_ram = copy_opencl_to_ram,
  58. .opencl_to_opencl = copy_opencl_to_opencl,
  59. .ram_to_opencl_async = copy_ram_to_opencl_async,
  60. .opencl_to_ram_async = copy_opencl_to_ram_async,
  61. #endif
  62. #ifdef STARPU_USE_MIC
  63. .ram_to_mic = copy_ram_to_mic,
  64. .mic_to_ram = copy_mic_to_ram,
  65. .ram_to_mic_async = copy_ram_to_mic_async,
  66. .mic_to_ram_async = copy_mic_to_ram_async,
  67. #endif
  68. };
  69. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
  70. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node);
  71. static void *multiformat_to_pointer(void *data_interface, unsigned node);
  72. static int multiformat_pointer_is_inside(void *data_interface, unsigned node, void *ptr);
  73. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node);
  74. static size_t multiformat_interface_get_size(starpu_data_handle_t handle);
  75. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle);
  76. static int multiformat_compare(void *data_interface_a, void *data_interface_b);
  77. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f);
  78. static uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle);
  79. static struct starpu_multiformat_data_interface_ops*
  80. get_mf_ops(void *data_interface)
  81. {
  82. struct starpu_multiformat_interface *mf;
  83. mf = (struct starpu_multiformat_interface *) data_interface;
  84. return mf->ops;
  85. }
  86. struct starpu_data_interface_ops starpu_interface_multiformat_ops =
  87. {
  88. .register_data_handle = register_multiformat_handle,
  89. .allocate_data_on_node = allocate_multiformat_buffer_on_node,
  90. .to_pointer = multiformat_to_pointer,
  91. .pointer_is_inside = multiformat_pointer_is_inside,
  92. .free_data_on_node = free_multiformat_buffer_on_node,
  93. .copy_methods = &multiformat_copy_data_methods_s,
  94. .get_size = multiformat_interface_get_size,
  95. .footprint = footprint_multiformat_interface_crc32,
  96. .compare = multiformat_compare,
  97. .interfaceid = STARPU_MULTIFORMAT_INTERFACE_ID,
  98. .interface_size = sizeof(struct starpu_multiformat_interface),
  99. .display = display_multiformat_interface,
  100. .is_multiformat = 1,
  101. .get_mf_ops = get_mf_ops
  102. };
  103. static void *multiformat_to_pointer(void *data_interface, unsigned node)
  104. {
  105. struct starpu_multiformat_interface *multiformat_interface = data_interface;
  106. switch(starpu_node_get_kind(node))
  107. {
  108. case STARPU_CPU_RAM:
  109. return multiformat_interface->cpu_ptr;
  110. #ifdef STARPU_USE_CUDA
  111. case STARPU_CUDA_RAM:
  112. return multiformat_interface->cuda_ptr;
  113. #endif
  114. #ifdef STARPU_USE_OPENCL
  115. case STARPU_OPENCL_RAM:
  116. return multiformat_interface->opencl_ptr;
  117. #endif
  118. #ifdef STARPU_USE_MIC
  119. case STARPU_MIC_RAM:
  120. return multiformat_interface->mic_ptr;
  121. #endif
  122. default:
  123. STARPU_ABORT();
  124. }
  125. return NULL;
  126. }
  127. static int multiformat_pointer_is_inside(void *data_interface, unsigned node, void *ptr)
  128. {
  129. struct starpu_multiformat_interface *multiformat_interface = data_interface;
  130. switch(starpu_node_get_kind(node))
  131. {
  132. case STARPU_CPU_RAM:
  133. return (char*) ptr >= (char*) multiformat_interface->cpu_ptr &&
  134. (char*) ptr < (char*) multiformat_interface->cpu_ptr + multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  135. #ifdef STARPU_USE_CUDA
  136. case STARPU_CUDA_RAM:
  137. return (char*) ptr >= (char*) multiformat_interface->cuda_ptr &&
  138. (char*) ptr < (char*) multiformat_interface->cuda_ptr + multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  139. #endif
  140. #ifdef STARPU_USE_OPENCL
  141. case STARPU_OPENCL_RAM:
  142. return (char*) ptr >= (char*) multiformat_interface->opencl_ptr &&
  143. (char*) ptr < (char*) multiformat_interface->opencl_ptr + multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  144. #endif
  145. #ifdef STARPU_USE_MIC
  146. case STARPU_MIC_RAM:
  147. return (char*) ptr >= (char*) multiformat_interface->mic_ptr &&
  148. (char*) ptr < (char*) multiformat_interface->mic_ptr + multiformat_interface->nx * multiformat_interface->ops->mic_elemsize;
  149. #endif
  150. default:
  151. STARPU_ABORT();
  152. }
  153. return -1;
  154. }
  155. static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
  156. {
  157. struct starpu_multiformat_interface *multiformat_interface;
  158. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  159. unsigned node;
  160. for (node = 0; node < STARPU_MAXNODES; node++)
  161. {
  162. struct starpu_multiformat_interface *local_interface =
  163. (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, node);
  164. if (node == home_node)
  165. {
  166. local_interface->cpu_ptr = multiformat_interface->cpu_ptr;
  167. #ifdef STARPU_USE_CUDA
  168. local_interface->cuda_ptr = multiformat_interface->cuda_ptr;
  169. #endif
  170. #ifdef STARPU_USE_OPENCL
  171. local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
  172. #endif
  173. #ifdef STARPU_USE_MIC
  174. local_interface->mic_ptr = multiformat_interface->mic_ptr;
  175. #endif
  176. }
  177. else
  178. {
  179. local_interface->cpu_ptr = NULL;
  180. #ifdef STARPU_USE_CUDA
  181. local_interface->cuda_ptr = NULL;
  182. #endif
  183. #ifdef STARPU_USE_OPENCL
  184. local_interface->opencl_ptr = NULL;
  185. #endif
  186. #ifdef STARPU_USE_MIC
  187. local_interface->mic_ptr = NULL;
  188. #endif
  189. }
  190. local_interface->id = multiformat_interface->id;
  191. local_interface->nx = multiformat_interface->nx;
  192. local_interface->ops = multiformat_interface->ops;
  193. }
  194. }
  195. void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
  196. int home_node,
  197. void *ptr,
  198. uint32_t nobjects,
  199. struct starpu_multiformat_data_interface_ops *format_ops)
  200. {
  201. struct starpu_multiformat_interface multiformat =
  202. {
  203. .id = STARPU_MULTIFORMAT_INTERFACE_ID,
  204. .cpu_ptr = ptr,
  205. .cuda_ptr = NULL,
  206. .opencl_ptr = NULL,
  207. .mic_ptr = NULL,
  208. .nx = nobjects,
  209. .ops = format_ops
  210. };
  211. starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops);
  212. }
  213. static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
  214. {
  215. return starpu_hash_crc32c_be(starpu_multiformat_get_nx(handle), 0);
  216. }
  217. static int multiformat_compare(void *data_interface_a, void *data_interface_b)
  218. {
  219. struct starpu_multiformat_interface *multiformat_a = (struct starpu_multiformat_interface *) data_interface_a;
  220. struct starpu_multiformat_interface *multiformat_b = (struct starpu_multiformat_interface *) data_interface_b;
  221. return (multiformat_a->nx == multiformat_b->nx)
  222. && (multiformat_a->ops->cpu_elemsize == multiformat_b->ops->cpu_elemsize)
  223. #ifdef STARPU_USE_CUDA
  224. && (multiformat_a->ops->cuda_elemsize == multiformat_b->ops->cuda_elemsize)
  225. #endif
  226. #ifdef STARPU_USE_OPENCL
  227. && (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
  228. #endif
  229. #ifdef STARPU_USE_MIC
  230. && (multiformat_a->ops->mic_elemsize == multiformat_b->ops->mic_elemsize)
  231. #endif
  232. ;
  233. }
  234. static void display_multiformat_interface(starpu_data_handle_t handle, FILE *f)
  235. {
  236. struct starpu_multiformat_interface *multiformat_interface;
  237. multiformat_interface = (struct starpu_multiformat_interface *)
  238. starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  239. fprintf(f, "%u\t", multiformat_interface->nx);
  240. }
  241. /* XXX : returns CPU size */
  242. static size_t multiformat_interface_get_size(starpu_data_handle_t handle)
  243. {
  244. size_t size;
  245. struct starpu_multiformat_interface *multiformat_interface;
  246. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  247. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  248. return size;
  249. }
  250. uint32_t starpu_multiformat_get_nx(starpu_data_handle_t handle)
  251. {
  252. struct starpu_multiformat_interface *multiformat_interface;
  253. multiformat_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
  254. return multiformat_interface->nx;
  255. }
  256. static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_, unsigned dst_node)
  257. {
  258. struct starpu_multiformat_interface *multiformat_interface;
  259. multiformat_interface = (struct starpu_multiformat_interface *) data_interface_;
  260. uintptr_t addr = 0;
  261. starpu_ssize_t allocated_memory = 0;
  262. size_t size;
  263. size = multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize;
  264. allocated_memory += size;
  265. addr = starpu_malloc_on_node(dst_node, size);
  266. if (!addr)
  267. goto fail_cpu;
  268. multiformat_interface->cpu_ptr = (void *) addr;
  269. #ifdef STARPU_USE_CUDA
  270. size = multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize;
  271. allocated_memory += size;
  272. addr = starpu_malloc_on_node(dst_node, size);
  273. if (!addr)
  274. goto fail_cuda;
  275. multiformat_interface->cuda_ptr = (void *) addr;
  276. #endif
  277. #ifdef STARPU_USE_OPENCL
  278. size = multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize;
  279. allocated_memory += size;
  280. addr = starpu_malloc_on_node(dst_node, size);
  281. if (!addr)
  282. goto fail_opencl;
  283. multiformat_interface->opencl_ptr = (void *) addr;
  284. #endif
  285. #ifdef STARPU_USE_MIC
  286. size = multiformat_interface->nx * multiformat_interface->ops->mic_elemsize;
  287. allocated_memory += size;
  288. addr = starpu_malloc_on_node(dst_node, size);
  289. if (!addr)
  290. goto fail_mic;
  291. multiformat_interface->mic_ptr = (void *) addr;
  292. #endif
  293. return allocated_memory;
  294. #ifdef STARPU_USE_MIC
  295. fail_mic:
  296. #endif
  297. #ifdef STARPU_USE_OPENCL
  298. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  299. fail_opencl:
  300. #endif
  301. #ifdef STARPU_USE_CUDA
  302. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  303. fail_cuda:
  304. #endif
  305. starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  306. fail_cpu:
  307. return -ENOMEM;
  308. }
  309. static void free_multiformat_buffer_on_node(void *data_interface, unsigned node)
  310. {
  311. struct starpu_multiformat_interface *multiformat_interface;
  312. multiformat_interface = (struct starpu_multiformat_interface *) data_interface;
  313. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cpu_ptr,
  314. multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
  315. multiformat_interface->cpu_ptr = NULL;
  316. #ifdef STARPU_USE_CUDA
  317. starpu_free_on_node(node, (uintptr_t) multiformat_interface->cuda_ptr,
  318. multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
  319. multiformat_interface->cuda_ptr = NULL;
  320. #endif
  321. #ifdef STARPU_USE_OPENCL
  322. starpu_free_on_node(node, (uintptr_t) multiformat_interface->opencl_ptr,
  323. multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
  324. multiformat_interface->opencl_ptr = NULL;
  325. #endif
  326. #ifdef STARPU_USE_MIC
  327. starpu_free_on_node(node, (uintptr_t) multiformat_interface->mic_ptr,
  328. multiformat_interface->nx * multiformat_interface->ops->mic_elemsize);
  329. multiformat_interface->mic_ptr = NULL;
  330. #endif
  331. }
  332. /*
  333. * Copy methods
  334. */
  335. static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  336. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  337. {
  338. struct starpu_multiformat_interface *src_multiformat;
  339. struct starpu_multiformat_interface *dst_multiformat;
  340. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  341. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  342. STARPU_ASSERT(src_multiformat != NULL);
  343. STARPU_ASSERT(dst_multiformat != NULL);
  344. STARPU_ASSERT(dst_multiformat->ops != NULL);
  345. size_t size = dst_multiformat->nx * dst_multiformat->ops->cpu_elemsize;
  346. memcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size);
  347. return 0;
  348. }
  349. #ifdef STARPU_USE_CUDA
  350. static int copy_cuda_common(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  351. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  352. enum cudaMemcpyKind kind)
  353. {
  354. struct starpu_multiformat_interface *src_multiformat;
  355. struct starpu_multiformat_interface *dst_multiformat;
  356. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  357. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  358. size_t size;
  359. cudaError_t status;
  360. switch (kind)
  361. {
  362. case cudaMemcpyHostToDevice:
  363. {
  364. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  365. if (src_multiformat->cuda_ptr == NULL)
  366. {
  367. src_multiformat->cuda_ptr = malloc(size);
  368. if (src_multiformat->cuda_ptr == NULL)
  369. return -ENOMEM;
  370. }
  371. status = cudaMemcpy(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind);
  372. if (!status)
  373. status = cudaDeviceSynchronize();
  374. if (STARPU_UNLIKELY(status))
  375. STARPU_CUDA_REPORT_ERROR(status);
  376. break;
  377. }
  378. case cudaMemcpyDeviceToHost:
  379. {
  380. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  381. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  382. if (!status)
  383. status = cudaDeviceSynchronize();
  384. if (STARPU_UNLIKELY(status))
  385. STARPU_CUDA_REPORT_ERROR(status);
  386. break;
  387. }
  388. case cudaMemcpyDeviceToDevice:
  389. {
  390. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  391. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  392. if (!status)
  393. status = cudaDeviceSynchronize();
  394. if (STARPU_UNLIKELY(status))
  395. STARPU_CUDA_REPORT_ERROR(status);
  396. break;
  397. }
  398. default:
  399. STARPU_ABORT();
  400. }
  401. return 0;
  402. }
  403. static int copy_ram_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  404. {
  405. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyHostToDevice);
  406. }
  407. static int copy_cuda_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node)
  408. {
  409. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToHost);
  410. }
  411. static int copy_cuda_common_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  412. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED,
  413. cudaStream_t stream, enum cudaMemcpyKind kind)
  414. {
  415. struct starpu_multiformat_interface *src_multiformat;
  416. struct starpu_multiformat_interface *dst_multiformat;
  417. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  418. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  419. size_t size;
  420. cudaError_t status;
  421. switch (kind)
  422. {
  423. case cudaMemcpyHostToDevice:
  424. {
  425. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  426. if (src_multiformat->cuda_ptr == NULL)
  427. {
  428. src_multiformat->cuda_ptr = malloc(size);
  429. if (src_multiformat->cuda_ptr == NULL)
  430. return -ENOMEM;
  431. }
  432. status = cudaMemcpyAsync(dst_multiformat->cpu_ptr, src_multiformat->cpu_ptr, size, kind, stream);
  433. if (STARPU_UNLIKELY(status))
  434. {
  435. STARPU_CUDA_REPORT_ERROR(status);
  436. }
  437. break;
  438. }
  439. case cudaMemcpyDeviceToHost:
  440. {
  441. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  442. status = cudaMemcpy(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind);
  443. if (!status)
  444. status = cudaDeviceSynchronize();
  445. if (STARPU_UNLIKELY(status))
  446. STARPU_CUDA_REPORT_ERROR(status);
  447. break;
  448. }
  449. case cudaMemcpyDeviceToDevice:
  450. {
  451. size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  452. status = cudaMemcpyAsync(dst_multiformat->cuda_ptr, src_multiformat->cuda_ptr, size, kind, stream);
  453. if (STARPU_UNLIKELY(status))
  454. STARPU_CUDA_REPORT_ERROR(status);
  455. break;
  456. }
  457. default:
  458. STARPU_ABORT();
  459. }
  460. return 0;
  461. }
  462. static int copy_ram_to_cuda_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  463. {
  464. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyHostToDevice);
  465. }
  466. static int copy_cuda_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cudaStream_t stream)
  467. {
  468. return copy_cuda_common_async(src_interface, src_node, dst_interface, dst_node, stream, cudaMemcpyDeviceToHost);
  469. }
  470. #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
  471. static int copy_cuda_peer_common(void *src_interface, unsigned src_node,
  472. void *dst_interface, unsigned dst_node,
  473. cudaStream_t stream)
  474. {
  475. struct starpu_multiformat_interface *src_multiformat;
  476. struct starpu_multiformat_interface *dst_multiformat;
  477. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  478. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  479. STARPU_ASSERT(src_multiformat != NULL);
  480. STARPU_ASSERT(dst_multiformat != NULL);
  481. STARPU_ASSERT(src_multiformat->ops != NULL);
  482. cudaError_t status;
  483. int size = src_multiformat->nx * src_multiformat->ops->cuda_elemsize;
  484. int src_dev = starpu_memory_node_get_devid(src_node);
  485. int dst_dev = starpu_memory_node_get_devid(dst_node);
  486. if (stream)
  487. {
  488. double start;
  489. starpu_interface_start_driver_copy_async(src_node, dst_node, &start);
  490. status = cudaMemcpyPeerAsync(dst_multiformat->cuda_ptr, dst_dev,
  491. src_multiformat->cuda_ptr, src_dev,
  492. size, stream);
  493. starpu_interface_end_driver_copy_async(src_node, dst_node, start);
  494. /* All good ! Still, returning -EAGAIN, because we will need to
  495. check the transfert completion later */
  496. if (status == cudaSuccess)
  497. return -EAGAIN;
  498. }
  499. /* Either a synchronous transfert was requested, or the asynchronous one
  500. failed. */
  501. status = cudaMemcpyPeer(dst_multiformat->cuda_ptr, dst_dev,
  502. src_multiformat->cuda_ptr, src_dev,
  503. size);
  504. if (!status)
  505. status = cudaDeviceSynchronize();
  506. if (STARPU_UNLIKELY(status != cudaSuccess))
  507. STARPU_CUDA_REPORT_ERROR(status);
  508. starpu_interface_data_copy(src_node, dst_node, size);
  509. return 0;
  510. }
  511. #endif
  512. static int copy_cuda_to_cuda(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  513. {
  514. if (src_node == dst_node)
  515. {
  516. return copy_cuda_common(src_interface, src_node, dst_interface, dst_node, cudaMemcpyDeviceToDevice);
  517. }
  518. else
  519. {
  520. #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
  521. return copy_cuda_peer_common(src_interface, src_node,
  522. dst_interface, dst_node,
  523. NULL);
  524. #else
  525. STARPU_ABORT();
  526. #endif
  527. }
  528. }
  529. static int copy_cuda_to_cuda_async(void *src_interface, unsigned src_node,
  530. void *dst_interface, unsigned dst_node,
  531. cudaStream_t stream)
  532. {
  533. if (src_node == dst_node)
  534. {
  535. return copy_cuda_common_async(src_interface, src_node,
  536. dst_interface, dst_node,
  537. stream, cudaMemcpyDeviceToDevice);
  538. }
  539. else
  540. {
  541. #ifdef STARPU_HAVE_CUDA_MEMCPY_PEER
  542. return copy_cuda_peer_common(src_interface, src_node,
  543. dst_interface, dst_node,
  544. stream);
  545. #else
  546. STARPU_ABORT();
  547. #endif
  548. }
  549. }
  550. #endif /* STARPU_USE_CUDA */
  551. #ifdef STARPU_USE_OPENCL
  552. static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node,
  553. void *dst_interface, unsigned dst_node,
  554. cl_event *event)
  555. {
  556. int err, ret;
  557. size_t size;
  558. struct starpu_multiformat_interface *src_multiformat;
  559. struct starpu_multiformat_interface *dst_multiformat;
  560. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  561. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  562. STARPU_ASSERT(src_multiformat != NULL);
  563. STARPU_ASSERT(dst_multiformat != NULL);
  564. STARPU_ASSERT(src_multiformat->ops != NULL);
  565. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  566. err = starpu_opencl_copy_ram_to_opencl(src_multiformat->cpu_ptr,
  567. src_node,
  568. (cl_mem) dst_multiformat->cpu_ptr,
  569. dst_node,
  570. size,
  571. 0,
  572. event,
  573. &ret);
  574. if (STARPU_UNLIKELY(err))
  575. STARPU_OPENCL_REPORT_ERROR(err);
  576. starpu_interface_data_copy(src_node, dst_node, size);
  577. return ret;
  578. }
  579. static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node,
  580. void *dst_interface, unsigned dst_node,
  581. cl_event *event)
  582. {
  583. int err, ret;
  584. size_t size;
  585. struct starpu_multiformat_interface *src_multiformat;
  586. struct starpu_multiformat_interface *dst_multiformat;
  587. src_multiformat = (struct starpu_multiformat_interface *) src_interface;
  588. dst_multiformat = (struct starpu_multiformat_interface *) dst_interface;
  589. STARPU_ASSERT(src_multiformat != NULL);
  590. STARPU_ASSERT(dst_multiformat != NULL);
  591. STARPU_ASSERT(src_multiformat->ops != NULL);
  592. STARPU_ASSERT(dst_multiformat->ops != NULL);
  593. size = src_multiformat->nx * src_multiformat->ops->opencl_elemsize;
  594. if (dst_multiformat->opencl_ptr == NULL)
  595. {
  596. /* XXX : it is weird that we might have to allocate memory here... */
  597. dst_multiformat->opencl_ptr = malloc(dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize);
  598. STARPU_ASSERT_MSG(dst_multiformat->opencl_ptr != NULL || dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize == 0, "Cannot allocate %ld bytes\n", (long) (dst_multiformat->nx * dst_multiformat->ops->opencl_elemsize));
  599. }
  600. err = starpu_opencl_copy_opencl_to_ram((cl_mem)src_multiformat->opencl_ptr,
  601. src_node,
  602. dst_multiformat->opencl_ptr,
  603. dst_node,
  604. size,
  605. 0,
  606. event,
  607. &ret);
  608. if (STARPU_UNLIKELY(err))
  609. STARPU_OPENCL_REPORT_ERROR(err);
  610. starpu_interface_data_copy(src_node, dst_node, size);
  611. return ret;
  612. }
  613. static int copy_ram_to_opencl(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  614. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  615. {
  616. return copy_ram_to_opencl_async(src_interface, src_node, dst_interface, dst_node, NULL);
  617. }
  618. static int copy_opencl_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED,
  619. void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
  620. {
  621. return copy_opencl_to_ram_async(src_interface, src_node, dst_interface, dst_node, NULL);
  622. }
  623. static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
  624. void *dst_interface, unsigned dst_node)
  625. {
  626. (void) src_interface;
  627. (void) dst_interface;
  628. (void) src_node;
  629. (void) dst_node;
  630. STARPU_ASSERT_MSG(0, "XXX multiformat copy OpenCL-OpenCL not supported yet (TODO)");
  631. return 0;
  632. }
  633. #endif
  634. #ifdef STARPU_USE_MIC
  635. static int copy_mic_common_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
  636. int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
  637. {
  638. struct starpu_multiformat_interface *src_multiformat = src_interface;
  639. struct starpu_multiformat_interface *dst_multiformat = dst_interface;
  640. STARPU_ASSERT(src_multiformat != NULL);
  641. STARPU_ASSERT(dst_multiformat != NULL);
  642. STARPU_ASSERT(dst_multiformat->ops != NULL);
  643. size_t size = dst_multiformat->nx * dst_multiformat->ops->mic_elemsize;
  644. if (src_multiformat->mic_ptr == NULL)
  645. {
  646. src_multiformat->mic_ptr = malloc(size);
  647. if (src_multiformat->mic_ptr == NULL)
  648. return -ENOMEM;
  649. }
  650. copy_func(src_multiformat->cpu_ptr, src_node, dst_multiformat->cpu_ptr, dst_node, size);
  651. starpu_interface_data_copy(src_node, dst_node, size);
  652. return 0;
  653. }
  654. static int copy_mic_common_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
  655. int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
  656. {
  657. struct starpu_multiformat_interface *src_multiformat = src_interface;
  658. struct starpu_multiformat_interface *dst_multiformat = dst_interface;
  659. STARPU_ASSERT(src_multiformat != NULL);
  660. STARPU_ASSERT(dst_multiformat != NULL);
  661. STARPU_ASSERT(dst_multiformat->ops != NULL);
  662. size_t size = src_multiformat->nx * src_multiformat->ops->mic_elemsize;
  663. copy_func(src_multiformat->mic_ptr, src_node, dst_multiformat->mic_ptr, dst_node, size);
  664. starpu_interface_data_copy(src_node, dst_node, size);
  665. return 0;
  666. }
  667. static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  668. {
  669. return copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
  670. }
  671. static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  672. {
  673. return copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
  674. }
  675. static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  676. {
  677. copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
  678. return -EAGAIN;
  679. }
  680. static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
  681. {
  682. copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
  683. return -EAGAIN;
  684. }
  685. #endif