driver_fpga.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2019-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <starpu_fpga.h>
  18. #include <starpu_profiling.h>
  19. #include <common/utils.h>
  20. #include <common/config.h>
  21. #include <core/debug.h>
  22. #include <drivers/driver_common/driver_common.h>
  23. #include "driver_fpga.h"
  24. #include <core/sched_policy.h>
  25. #include <datawizard/memory_manager.h>
  26. #include <datawizard/memory_nodes.h>
  27. #include <datawizard/malloc.h>
  28. //#include <MaxSLiCInterface.h>
  /* ANSI terminal escape sequences used to colour the driver's console output. */
  #define KNRM "\x1B[0m"
  #define KRED "\x1B[31m"
  #define KGRN "\x1B[32m"
  #define KYEL "\x1B[33m"
  #define KBLU "\x1B[34m"
  #define KMAG "\x1B[35m"
  #define KCYN "\x1B[36m"
  #define KWHT "\x1B[37m"

  /* Semantic aliases: FPGA_OK was previously defined twice with the same
   * value; a single definition is kept. */
  #define FPGA_OK KGRN
  #define FPGA_ERROR KRED
  #define NORMAL KNRM
  41. //#define STARPU_MAXFPGADEVS 4
  42. /* the number of FPGA devices */
  43. static unsigned nfpgafpgas;
  44. static size_t global_mem[STARPU_MAXFPGADEVS];
  45. static max_engine_t *engines[STARPU_MAXFPGADEVS];
  46. static fpga_mem current_address[STARPU_MAXFPGADEVS];
  47. static void _starpu_fpga_limit_global_mem(unsigned );
  48. static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
  49. void fpga_msg(char *msg)
  50. {
  51. printf(FPGA_OK "%s\n" NORMAL, msg);
  52. }
  53. max_engine_t *starpu_fpga_get_local_engine(void)
  54. {
  55. int worker = starpu_worker_get_id_check();
  56. int devid = starpu_worker_get_devid(worker);
  57. return engines[devid];
  58. }
  /* Driver-wide initialisation hook. Currently there is nothing to set up.
   * Declared with an explicit (void) parameter list so that the definition
   * provides a prototype and calls with arguments are diagnosed. */
  void _starpu_init_fpga(void)
  {
  }
  62. void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
  63. {
  64. //TODO: This is statically assigned, in the next round of integration
  65. // I will have to read from the struct fpga in fpga
  66. struct starpu_max_load *load = _starpu_config.conf.fpga_load;
  67. const char *sim_socket = max_config_get_string(MAX_CONFIG_USE_SIMULATION);
  68. int n;
  69. n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
  70. if (n != -1)
  71. {
  72. config->topology.nhwfpgafpgas = nfpgafpgas = n;
  73. return;
  74. }
  75. if (!load)
  76. {
  77. /* Nothing specified, single-FPGA execution with basic static
  78. * interface, file will be auto-loaded by SLiC. */
  79. n = 1;
  80. }
  81. else
  82. {
  83. struct starpu_max_load *cur, *star = NULL;
  84. size_t nstar = 0;
  85. /* First check if we have a star, we will want to subtract non-star loads from it */
  86. for (cur = load; cur->engine_id_pattern; cur++)
  87. if (!strcmp(cur->engine_id_pattern, "*"))
  88. {
  89. STARPU_ASSERT_MSG(!cur[1].file, "in starpu_max_load array, * pattern must be last");
  90. star = cur;
  91. if (sim_socket)
  92. /* not specified, assume 1 */
  93. nstar = 1;
  94. else
  95. nstar = max_count_engines_free(cur->file, cur->engine_id_pattern);
  96. break;
  97. }
  98. n = 0;
  99. /* Now check the non-star loads */
  100. for (cur = load; cur != star && cur->engine_id_pattern; cur++)
  101. {
  102. size_t size;
  103. size = max_count_engines_free(load->file, load->engine_id_pattern);
  104. STARPU_ASSERT_MSG(size > 0, "cannot load starpu_max_load element %d on %s", (unsigned) (cur - load), load->engine_id_pattern);
  105. /* One FPGA more to be used */
  106. n++;
  107. if (nstar)
  108. {
  109. size = max_count_engines_free(load->file, "*");
  110. if (size > 1)
  111. /* One of the star devices will be used to load this file */
  112. nstar--;
  113. }
  114. }
  115. n += nstar;
  116. }
  117. //LMemInterface addLMemInterface()
  118. //// pour récupérer l'accès à la LMem
  119. config->topology.nhwfpgafpgas = nfpgafpgas = n;
  120. }
  121. unsigned _starpu_fpga_get_device_count(void)
  122. {
  123. return nfpgafpgas;
  124. }
  125. static void _starpu_fpga_limit_global_mem(unsigned devid)
  126. {
  127. starpu_ssize_t limit=-1;
  128. //TODO
  129. limit = starpu_get_env_number("STARPU_LIMIT_FPGA_MEM");
  130. if(limit != -1)
  131. global_mem[devid] = limit*1024*1024;
  132. }
  133. static size_t _starpu_fpga_get_global_mem_size(unsigned devid)
  134. {
  135. return global_mem[devid];
  136. }
  /* Per-worker context initialisation placeholder: intentionally does nothing
   * with WORKERID. */
  static void init_fpga_worker_context(unsigned workerid)
  {
      (void) workerid;
  }
  140. static void init_device_context(unsigned devid)
  141. {
  142. struct starpu_max_load *load = _starpu_config.conf.fpga_load;
  143. /* 0 would be seen as NULL, i.e. allocation failed... */
  144. // FIXME: Maxeler FPGAs want 192-byte alignment
  145. current_address[devid] = (fpga_mem) (8192*192);
  146. global_mem[devid] = 128ULL*1024*1024*1024;
  147. _starpu_fpga_limit_global_mem(devid);
  148. if (!load) {
  149. /* Nothing specified, single-FPGA execution with basic static
  150. * interface, file will be auto-loaded by SLiC. */
  151. return;
  152. } else {
  153. unsigned n;
  154. /* Which load we shall use */
  155. for (n = 0; load->file; load++)
  156. {
  157. if (!strcmp(load->engine_id_pattern, "*"))
  158. break;
  159. if (n == devid)
  160. break;
  161. }
  162. STARPU_ASSERT(load->file);
  163. if (!strcmp(load->engine_id_pattern, "*"))
  164. {
  165. char s[32];
  166. snprintf(s, sizeof(s), "local:%u", (unsigned) devid);
  167. /* FIXME: this assumes that the loads are in-order.
  168. * Ideally we'd detect which ones had an explicit load */
  169. engines[n] = max_load(load->file, load->engine_id_pattern);
  170. }
  171. else
  172. engines[n] = max_load(load->file, load->engine_id_pattern);
  173. }
  174. }
  175. int _starpu_fpga_driver_init(struct _starpu_worker *worker)
  176. {
  177. int devid = worker->devid;
  178. //fpga_msg("successful till here");
  179. _starpu_driver_start(worker, _STARPU_FUT_CPU_KEY, 1);
  180. /* FIXME: when we have NUMA support, properly turn node number into NUMA node number */
  181. // TODO: drop test when we allocated a memory node for fpga
  182. if (worker->memory_node != STARPU_MAIN_RAM)
  183. _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_fpga_get_global_mem_size(worker->devid));
  184. // TODO: multiple fpga in same thread
  185. init_device_context(devid);
  186. snprintf(worker->name, sizeof(worker->name), "FPGA %d", devid);
  187. snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid);
  188. starpu_pthread_setname(worker->short_name);
  189. _STARPU_TRACE_WORKER_INIT_END(worker->workerid);
  190. /* tell the main thread that we are ready */
  191. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  192. worker->status = STATUS_UNKNOWN;
  193. worker->worker_is_initialized = 1;
  194. STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond);
  195. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  196. return 0;
  197. }
  198. static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch)
  199. {
  200. int ret;
  201. int profiling = starpu_profiling_status_get();
  202. struct starpu_task *task = j->task;
  203. struct starpu_codelet *cl = task->cl;
  204. STARPU_ASSERT(cl);
  205. /* TODO: use asynchronous */
  206. ret = _starpu_fetch_task_input(task, j, 0);
  207. if (ret != 0)
  208. {
  209. /* there was not enough memory so the codelet cannot be executed right now ... */
  210. /* push the codelet back and try another one ... */
  211. return -EAGAIN;
  212. }
  213. /* Give profiling variable */
  214. _starpu_driver_start_job(fpga_args, j, perf_arch, rank, profiling);
  215. /* In case this is a Fork-join parallel task, the worker does not
  216. * execute the kernel at all. */
  217. if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
  218. {
  219. _starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
  220. //char *kernel_type = _starpu_task_get_fpga_kernel_type_nth_implementation(cl, j->nimpl);
  221. //printf("chanel reserved: %d \n",chnl);
  222. STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
  223. if (_starpu_get_disable_kernels() <= 0)
  224. {
  225. _STARPU_TRACE_START_EXECUTING();
  226. //int chnl = fpga_reserve_chanel_of_kernel_type(kernel_type);
  227. //_starpu_fpga_transfer_data(_STARPU_TASK_GET_INTERFACES(task), j, chnl);
  228. //fpga_release_chanel(chnl);
  229. func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
  230. _STARPU_TRACE_END_EXECUTING();
  231. }
  232. }
  233. _starpu_driver_end_job(fpga_args, j, perf_arch, rank, profiling);
  234. _starpu_driver_update_job_feedback(j, fpga_args, perf_arch, profiling);
  235. _starpu_push_task_output(j);
  236. return 0;
  237. }
  238. int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
  239. {
  240. unsigned memnode = fpga_worker->memory_node;
  241. int workerid = fpga_worker->workerid;
  242. _STARPU_TRACE_START_PROGRESS(memnode);
  243. _starpu_datawizard_progress(1);
  244. if (memnode != STARPU_MAIN_RAM)
  245. {
  246. _starpu_datawizard_progress(1);
  247. }
  248. _STARPU_TRACE_END_PROGRESS(memnode);
  249. struct _starpu_job *j;
  250. struct starpu_task *task;
  251. int res;
  252. task = _starpu_get_worker_task(fpga_worker, workerid, memnode);
  253. if (!task)
  254. return 0;
  255. j = _starpu_get_job_associated_to_task(task);
  256. /* can a cpu perform that task ? */
  257. if (!_STARPU_FPGA_MAY_PERFORM(j))
  258. {
  259. /* put it and the end of the queue ... XXX */
  260. _starpu_push_task_to_workers(task);
  261. return 0;
  262. }
  263. int rank = 0;
  264. int is_parallel_task = (j->task_size > 1);
  265. struct starpu_perfmodel_arch* perf_arch;
  266. if (is_parallel_task)
  267. {
  268. STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
  269. rank = j->active_task_alias_count++;
  270. STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
  271. if(j->combined_workerid != -1)
  272. {
  273. struct _starpu_combined_worker *combined_worker;
  274. combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid);
  275. fpga_worker->combined_workerid = j->combined_workerid;
  276. fpga_worker->worker_size = combined_worker->worker_size;
  277. fpga_worker->current_rank = rank;
  278. perf_arch = &combined_worker->perf_arch;
  279. }
  280. else
  281. {
  282. struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(fpga_worker, j);
  283. STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", fpga_worker->workerid);
  284. perf_arch = &sched_ctx->perf_arch;
  285. }
  286. }
  287. else
  288. {
  289. fpga_worker->combined_workerid = fpga_worker->workerid;
  290. fpga_worker->worker_size = 1;
  291. fpga_worker->current_rank = 0;
  292. perf_arch = &fpga_worker->perf_arch;
  293. }
  294. _starpu_set_current_task(j->task);
  295. fpga_worker->current_task = j->task;
  296. res = execute_job_on_fpga(j, task, fpga_worker, rank, perf_arch);
  297. _starpu_set_current_task(NULL);
  298. fpga_worker->current_task = NULL;
  299. if (res)
  300. {
  301. switch (res)
  302. {
  303. case -EAGAIN:
  304. _starpu_push_task_to_workers(task);
  305. return 0;
  306. default:
  307. STARPU_ABORT();
  308. }
  309. }
  310. /* In the case of combined workers, we need to inform the
  311. * scheduler each worker's execution is over.
  312. * Then we free the workers' task alias */
  313. if (is_parallel_task)
  314. {
  315. _starpu_sched_post_exec_hook(task);
  316. free(task);
  317. }
  318. if (rank == 0)
  319. _starpu_handle_job_termination(j);
  320. return 0;
  321. }
  322. int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker)
  323. {
  324. _STARPU_TRACE_WORKER_DEINIT_START;
  325. unsigned memnode = fpga_worker->memory_node;
  326. _starpu_handle_all_pending_node_data_requests(memnode);
  327. /* In case there remains some memory that was automatically
  328. * allocated by StarPU, we release it now. Note that data
  329. * coherency is not maintained anymore at that point ! */
  330. _starpu_free_all_automatically_allocated_buffers(memnode);
  331. fpga_worker->worker_is_initialized = 0;
  332. _STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CPU_KEY);
  333. return 0;
  334. }
  335. void *_starpu_fpga_worker(void *_arg)
  336. {
  337. struct _starpu_worker* worker = _arg;
  338. unsigned memnode = worker->memory_node;
  339. _starpu_fpga_driver_init(worker);
  340. _STARPU_TRACE_START_PROGRESS(memnode);
  341. while (_starpu_machine_is_running())
  342. {
  343. _starpu_may_pause();
  344. //fpga_msg("\tEntered the main loop\n");
  345. _starpu_fpga_driver_run_once(worker);
  346. }
  347. _STARPU_TRACE_END_PROGRESS(memnode);
  348. _starpu_fpga_driver_deinit(worker);
  349. return NULL;
  350. }
  351. uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags)
  352. {
  353. (void) flags;
  354. unsigned devid = starpu_memory_node_get_devid(dst_node);
  355. fpga_mem addr, next_addr;
  356. addr = current_address[devid];
  357. next_addr = current_address[devid] + size;
  358. if (next_addr >= (fpga_mem) global_mem[devid])
  359. {
  360. printf("Memory overflow on %d\n", devid);
  361. return 0;
  362. }
  363. current_address[devid] = next_addr;
  364. printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size);
  365. return (uintptr_t) addr;
  366. }
  /* Copy SIZE bytes from SRC (main RAM) to DST (FPGA address), tracing the
   * destination address on stdout. The copy is currently a plain memcpy();
   * the intended device call is LMemLoopback_writeLMem(dst, size, src).
   * Always returns 0. */
  int _starpu_fpga_copy_ram_to_fpga(void *src, void *dst, size_t size)
  {
      printf("ram to fpga, fpga @= %p\n", dst);

      (void) memcpy(dst, src, size);

      return 0;
  }
  /* Placeholder for a RAM-to-FPGA transfer of SIZE bytes from SRC to DST.
   * For now it only traces the destination address on stdout and copies
   * nothing; the intended device call is
   * LMemLoopback_writeLMem(size, dst, src). */
  void copy_ram_to_fpga(void *src, void *dst, size_t size)
  {
      (void) src;
      (void) size;

      printf("ram to fpga, fpga @= %p\n", dst);
  }
  /* Placeholder for an FPGA-to-RAM transfer of SIZE bytes from SRC to DST.
   * For now it only traces the source (FPGA) address on stdout and copies
   * nothing; the intended device call is
   * LMemLoopback_readLMem(size, src, dst).
   *
   * The trace previously read "ram to fpga", copied from the opposite
   * direction; it now names the direction this function stands for. */
  void copy_fpga_to_ram(void *src, void *dst, size_t size)
  {
      (void) dst;
      (void) size;

      printf("fpga to ram, fpga @= %p\n", src);
  }
  /* Copy SIZE bytes from SRC (FPGA address) to DST (main RAM), tracing the
   * source address on stdout. The copy is currently a plain memcpy(); the
   * intended device call is LMemLoopback_readLMem(src, size, dst).
   * Always returns 0. */
  int _starpu_fpga_copy_fpga_to_ram(void *src, void *dst, size_t size)
  {
      printf("fpga to ram, fpga @= %p\n", src);

      (void) memcpy(dst, src, size);

      return 0;
  }
  /* Copy SIZE bytes from SRC to DST, both being FPGA addresses, tracing the
   * source address on stdout. The copy is currently a plain memcpy(); the
   * device-side call is still to be determined (LMemLoopback_XXXLMem).
   * Always returns 0.
   *
   * The trace previously read "fpga to ram", copied from the sibling
   * function; it now names the fpga-to-fpga direction. */
  int _starpu_fpga_copy_fpga_to_fpga(void *src, void *dst, size_t size)
  {
      printf("fpga to fpga, fpga @= %p\n", src);

      memcpy(dst, src, size);

      return 0;
  }
  407. /* Asynchronous transfers */
  /* "Asynchronous" RAM-to-FPGA copy of SIZE bytes from SRC to DST. The
   * current implementation traces the destination address on stdout and
   * performs the copy immediately with memcpy(). Always returns 0.
   * TODO: find in the documentation an asynchronous version of
   * LMemLoopback_writeLMem(). */
  int _starpu_fpga_copy_ram_to_fpga_async(void *src, void *dst, size_t size)
  {
      printf("ram to fpga, fpga @= %p\n", dst);

      (void) memcpy(dst, src, size);

      return 0;
  }
  /* "Asynchronous" FPGA-to-RAM copy of SIZE bytes from SRC to DST. The
   * current implementation traces the source address on stdout and performs
   * the copy immediately with memcpy(). Always returns 0. */
  int _starpu_fpga_copy_fpga_to_ram_async(void *src, void *dst, size_t size)
  {
      printf("fpga to ram, fpga @= %p\n", src);

      (void) memcpy(dst, src, size);

      return 0;
  }
  /* Driver "run" entry: execute the whole FPGA worker loop for WORKERARG
   * and, once it has returned, print a trace message. Always returns 0. */
  int _starpu_run_fpga(struct _starpu_worker *workerarg)
  {
      /* Let's go ! */
      (void) _starpu_fpga_worker(workerarg);

      fpga_msg("\t !!!!! ~~~ I AM IN THE DRIVER ~~~\n");

      return 0;
  }
  428. void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl)
  429. {
  430. struct starpu_task *task = j->task;
  431. unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
  432. unsigned index;
  433. for (index = 0; index < nbuffers; index++)
  434. {
  435. starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
  436. // enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
  437. unsigned *interface_id = (unsigned *)malloc(sizeof(unsigned));
  438. *interface_id = handle->ops->interfaceid;
  439. switch (*interface_id)
  440. {
  441. case STARPU_VARIABLE_INTERFACE_ID:
  442. {
  443. void *ptr = (void*) STARPU_VARIABLE_GET_PTR(buffers[index]);
  444. size_t size = STARPU_VARIABLE_GET_ELEMSIZE(buffers[index]);
  445. //fpga_data_send(chnl,ptr,size);
  446. printf("Driver Fpga @: %p, size %lu \n",ptr,size);
  447. break;
  448. }
  449. case STARPU_MATRIX_INTERFACE_ID:
  450. case STARPU_BLOCK_INTERFACE_ID:
  451. case STARPU_VECTOR_INTERFACE_ID:
  452. case STARPU_CSR_INTERFACE_ID:
  453. case STARPU_BCSR_INTERFACE_ID:
  454. case STARPU_MULTIFORMAT_INTERFACE_ID:
  455. fpga_msg("Interface not supported yet");
  456. default:
  457. STARPU_ABORT();
  458. }
  459. }
  460. }
  /* Copy SSIZE bytes from SRC + SRC_OFFSET (CPU side) to DST + DST_OFFSET
   * (FPGA side) through _starpu_fpga_copy_ram_to_fpga() and return its result.
   * SRC_NODE, DST_NODE and ASYNC_CHANNEL are not used.
   *
   * The offsets are added on the uintptr_t values before converting to a
   * pointer: arithmetic on void * is a GNU extension, not ISO C. */
  int _starpu_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  {
      (void) src_node;
      (void) dst_node;
      (void) async_channel;

      return _starpu_fpga_copy_ram_to_fpga((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
  }
  /* Copy SSIZE bytes from SRC + SRC_OFFSET (FPGA side) to DST + DST_OFFSET
   * (CPU side) through _starpu_fpga_copy_fpga_to_ram() and return its result.
   * SRC_NODE, DST_NODE and ASYNC_CHANNEL are not used.
   *
   * The offsets are added on the uintptr_t values before converting to a
   * pointer: arithmetic on void * is a GNU extension, not ISO C. */
  int _starpu_fpga_copy_data_from_fpga_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  {
      (void) src_node;
      (void) dst_node;
      (void) async_channel;

      return _starpu_fpga_copy_fpga_to_ram((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
  }
  /* Copy SSIZE bytes from SRC + SRC_OFFSET to DST + DST_OFFSET, both on the
   * FPGA side, through _starpu_fpga_copy_fpga_to_fpga() and return its result.
   * SRC_NODE, DST_NODE and ASYNC_CHANNEL are not used.
   *
   * The offsets are added on the uintptr_t values before converting to a
   * pointer: arithmetic on void * is a GNU extension, not ISO C. */
  int _starpu_fpga_copy_data_from_fpga_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  {
      (void) src_node;
      (void) dst_node;
      (void) async_channel;

      return _starpu_fpga_copy_fpga_to_fpga((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
  }
  473. int _starpu_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
  474. {
  475. int src_kind = starpu_node_get_kind(src_node);
  476. int dst_kind = starpu_node_get_kind(dst_node);
  477. STARPU_ASSERT(src_kind == STARPU_FPGA_RAM && dst_kind == STARPU_CPU_RAM);
  478. int ret = 1;
  479. const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
  480. if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
  481. !(copy_methods->fpga_to_ram_async || copy_methods->any_to_any))
  482. {
  483. /* this is not associated to a request so it's synchronous */
  484. STARPU_ASSERT(copy_methods->fpga_to_ram || copy_methods->any_to_any);
  485. if (copy_methods->fpga_to_ram)
  486. copy_methods->fpga_to_ram(src_interface, src_node, dst_interface, dst_node);
  487. else
  488. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
  489. }
  490. else
  491. {
  492. //req->async_channel.type = STARPU_FPGA_RAM;
  493. if (copy_methods->fpga_to_ram_async)
  494. ret = copy_methods->fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node);
  495. else
  496. {
  497. STARPU_ASSERT(copy_methods->any_to_any);
  498. ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
  499. }
  500. //_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), src_node);
  501. }
  502. return ret;
  503. }
  504. int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
  505. {
  506. int src_kind = starpu_node_get_kind(src_node);
  507. int dst_kind = starpu_node_get_kind(dst_node);
  508. STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_FPGA_RAM);
  509. const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
  510. if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
  511. !(copy_methods->ram_to_fpga_async || copy_methods->any_to_any))
  512. {
  513. /* this is not associated to a request so it's synchronous */
  514. STARPU_ASSERT(copy_methods->ram_to_fpga || copy_methods->any_to_any);
  515. if (copy_methods->ram_to_fpga)
  516. copy_methods->ram_to_fpga(src_interface, src_node, dst_interface, dst_node);
  517. else
  518. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
  519. }
  520. else
  521. {
  522. //req->async_channel.type = STARPU_FPGA_RAM;
  523. if (copy_methods->ram_to_fpga_async)
  524. copy_methods->ram_to_fpga_async(src_interface, src_node, dst_interface, dst_node);
  525. else
  526. {
  527. STARPU_ASSERT(copy_methods->any_to_any);
  528. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
  529. }
  530. //_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), dst_node);
  531. }
  532. return 0;
  533. }
  534. struct _starpu_driver_ops _starpu_driver_fpga_ops =
  535. {
  536. .init = _starpu_fpga_driver_init,
  537. .run = _starpu_run_fpga,
  538. .run_once = _starpu_fpga_driver_run_once,
  539. .deinit = _starpu_fpga_driver_deinit
  540. };
  541. // TODO: transfers
  542. struct _starpu_node_ops _starpu_driver_fpga_node_ops =
  543. {
  544. .copy_data_to[STARPU_UNUSED] = NULL,
  545. //.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
  546. //.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
  547. .copy_data_to[STARPU_OPENCL_RAM] = NULL,
  548. .copy_data_to[STARPU_DISK_RAM] = NULL,
  549. .copy_data_to[STARPU_MIC_RAM] = NULL,
  550. .copy_data_to[STARPU_MPI_MS_RAM] = NULL,
  551. .copy_interface_to[STARPU_UNUSED] = NULL,
  552. //.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
  553. .copy_interface_to[STARPU_FPGA_RAM] = NULL,
  554. .copy_interface_to[STARPU_OPENCL_RAM] = NULL,
  555. .copy_interface_to[STARPU_DISK_RAM] = NULL,
  556. .copy_interface_to[STARPU_MIC_RAM] = NULL,
  557. .copy_interface_to[STARPU_MPI_MS_RAM] = NULL,
  558. .wait_request_completion = NULL,
  559. .test_request_completion = NULL,
  560. .is_direct_access_supported = NULL,
  561. .malloc_on_node = _starpu_fpga_allocate_memory,
  562. .free_on_node = NULL,
  563. .name = "fpga driver"
  564. };