driver_fpga.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2019-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <starpu_fpga.h>
  18. #include <starpu_profiling.h>
  19. #include <common/utils.h>
  20. #include <common/config.h>
  21. #include <core/debug.h>
  22. #include <drivers/driver_common/driver_common.h>
  23. #include "driver_fpga.h"
  24. #include <core/sched_policy.h>
  25. #include <datawizard/memory_manager.h>
  26. #include <datawizard/memory_nodes.h>
  27. #include <datawizard/malloc.h>
  28. //#include <MaxSLiCInterface.h>
  29. #define KNRM "\x1B[0m"
  30. #define KRED "\x1B[31m"
  31. #define KGRN "\x1B[32m"
  32. #define KYEL "\x1B[33m"
  33. #define KBLU "\x1B[34m"
  34. #define KMAG "\x1B[35m"
  35. #define KCYN "\x1B[36m"
  36. #define KWHT "\x1B[37m"
  37. #define FPGA_OK KGRN
  38. #define FPGA_ERROR KRED
  39. #define NORMAL KNRM
  40. #define FPGA_OK KGRN
  41. //#define STARPU_MAXFPGADEVS 4
/* the number of FPGA devices */
static unsigned nfpgafpgas;
/* Per-device memory size; written by init_device_context() and, when the
 * STARPU_LIMIT_FPGA_MEM environment variable is set, overridden by
 * _starpu_fpga_limit_global_mem(). */
static size_t global_mem[STARPU_MAXFPGADEVS];
/* Per-device engine handle, obtained from max_load() in init_device_context()
 * and read back by starpu_fpga_get_local_engine(). */
static max_engine_t *engines[STARPU_MAXFPGADEVS];
/* Per-device address that the next _starpu_fpga_allocate_memory() call
 * will hand out. */
static fpga_mem current_address[STARPU_MAXFPGADEVS];
/* Forward declarations of helpers defined further down in this file. */
static void _starpu_fpga_limit_global_mem(unsigned );
static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
  49. void fpga_msg(char *msg)
  50. {
  51. printf(FPGA_OK "%s\n" NORMAL, msg);
  52. }
  53. max_engine_t *starpu_fpga_get_local_engine(void)
  54. {
  55. int worker = starpu_worker_get_id_check();
  56. int devid = starpu_worker_get_devid(worker);
  57. return engines[devid];
  58. }
  59. void _starpu_init_fpga()
  60. {
  61. }
  62. void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
  63. {
  64. //TODO: This is statically assigned, in the next round of integration
  65. // I will have to read from the struct fpga in fpga
  66. struct starpu_max_load *load = _starpu_config.conf.fpga_load;
  67. const char *sim_socket = max_config_get_string(MAX_CONFIG_USE_SIMULATION);
  68. int n;
  69. n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
  70. if (n != -1)
  71. {
  72. config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
  73. return;
  74. }
  75. if (!load)
  76. {
  77. /* Nothing specified, single-FPGA execution with basic static
  78. * interface, file will be auto-loaded by SLiC. */
  79. n = 1;
  80. }
  81. else
  82. {
  83. struct starpu_max_load *cur, *star = NULL;
  84. size_t nstar = 0;
  85. /* First check if we have a star, we will want to subtract non-star loads from it */
  86. for (cur = load; cur->engine_id_pattern; cur++)
  87. if (!strcmp(cur->engine_id_pattern, "*"))
  88. {
  89. STARPU_ASSERT_MSG(!cur[1].file, "in starpu_max_load array, * pattern must be last");
  90. star = cur;
  91. if (sim_socket)
  92. /* not specified, assume 1 */
  93. nstar = 1;
  94. else
  95. nstar = max_count_engines_free(cur->file, cur->engine_id_pattern);
  96. break;
  97. }
  98. n = 0;
  99. /* Now check the non-star loads */
  100. for (cur = load; cur != star && cur->engine_id_pattern; cur++)
  101. {
  102. size_t size;
  103. size = max_count_engines_free(load->file, load->engine_id_pattern);
  104. STARPU_ASSERT_MSG(size > 0, "cannot load starpu_max_load element %d on %s", (unsigned) (cur - load), load->engine_id_pattern);
  105. /* One FPGA more to be used */
  106. n++;
  107. if (nstar)
  108. {
  109. size = max_count_engines_free(load->file, "*");
  110. if (size > 1)
  111. /* One of the star devices will be used to load this file */
  112. nstar--;
  113. }
  114. }
  115. n += nstar;
  116. }
  117. //LMemInterface addLMemInterface()
  118. //// pour récupérer l'accès à la LMem
  119. config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
  120. }
  121. unsigned _starpu_fpga_get_device_count(void)
  122. {
  123. return nfpgafpgas;
  124. }
  125. static void _starpu_fpga_limit_global_mem(unsigned devid)
  126. {
  127. starpu_ssize_t limit=-1;
  128. //TODO
  129. limit = starpu_get_env_number("STARPU_LIMIT_FPGA_MEM");
  130. if(limit != -1)
  131. global_mem[devid] = limit*1024*1024;
  132. }
  133. static size_t _starpu_fpga_get_global_mem_size(unsigned devid)
  134. {
  135. return global_mem[devid];
  136. }
  137. static void init_fpga_worker_context(unsigned workerid)
  138. {
  139. }
  140. static void init_device_context(unsigned devid)
  141. {
  142. struct starpu_max_load *load = _starpu_config.conf.fpga_load;
  143. /* 0 would be seen as NULL, i.e. allocation failed... */
  144. // FIXME: Maxeler FPGAs want 192-byte alignment
  145. // TODO: use int max_get_burst_size (max_file_t *maxfile, const char *name)
  146. current_address[devid] = (fpga_mem) (8192*192);
  147. global_mem[devid] = 128ULL*1024*1024*1024;
  148. _starpu_fpga_limit_global_mem(devid);
  149. if (!load) {
  150. /* Nothing specified, single-FPGA execution with basic static
  151. * interface, file will be auto-loaded by SLiC. */
  152. return;
  153. } else {
  154. unsigned n;
  155. /* Which load we shall use */
  156. for (n = 0; load->file; load++)
  157. {
  158. if (!strcmp(load->engine_id_pattern, "*"))
  159. break;
  160. if (n == devid)
  161. break;
  162. }
  163. STARPU_ASSERT(load->file);
  164. if (!strcmp(load->engine_id_pattern, "*"))
  165. {
  166. char s[32];
  167. snprintf(s, sizeof(s), "local:%u", (unsigned) devid);
  168. /* FIXME: this assumes that the loads are in-order.
  169. * Ideally we'd detect which ones had an explicit load */
  170. engines[n] = max_load(load->file, load->engine_id_pattern);
  171. }
  172. else
  173. engines[n] = max_load(load->file, load->engine_id_pattern);
  174. }
  175. }
  176. int _starpu_fpga_driver_init(struct _starpu_worker *worker)
  177. {
  178. int devid = worker->devid;
  179. //fpga_msg("successful till here");
  180. _starpu_driver_start(worker, STARPU_FPGA_WORKER, 1);
  181. /* FIXME: when we have NUMA support, properly turn node number into NUMA node number */
  182. // TODO: drop test when we allocated a memory node for fpga
  183. if (worker->memory_node != STARPU_MAIN_RAM)
  184. _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_fpga_get_global_mem_size(worker->devid));
  185. // TODO: multiple fpga in same thread
  186. init_device_context(devid);
  187. snprintf(worker->name, sizeof(worker->name), "FPGA %d", devid);
  188. snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid);
  189. starpu_pthread_setname(worker->short_name);
  190. _STARPU_TRACE_WORKER_INIT_END(worker->workerid);
  191. /* tell the main thread that we are ready */
  192. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  193. worker->status = STATUS_UNKNOWN;
  194. worker->worker_is_initialized = 1;
  195. STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond);
  196. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  197. return 0;
  198. }
  199. static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch)
  200. {
  201. int ret;
  202. int profiling = starpu_profiling_status_get();
  203. struct starpu_task *task = j->task;
  204. struct starpu_codelet *cl = task->cl;
  205. STARPU_ASSERT(cl);
  206. /* TODO: use asynchronous */
  207. ret = _starpu_fetch_task_input(task, j, 0);
  208. if (ret != 0)
  209. {
  210. /* there was not enough memory so the codelet cannot be executed right now ... */
  211. /* push the codelet back and try another one ... */
  212. return -EAGAIN;
  213. }
  214. /* Give profiling variable */
  215. _starpu_driver_start_job(fpga_args, j, perf_arch, rank, profiling);
  216. /* In case this is a Fork-join parallel task, the worker does not
  217. * execute the kernel at all. */
  218. if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
  219. {
  220. _starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
  221. STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
  222. if (_starpu_get_disable_kernels() <= 0)
  223. {
  224. _STARPU_TRACE_START_EXECUTING();
  225. func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
  226. _STARPU_TRACE_END_EXECUTING();
  227. }
  228. }
  229. _starpu_driver_end_job(fpga_args, j, perf_arch, rank, profiling);
  230. _starpu_driver_update_job_feedback(j, fpga_args, perf_arch, profiling);
  231. _starpu_push_task_output(j);
  232. return 0;
  233. }
/* Run one iteration of the FPGA worker loop: make the datawizard progress,
 * try to get one task for FPGA_WORKER and, if there is one that may run on an
 * FPGA, execute it through execute_job_on_fpga().
 *
 * Returns 0 on every path that returns; an unexpected execution status
 * aborts instead. */
int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
{
	unsigned memnode = fpga_worker->memory_node;
	int workerid = fpga_worker->workerid;
	_STARPU_TRACE_START_PROGRESS(memnode);
	_starpu_datawizard_progress(1);
	/* A second progress call is issued when the worker's memory node is
	 * not the main RAM. */
	if (memnode != STARPU_MAIN_RAM)
	{
		_starpu_datawizard_progress(1);
	}
	_STARPU_TRACE_END_PROGRESS(memnode);
	struct _starpu_job *j;
	struct starpu_task *task;
	int res;
	task = _starpu_get_worker_task(fpga_worker, workerid, memnode);
	/* Nothing to run this time. */
	if (!task)
		return 0;
	j = _starpu_get_job_associated_to_task(task);
	/* can an FPGA perform that task ? */
	if (!_STARPU_MAY_PERFORM(j, FPGA))
	{
		/* put it at the end of the queue ... XXX */
		_starpu_push_task_to_workers(task);
		return 0;
	}
	int rank = 0;
	int is_parallel_task = (j->task_size > 1);
	struct starpu_perfmodel_arch* perf_arch;
	if (is_parallel_task)
	{
		/* The rank of this worker within the parallel task is taken
		 * from the job's alias counter, under the job's sync mutex. */
		STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
		rank = j->active_task_alias_count++;
		STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
		if(j->combined_workerid != -1)
		{
			/* Use the combined worker's size and perf arch. */
			struct _starpu_combined_worker *combined_worker;
			combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid);
			fpga_worker->combined_workerid = j->combined_workerid;
			fpga_worker->worker_size = combined_worker->worker_size;
			fpga_worker->current_rank = rank;
			perf_arch = &combined_worker->perf_arch;
		}
		else
		{
			/* No combined worker: use the perf arch of the
			 * scheduling context found for this worker and job. */
			struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(fpga_worker, j);
			STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", fpga_worker->workerid);
			perf_arch = &sched_ctx->perf_arch;
		}
	}
	else
	{
		/* Sequential task: the worker stands for itself. */
		fpga_worker->combined_workerid = fpga_worker->workerid;
		fpga_worker->worker_size = 1;
		fpga_worker->current_rank = 0;
		perf_arch = &fpga_worker->perf_arch;
	}
	/* Mark the task as current for the duration of the execution only. */
	_starpu_set_current_task(j->task);
	fpga_worker->current_task = j->task;
	res = execute_job_on_fpga(j, task, fpga_worker, rank, perf_arch);
	_starpu_set_current_task(NULL);
	fpga_worker->current_task = NULL;
	if (res)
	{
		switch (res)
		{
		case -EAGAIN:
			/* The job could not run now: hand the task back. */
			_starpu_push_task_to_workers(task);
			return 0;
		default:
			STARPU_ABORT();
		}
	}
	/* In the case of combined workers, we need to inform the
	 * scheduler each worker's execution is over.
	 * Then we free the workers' task alias */
	if (is_parallel_task)
	{
		_starpu_sched_post_exec_hook(task);
		free(task);
	}
	/* Only rank 0 handles the termination of the job. */
	if (rank == 0)
		_starpu_handle_job_termination(j);
	return 0;
}
  318. int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker)
  319. {
  320. _STARPU_TRACE_WORKER_DEINIT_START;
  321. unsigned memnode = fpga_worker->memory_node;
  322. _starpu_handle_all_pending_node_data_requests(memnode);
  323. /* In case there remains some memory that was automatically
  324. * allocated by StarPU, we release it now. Note that data
  325. * coherency is not maintained anymore at that point ! */
  326. _starpu_free_all_automatically_allocated_buffers(memnode);
  327. fpga_worker->worker_is_initialized = 0;
  328. _STARPU_TRACE_WORKER_DEINIT_END(STARPU_FPGA_WORKER);
  329. return 0;
  330. }
  331. void *_starpu_fpga_worker(void *_arg)
  332. {
  333. struct _starpu_worker* worker = _arg;
  334. unsigned memnode = worker->memory_node;
  335. _starpu_fpga_driver_init(worker);
  336. _STARPU_TRACE_START_PROGRESS(memnode);
  337. while (_starpu_machine_is_running())
  338. {
  339. _starpu_may_pause();
  340. //fpga_msg("\tEntered the main loop\n");
  341. _starpu_fpga_driver_run_once(worker);
  342. }
  343. _STARPU_TRACE_END_PROGRESS(memnode);
  344. _starpu_fpga_driver_deinit(worker);
  345. return NULL;
  346. }
  347. uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags)
  348. {
  349. (void) flags;
  350. unsigned devid = starpu_memory_node_get_devid(dst_node);
  351. fpga_mem addr, next_addr;
  352. addr = current_address[devid];
  353. next_addr = current_address[devid] + size;
  354. if (next_addr >= (fpga_mem) global_mem[devid])
  355. {
  356. printf("Memory overflow on %d\n", devid);
  357. return 0;
  358. }
  359. current_address[devid] = next_addr;
  360. printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size);
  361. return (uintptr_t) addr;
  362. }
  363. int _starpu_fpga_copy_ram_to_fpga(void *src, void *dst, size_t size)
  364. {
  365. printf("ram to fpga, fpga @= %p\n",dst);
  366. memcpy(dst,src,size);
  367. return 0;
  368. // LMemLoopback_writeLMem(dst, size, src);
  369. }
  370. /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
  371. * * node to the address pointed by DST in the DST_NODE memory node
  372. * */
  373. void copy_ram_to_fpga(void *src, void *dst, size_t size)
  374. {
  375. printf("ram to fpga, fpga @= %p\n",dst);
  376. // LMemLoopback_writeLMem(size, dst, src);
  377. }
  378. void copy_fpga_to_ram(void *src, void *dst, size_t size)
  379. {
  380. printf("ram to fpga, fpga @= %p\n",src);
  381. //LMemLoopback_readLMem(size, src, dst);
  382. }
  383. /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
  384. * node to the address pointed by DST in the DST_NODE memory node
  385. */
  386. int _starpu_fpga_copy_fpga_to_ram(void *src, void *dst, size_t size)
  387. {
  388. printf("fpga to ram, fpga @= %p\n",src);
  389. memcpy(dst,src,size);
  390. return 0;
  391. //LMemLoopback_readLMem(src, size, dst);
  392. }
  393. /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
  394. * node to the address pointed by DST in the DST_NODE memory node
  395. */
  396. int _starpu_fpga_copy_fpga_to_fpga(void *src, void *dst, size_t size)
  397. {
  398. printf("fpga to ram, fpga @= %p\n",src);
  399. memcpy(dst,src,size);
  400. return 0;
  401. //LMemLoopback_XXXLMem(src, size, dst);
  402. }
  403. /* Asynchronous transfers */
  404. int _starpu_fpga_copy_ram_to_fpga_async(void *src, void *dst, size_t size)
  405. {
  406. printf("ram to fpga, fpga @= %p\n",dst);
  407. memcpy(dst,src,size);
  408. return 0;
  409. // Trouver dans la doc une version asynchrone de LMemLoopback_writeLMem();
  410. }
  411. int _starpu_fpga_copy_fpga_to_ram_async(void *src, void *dst, size_t size)
  412. {
  413. printf("fpga to ram, fpga @= %p\n",src);
  414. memcpy(dst,src,size);
  415. return 0;
  416. }
  417. int _starpu_run_fpga(struct _starpu_worker *workerarg)
  418. {
  419. /* Let's go ! */
  420. _starpu_fpga_worker(workerarg);
  421. fpga_msg("\t !!!!! ~~~ I AM IN THE DRIVER ~~~\n");
  422. return 0;
  423. }
  424. void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl)
  425. {
  426. struct starpu_task *task = j->task;
  427. unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
  428. unsigned index;
  429. for (index = 0; index < nbuffers; index++)
  430. {
  431. starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
  432. // enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
  433. unsigned *interface_id = (unsigned *)malloc(sizeof(unsigned));
  434. *interface_id = handle->ops->interfaceid;
  435. switch (*interface_id)
  436. {
  437. case STARPU_VARIABLE_INTERFACE_ID:
  438. {
  439. void *ptr = (void*) STARPU_VARIABLE_GET_PTR(buffers[index]);
  440. size_t size = STARPU_VARIABLE_GET_ELEMSIZE(buffers[index]);
  441. //fpga_data_send(chnl,ptr,size);
  442. printf("Driver Fpga @: %p, size %lu \n",ptr,size);
  443. break;
  444. }
  445. case STARPU_MATRIX_INTERFACE_ID:
  446. case STARPU_BLOCK_INTERFACE_ID:
  447. case STARPU_VECTOR_INTERFACE_ID:
  448. case STARPU_CSR_INTERFACE_ID:
  449. case STARPU_BCSR_INTERFACE_ID:
  450. case STARPU_MULTIFORMAT_INTERFACE_ID:
  451. fpga_msg("Interface not supported yet");
  452. default:
  453. STARPU_ABORT();
  454. }
  455. }
  456. }
  457. int _starpu_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  458. {
  459. return _starpu_fpga_copy_ram_to_fpga((void*) src + src_offset, (void*) dst + dst_offset, ssize);
  460. }
  461. int _starpu_fpga_copy_data_from_fpga_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  462. {
  463. return _starpu_fpga_copy_fpga_to_ram((void*) src + src_offset, (void*) dst + dst_offset, ssize);
  464. }
  465. int _starpu_fpga_copy_data_from_fpga_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  466. {
  467. return _starpu_fpga_copy_fpga_to_fpga((void*) src + src_offset, (void*) dst + dst_offset, ssize);
  468. }
  469. int _starpu_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
  470. {
  471. int src_kind = starpu_node_get_kind(src_node);
  472. int dst_kind = starpu_node_get_kind(dst_node);
  473. STARPU_ASSERT(src_kind == STARPU_FPGA_RAM && dst_kind == STARPU_CPU_RAM);
  474. int ret = 1;
  475. const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
  476. if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
  477. !(copy_methods->fpga_to_ram_async || copy_methods->any_to_any))
  478. {
  479. /* this is not associated to a request so it's synchronous */
  480. STARPU_ASSERT(copy_methods->fpga_to_ram || copy_methods->any_to_any);
  481. if (copy_methods->fpga_to_ram)
  482. copy_methods->fpga_to_ram(src_interface, src_node, dst_interface, dst_node);
  483. else
  484. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
  485. }
  486. else
  487. {
  488. //req->async_channel.type = STARPU_FPGA_RAM;
  489. if (copy_methods->fpga_to_ram_async)
  490. ret = copy_methods->fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node);
  491. else
  492. {
  493. STARPU_ASSERT(copy_methods->any_to_any);
  494. ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
  495. }
  496. //_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), src_node);
  497. }
  498. return ret;
  499. }
  500. int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
  501. {
  502. int src_kind = starpu_node_get_kind(src_node);
  503. int dst_kind = starpu_node_get_kind(dst_node);
  504. STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_FPGA_RAM);
  505. const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
  506. if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
  507. !(copy_methods->ram_to_fpga_async || copy_methods->any_to_any))
  508. {
  509. /* this is not associated to a request so it's synchronous */
  510. STARPU_ASSERT(copy_methods->ram_to_fpga || copy_methods->any_to_any);
  511. if (copy_methods->ram_to_fpga)
  512. copy_methods->ram_to_fpga(src_interface, src_node, dst_interface, dst_node);
  513. else
  514. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
  515. }
  516. else
  517. {
  518. //req->async_channel.type = STARPU_FPGA_RAM;
  519. if (copy_methods->ram_to_fpga_async)
  520. copy_methods->ram_to_fpga_async(src_interface, src_node, dst_interface, dst_node);
  521. else
  522. {
  523. STARPU_ASSERT(copy_methods->any_to_any);
  524. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
  525. }
  526. //_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), dst_node);
  527. }
  528. return 0;
  529. }
/* Driver operations of the FPGA driver: the entry points defined in this
 * file for initialising a worker, running its whole loop, running a single
 * iteration, and deinitialising it. */
struct _starpu_driver_ops _starpu_driver_fpga_ops =
{
	.init = _starpu_fpga_driver_init,
	.run = _starpu_run_fpga,
	.run_once = _starpu_fpga_driver_run_once,
	.deinit = _starpu_fpga_driver_deinit
};
// TODO: transfers
/* Memory-node operations of the FPGA driver.  Only allocation is wired up
 * for now; the copy hooks are still commented out and every other entry is
 * explicitly NULL. */
struct _starpu_node_ops _starpu_driver_fpga_node_ops =
{
	//.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
	//.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
	//.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
	.copy_interface_to[STARPU_FPGA_RAM] = NULL,
	.wait_request_completion = NULL,
	.test_request_completion = NULL,
	.is_direct_access_supported = NULL,
	/* Allocation goes through the allocator defined in this file. */
	.malloc_on_node = _starpu_fpga_allocate_memory,
	.free_on_node = NULL,
	.name = "fpga driver"
};