driver_fpga.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2019-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <starpu_fpga.h>
  18. #include <starpu_profiling.h>
  19. #include <common/utils.h>
  20. #include <common/config.h>
  21. #include <core/debug.h>
  22. #include <drivers/driver_common/driver_common.h>
  23. #include "driver_fpga.h"
  24. #include <core/sched_policy.h>
  25. #include <datawizard/memory_manager.h>
  26. #include <datawizard/memory_nodes.h>
  27. #include <datawizard/malloc.h>
  28. //#include <MaxSLiCInterface.h>
  /* ANSI escape sequences used to colour terminal output. */
  #define KNRM "\x1B[0m"
  #define KRED "\x1B[31m"
  #define KGRN "\x1B[32m"
  #define KYEL "\x1B[33m"
  #define KBLU "\x1B[34m"
  #define KMAG "\x1B[35m"
  #define KCYN "\x1B[36m"
  #define KWHT "\x1B[37m"
  /* Colours used by the driver's own messages.  FPGA_OK used to be defined
   * twice with the same value; it is now defined once. */
  #define FPGA_OK KGRN
  #define FPGA_ERROR KRED
  #define NORMAL KNRM
  41. //#define STARPU_MAXFPGADEVS 4
  42. /* the number of FPGA devices */
  43. static unsigned nfpgafpgas;
  44. static size_t global_mem[STARPU_MAXFPGADEVS];
  45. static max_engine_t *engines[STARPU_MAXFPGADEVS];
  46. static fpga_mem current_address[STARPU_MAXFPGADEVS];
  47. static void _starpu_fpga_limit_global_mem(unsigned );
  48. static size_t _starpu_fpga_get_global_mem_size(unsigned devid);
  49. void fpga_msg(char *msg)
  50. {
  51. printf(FPGA_OK "%s\n" NORMAL, msg);
  52. }
  53. max_engine_t *starpu_fpga_get_local_engine(void)
  54. {
  55. int worker = starpu_worker_get_id_check();
  56. int devid = starpu_worker_get_devid(worker);
  57. STARPU_ASSERT_MSG(engines[devid], "engine for fpga %d on worker %d is NULL!?", devid, worker);
  58. return engines[devid];
  59. }
  /* Driver-wide initialisation hook.  Nothing needs to be set up here at the
   * moment, so the body is intentionally empty.
   *
   * Declared with an explicit (void) parameter list so that the definition is a
   * real prototype and calls with arguments are rejected by the compiler. */
  void _starpu_init_fpga(void)
  {
  }
  63. void _starpu_fpga_discover_devices (struct _starpu_machine_config *config)
  64. {
  65. //TODO: This is statically assigned, in the next round of integration
  66. // I will have to read from the struct fpga in fpga
  67. struct starpu_max_load *load = _starpu_config.conf.fpga_load;
  68. const char *sim_socket = max_config_get_string(MAX_CONFIG_USE_SIMULATION);
  69. int n;
  70. n = starpu_get_env_number("STARPU_NUM_FPGA_FPGA");
  71. if (n != -1)
  72. {
  73. config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
  74. return;
  75. }
  76. if (!load)
  77. {
  78. /* Nothing specified, single-FPGA execution with basic static
  79. * interface, file will be auto-loaded by SLiC. */
  80. n = 1;
  81. }
  82. else
  83. {
  84. struct starpu_max_load *cur, *star = NULL;
  85. size_t nstar = 0;
  86. /* First check if we have a star, we will want to subtract non-star loads from it */
  87. for (cur = load; cur->engine_id_pattern; cur++)
  88. if (!strcmp(cur->engine_id_pattern, "*")
  89. || strstr(cur->engine_id_pattern, ":*"))
  90. {
  91. STARPU_ASSERT_MSG(!cur[1].file, "in starpu_max_load array, * pattern must be last");
  92. star = cur;
  93. if (sim_socket)
  94. /* not specified, assume 1 */
  95. nstar = 1;
  96. else
  97. nstar = max_count_engines_free(cur->file, star->engine_id_pattern);
  98. break;
  99. }
  100. n = 0;
  101. /* Now check the non-star loads */
  102. for (cur = load; cur != star && cur->engine_id_pattern; cur++)
  103. {
  104. size_t size;
  105. size = max_count_engines_free(load->file, load->engine_id_pattern);
  106. STARPU_ASSERT_MSG(size > 0, "cannot load starpu_max_load element %d on %s", (unsigned) (cur - load), load->engine_id_pattern);
  107. /* One FPGA more to be used */
  108. n++;
  109. if (star)
  110. {
  111. size = max_count_engines_free(load->file, star->engine_id_pattern);
  112. if (size > 1)
  113. /* One of the star devices will be used to load this file */
  114. nstar--;
  115. }
  116. }
  117. n += nstar;
  118. }
  119. //LMemInterface addLMemInterface()
  120. //// pour récupérer l'accès à la LMem
  121. config->topology.nhwdevices[STARPU_FPGA_WORKER] = nfpgafpgas = n;
  122. }
  123. unsigned _starpu_fpga_get_device_count(void)
  124. {
  125. return nfpgafpgas;
  126. }
  127. static void _starpu_fpga_limit_global_mem(unsigned devid)
  128. {
  129. starpu_ssize_t limit=-1;
  130. //TODO
  131. limit = starpu_get_env_number("STARPU_LIMIT_FPGA_MEM");
  132. if(limit != -1)
  133. global_mem[devid] = limit*1024*1024;
  134. }
  135. static size_t _starpu_fpga_get_global_mem_size(unsigned devid)
  136. {
  137. return global_mem[devid];
  138. }
  139. static void init_device_context(unsigned devid)
  140. {
  141. struct starpu_max_load *load = _starpu_config.conf.fpga_load;
  142. /* 0 would be seen as NULL, i.e. allocation failed... */
  143. // FIXME: Maxeler FPGAs want 192-byte alignment
  144. // TODO: use int max_get_burst_size (max_file_t *maxfile, const char *name)
  145. current_address[devid] = (fpga_mem) (8192*192);
  146. global_mem[devid] = 128ULL*1024*1024*1024;
  147. _starpu_fpga_limit_global_mem(devid);
  148. if (!load) {
  149. /* Nothing specified, single-FPGA execution with basic static
  150. * interface, file will be auto-loaded by SLiC. */
  151. return;
  152. } else {
  153. unsigned n;
  154. /* Which load we shall use */
  155. for (n = 0; load->file; n++, load++)
  156. {
  157. if (!strcmp(load->engine_id_pattern, "*")
  158. || strstr(load->engine_id_pattern, ":*"))
  159. break;
  160. if (n == devid)
  161. break;
  162. }
  163. STARPU_ASSERT(load->file);
  164. if (!strcmp(load->engine_id_pattern, "*")
  165. || strstr(load->engine_id_pattern, ":*"))
  166. {
  167. char s[strlen(load->engine_id_pattern) + 32];
  168. if (!strcmp(load->engine_id_pattern, "*"))
  169. snprintf(s, sizeof(s), "*:%u", (unsigned) devid);
  170. else
  171. {
  172. char *colon = strstr(load->engine_id_pattern, ":*");
  173. snprintf(s, sizeof(s), "%.*s:%u",
  174. (int) (colon - load->engine_id_pattern),
  175. load->engine_id_pattern,
  176. (unsigned) devid);
  177. }
  178. /* FIXME: this assumes that the loads are in-order.
  179. * Ideally we'd detect which ones had an explicit load */
  180. engines[devid] = max_load(load->file, s);
  181. STARPU_ASSERT_MSG(engines[devid], "engine %d (part of *) could not be loaded\n", n);
  182. }
  183. else
  184. {
  185. engines[n] = max_load(load->file, load->engine_id_pattern);
  186. STARPU_ASSERT_MSG(engines[n], "engine %d could not be loaded\n", n);
  187. }
  188. }
  189. }
  190. int _starpu_fpga_driver_init(struct _starpu_worker *worker)
  191. {
  192. int devid = worker->devid;
  193. //fpga_msg("successful till here");
  194. _starpu_driver_start(worker, STARPU_FPGA_WORKER, 1);
  195. /* FIXME: when we have NUMA support, properly turn node number into NUMA node number */
  196. // TODO: drop test when we allocated a memory node for fpga
  197. if (worker->memory_node != STARPU_MAIN_RAM)
  198. _starpu_memory_manager_set_global_memory_size(worker->memory_node, _starpu_fpga_get_global_mem_size(worker->devid));
  199. // TODO: multiple fpga in same thread
  200. init_device_context(devid);
  201. snprintf(worker->name, sizeof(worker->name), "FPGA %d", devid);
  202. snprintf(worker->short_name, sizeof(worker->short_name), "FPGA %d", devid);
  203. starpu_pthread_setname(worker->short_name);
  204. _STARPU_TRACE_WORKER_INIT_END(worker->workerid);
  205. /* tell the main thread that we are ready */
  206. STARPU_PTHREAD_MUTEX_LOCK(&worker->mutex);
  207. worker->status = STATUS_UNKNOWN;
  208. worker->worker_is_initialized = 1;
  209. STARPU_PTHREAD_COND_SIGNAL(&worker->ready_cond);
  210. STARPU_PTHREAD_MUTEX_UNLOCK(&worker->mutex);
  211. return 0;
  212. }
  213. static int execute_job_on_fpga(struct _starpu_job *j, struct starpu_task *worker_task, struct _starpu_worker *fpga_args, int rank, struct starpu_perfmodel_arch* perf_arch)
  214. {
  215. int ret;
  216. int profiling = starpu_profiling_status_get();
  217. struct starpu_task *task = j->task;
  218. struct starpu_codelet *cl = task->cl;
  219. STARPU_ASSERT(cl);
  220. /* TODO: use asynchronous */
  221. ret = _starpu_fetch_task_input(task, j, 0);
  222. if (ret != 0)
  223. {
  224. /* there was not enough memory so the codelet cannot be executed right now ... */
  225. /* push the codelet back and try another one ... */
  226. return -EAGAIN;
  227. }
  228. /* Give profiling variable */
  229. _starpu_driver_start_job(fpga_args, j, perf_arch, rank, profiling);
  230. /* In case this is a Fork-join parallel task, the worker does not
  231. * execute the kernel at all. */
  232. if ((rank == 0) || (cl->type != STARPU_FORKJOIN))
  233. {
  234. _starpu_cl_func_t func = _starpu_task_get_fpga_nth_implementation(cl, j->nimpl);
  235. STARPU_ASSERT_MSG(func, "when STARPU_FPGA is defined in 'where', fpga_func or fpga_funcs has to be defined");
  236. if (_starpu_get_disable_kernels() <= 0)
  237. {
  238. _STARPU_TRACE_START_EXECUTING();
  239. func(_STARPU_TASK_GET_INTERFACES(task), task->cl_arg);
  240. _STARPU_TRACE_END_EXECUTING();
  241. }
  242. }
  243. _starpu_driver_end_job(fpga_args, j, perf_arch, rank, profiling);
  244. _starpu_driver_update_job_feedback(j, fpga_args, perf_arch, profiling);
  245. _starpu_push_task_output(j);
  246. return 0;
  247. }
  248. int _starpu_fpga_driver_run_once(struct _starpu_worker *fpga_worker)
  249. {
  250. unsigned memnode = fpga_worker->memory_node;
  251. int workerid = fpga_worker->workerid;
  252. _STARPU_TRACE_START_PROGRESS(memnode);
  253. _starpu_datawizard_progress(1);
  254. if (memnode != STARPU_MAIN_RAM)
  255. {
  256. _starpu_datawizard_progress(1);
  257. }
  258. _STARPU_TRACE_END_PROGRESS(memnode);
  259. struct _starpu_job *j;
  260. struct starpu_task *task;
  261. int res;
  262. task = _starpu_get_worker_task(fpga_worker, workerid, memnode);
  263. if (!task)
  264. return 0;
  265. j = _starpu_get_job_associated_to_task(task);
  266. /* can a cpu perform that task ? */
  267. if (!_STARPU_MAY_PERFORM(j, FPGA))
  268. {
  269. /* put it and the end of the queue ... XXX */
  270. _starpu_push_task_to_workers(task);
  271. return 0;
  272. }
  273. int rank = 0;
  274. int is_parallel_task = (j->task_size > 1);
  275. struct starpu_perfmodel_arch* perf_arch;
  276. if (is_parallel_task)
  277. {
  278. STARPU_PTHREAD_MUTEX_LOCK(&j->sync_mutex);
  279. rank = j->active_task_alias_count++;
  280. STARPU_PTHREAD_MUTEX_UNLOCK(&j->sync_mutex);
  281. if(j->combined_workerid != -1)
  282. {
  283. struct _starpu_combined_worker *combined_worker;
  284. combined_worker = _starpu_get_combined_worker_struct(j->combined_workerid);
  285. fpga_worker->combined_workerid = j->combined_workerid;
  286. fpga_worker->worker_size = combined_worker->worker_size;
  287. fpga_worker->current_rank = rank;
  288. perf_arch = &combined_worker->perf_arch;
  289. }
  290. else
  291. {
  292. struct _starpu_sched_ctx *sched_ctx = _starpu_sched_ctx_get_sched_ctx_for_worker_and_job(fpga_worker, j);
  293. STARPU_ASSERT_MSG(sched_ctx != NULL, "there should be a worker %d in the ctx of this job \n", fpga_worker->workerid);
  294. perf_arch = &sched_ctx->perf_arch;
  295. }
  296. }
  297. else
  298. {
  299. fpga_worker->combined_workerid = fpga_worker->workerid;
  300. fpga_worker->worker_size = 1;
  301. fpga_worker->current_rank = 0;
  302. perf_arch = &fpga_worker->perf_arch;
  303. }
  304. _starpu_set_current_task(j->task);
  305. fpga_worker->current_task = j->task;
  306. res = execute_job_on_fpga(j, task, fpga_worker, rank, perf_arch);
  307. _starpu_set_current_task(NULL);
  308. fpga_worker->current_task = NULL;
  309. if (res)
  310. {
  311. switch (res)
  312. {
  313. case -EAGAIN:
  314. _starpu_push_task_to_workers(task);
  315. return 0;
  316. default:
  317. STARPU_ABORT();
  318. }
  319. }
  320. /* In the case of combined workers, we need to inform the
  321. * scheduler each worker's execution is over.
  322. * Then we free the workers' task alias */
  323. if (is_parallel_task)
  324. {
  325. _starpu_sched_post_exec_hook(task);
  326. free(task);
  327. }
  328. if (rank == 0)
  329. _starpu_handle_job_termination(j);
  330. return 0;
  331. }
  332. int _starpu_fpga_driver_deinit(struct _starpu_worker *fpga_worker)
  333. {
  334. _STARPU_TRACE_WORKER_DEINIT_START;
  335. unsigned memnode = fpga_worker->memory_node;
  336. _starpu_datawizard_handle_all_pending_node_data_requests(memnode);
  337. /* In case there remains some memory that was automatically
  338. * allocated by StarPU, we release it now. Note that data
  339. * coherency is not maintained anymore at that point ! */
  340. _starpu_free_all_automatically_allocated_buffers(memnode);
  341. fpga_worker->worker_is_initialized = 0;
  342. _STARPU_TRACE_WORKER_DEINIT_END(STARPU_FPGA_WORKER);
  343. return 0;
  344. }
  345. void *_starpu_fpga_worker(void *_arg)
  346. {
  347. struct _starpu_worker* worker = _arg;
  348. unsigned memnode = worker->memory_node;
  349. _starpu_fpga_driver_init(worker);
  350. _STARPU_TRACE_START_PROGRESS(memnode);
  351. while (_starpu_machine_is_running())
  352. {
  353. _starpu_may_pause();
  354. //fpga_msg("\tEntered the main loop\n");
  355. _starpu_fpga_driver_run_once(worker);
  356. }
  357. _STARPU_TRACE_END_PROGRESS(memnode);
  358. _starpu_fpga_driver_deinit(worker);
  359. return NULL;
  360. }
  361. uintptr_t _starpu_fpga_allocate_memory(unsigned dst_node, size_t size, int flags)
  362. {
  363. (void) flags;
  364. unsigned devid = starpu_memory_node_get_devid(dst_node);
  365. fpga_mem addr, next_addr;
  366. addr = current_address[devid];
  367. next_addr = current_address[devid] + size;
  368. if (next_addr >= (fpga_mem) global_mem[devid])
  369. {
  370. printf("Memory overflow on %d\n", devid);
  371. return 0;
  372. }
  373. current_address[devid] = next_addr;
  374. printf("fpga mem returned from allocation @: %p - %p\n",addr, addr + size);
  375. return (uintptr_t) addr;
  376. }
  /* Copy SIZE bytes from SRC to DST with memcpy(), after logging DST on
   * stdout.  Always returns 0.
   *
   * TODO: go through LMemLoopback_writeLMem(dst, size, src) instead. */
  int _starpu_fpga_copy_ram_to_fpga(void *src, void *dst, size_t size)
  {
      const int status = 0;

      printf("ram to fpga, fpga @= %p\n",dst);
      (void) memcpy(dst, src, size);

      return status;
  }
  /* Meant to transfer SIZE bytes from the address pointed to by SRC to the
   * address pointed to by DST.  For now this only logs DST on stdout: the
   * actual transfer, LMemLoopback_writeLMem(size, dst, src), is disabled, so
   * SRC and SIZE are unused.
   */
  void copy_ram_to_fpga(void *src, void *dst, size_t size)
  {
      (void) src;
      (void) size;

      printf("ram to fpga, fpga @= %p\n",dst);
  }
  /* Meant to transfer SIZE bytes from the FPGA address SRC to the RAM address
   * DST.  For now this only logs SRC on stdout: the actual transfer,
   * LMemLoopback_readLMem(size, src, dst), is disabled, so DST and SIZE are
   * unused.
   *
   * The log line now names the right direction; it used to say "ram to fpga".
   */
  void copy_fpga_to_ram(void *src, void *dst, size_t size)
  {
      (void) dst;
      (void) size;

      printf("fpga to ram, fpga @= %p\n",src);
  }
  /* Transfer SIZE bytes from the address pointed to by SRC to the address
   * pointed to by DST, using memcpy(), after logging SRC on stdout.
   * Always returns 0.
   *
   * TODO: go through LMemLoopback_readLMem(src, size, dst) instead.
   */
  int _starpu_fpga_copy_fpga_to_ram(void *src, void *dst, size_t size)
  {
      const int status = 0;

      printf("fpga to ram, fpga @= %p\n",src);
      (void) memcpy(dst, src, size);

      return status;
  }
  /* Transfer SIZE bytes from the address pointed to by SRC to the address
   * pointed to by DST, using memcpy(), after logging SRC on stdout.
   * Always returns 0.
   *
   * The log line now says "fpga to fpga"; it used to say "fpga to ram", which
   * made it indistinguishable from _starpu_fpga_copy_fpga_to_ram().
   *
   * TODO: go through LMemLoopback_XXXLMem(src, size, dst) instead.
   */
  int _starpu_fpga_copy_fpga_to_fpga(void *src, void *dst, size_t size)
  {
      printf("fpga to fpga, fpga @= %p\n",src);
      memcpy(dst,src,size);
      return 0;
  }
  /* Asynchronous transfers */

  /* Copy SIZE bytes from SRC to DST, after logging DST on stdout.  Despite the
   * name the copy is a plain memcpy() and has completed when the function
   * returns.  Always returns 0.
   *
   * TODO: find an asynchronous version of LMemLoopback_writeLMem() in the
   * documentation. */
  int _starpu_fpga_copy_ram_to_fpga_async(void *src, void *dst, size_t size)
  {
      const int status = 0;

      printf("ram to fpga, fpga @= %p\n",dst);
      (void) memcpy(dst, src, size);

      return status;
  }
  /* Copy SIZE bytes from SRC to DST, after logging SRC on stdout.  Despite the
   * name the copy is a plain memcpy() and has completed when the function
   * returns.  Always returns 0. */
  int _starpu_fpga_copy_fpga_to_ram_async(void *src, void *dst, size_t size)
  {
      const int status = 0;

      printf("fpga to ram, fpga @= %p\n",src);
      (void) memcpy(dst, src, size);

      return status;
  }
  /* "run" entry point of the driver: execute the whole worker loop for
   * WORKERARG, print a message once it has returned, and return 0. */
  int _starpu_run_fpga(struct _starpu_worker *workerarg)
  {
      /* Let's go ! */
      (void) _starpu_fpga_worker(workerarg);

      fpga_msg("\t !!!!! ~~~ I AM IN THE DRIVER ~~~\n");

      return 0;
  }
  438. void _starpu_fpga_transfer_data(void *buffers[], struct _starpu_job *j, int chnl)
  439. {
  440. struct starpu_task *task = j->task;
  441. unsigned nbuffers = STARPU_TASK_GET_NBUFFERS(task);
  442. unsigned index;
  443. for (index = 0; index < nbuffers; index++)
  444. {
  445. starpu_data_handle_t handle = STARPU_TASK_GET_HANDLE(task, index);
  446. // enum starpu_data_access_mode mode = STARPU_TASK_GET_MODE(task, index);
  447. unsigned *interface_id = (unsigned *)malloc(sizeof(unsigned));
  448. *interface_id = handle->ops->interfaceid;
  449. switch (*interface_id)
  450. {
  451. case STARPU_VARIABLE_INTERFACE_ID:
  452. {
  453. void *ptr = (void*) STARPU_VARIABLE_GET_PTR(buffers[index]);
  454. size_t size = STARPU_VARIABLE_GET_ELEMSIZE(buffers[index]);
  455. //fpga_data_send(chnl,ptr,size);
  456. printf("Driver Fpga @: %p, size %lu \n",ptr,size);
  457. break;
  458. }
  459. case STARPU_MATRIX_INTERFACE_ID:
  460. case STARPU_BLOCK_INTERFACE_ID:
  461. case STARPU_VECTOR_INTERFACE_ID:
  462. case STARPU_CSR_INTERFACE_ID:
  463. case STARPU_BCSR_INTERFACE_ID:
  464. case STARPU_MULTIFORMAT_INTERFACE_ID:
  465. fpga_msg("Interface not supported yet");
  466. default:
  467. STARPU_ABORT();
  468. }
  469. }
  470. }
  /* Copy SSIZE bytes from SRC + SRC_OFFSET to DST + DST_OFFSET through
   * _starpu_fpga_copy_ram_to_fpga() and return its result.  SRC_NODE, DST_NODE
   * and ASYNC_CHANNEL are not used.
   *
   * The offsets are added on the integer addresses before converting to
   * pointers: arithmetic on void * is a GNU extension, not standard C. */
  int _starpu_fpga_copy_data_from_cpu_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  {
      (void) src_node;
      (void) dst_node;
      (void) async_channel;

      return _starpu_fpga_copy_ram_to_fpga((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
  }
  /* Copy SSIZE bytes from SRC + SRC_OFFSET to DST + DST_OFFSET through
   * _starpu_fpga_copy_fpga_to_ram() and return its result.  SRC_NODE, DST_NODE
   * and ASYNC_CHANNEL are not used.
   *
   * The offsets are added on the integer addresses before converting to
   * pointers: arithmetic on void * is a GNU extension, not standard C. */
  int _starpu_fpga_copy_data_from_fpga_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  {
      (void) src_node;
      (void) dst_node;
      (void) async_channel;

      return _starpu_fpga_copy_fpga_to_ram((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
  }
  /* Copy SSIZE bytes from SRC + SRC_OFFSET to DST + DST_OFFSET through
   * _starpu_fpga_copy_fpga_to_fpga() and return its result.  SRC_NODE, DST_NODE
   * and ASYNC_CHANNEL are not used.
   *
   * The offsets are added on the integer addresses before converting to
   * pointers: arithmetic on void * is a GNU extension, not standard C. */
  int _starpu_fpga_copy_data_from_fpga_to_fpga(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t ssize, struct _starpu_async_channel *async_channel)
  {
      (void) src_node;
      (void) dst_node;
      (void) async_channel;

      return _starpu_fpga_copy_fpga_to_fpga((void *) (src + src_offset), (void *) (dst + dst_offset), ssize);
  }
  483. int _starpu_fpga_copy_interface_from_fpga_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
  484. {
  485. int src_kind = starpu_node_get_kind(src_node);
  486. int dst_kind = starpu_node_get_kind(dst_node);
  487. STARPU_ASSERT(src_kind == STARPU_FPGA_RAM && dst_kind == STARPU_CPU_RAM);
  488. int ret = 1;
  489. const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
  490. if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
  491. !(copy_methods->fpga_to_ram_async || copy_methods->any_to_any))
  492. {
  493. /* this is not associated to a request so it's synchronous */
  494. STARPU_ASSERT(copy_methods->fpga_to_ram || copy_methods->any_to_any);
  495. if (copy_methods->fpga_to_ram)
  496. copy_methods->fpga_to_ram(src_interface, src_node, dst_interface, dst_node);
  497. else
  498. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
  499. }
  500. else
  501. {
  502. //req->async_channel.type = STARPU_FPGA_RAM;
  503. if (copy_methods->fpga_to_ram_async)
  504. ret = copy_methods->fpga_to_ram_async(src_interface, src_node, dst_interface, dst_node);
  505. else
  506. {
  507. STARPU_ASSERT(copy_methods->any_to_any);
  508. ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
  509. }
  510. //_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), src_node);
  511. }
  512. return ret;
  513. }
  514. int _starpu_fpga_copy_interface_from_cpu_to_fpga(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req)
  515. {
  516. int src_kind = starpu_node_get_kind(src_node);
  517. int dst_kind = starpu_node_get_kind(dst_node);
  518. STARPU_ASSERT(src_kind == STARPU_CPU_RAM && dst_kind == STARPU_FPGA_RAM);
  519. const struct starpu_data_copy_methods *copy_methods = handle->ops->copy_methods;
  520. if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_fpga_copy_disabled() ||
  521. !(copy_methods->ram_to_fpga_async || copy_methods->any_to_any))
  522. {
  523. /* this is not associated to a request so it's synchronous */
  524. STARPU_ASSERT(copy_methods->ram_to_fpga || copy_methods->any_to_any);
  525. if (copy_methods->ram_to_fpga)
  526. copy_methods->ram_to_fpga(src_interface, src_node, dst_interface, dst_node);
  527. else
  528. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
  529. }
  530. else
  531. {
  532. //req->async_channel.type = STARPU_FPGA_RAM;
  533. if (copy_methods->ram_to_fpga_async)
  534. copy_methods->ram_to_fpga_async(src_interface, src_node, dst_interface, dst_node);
  535. else
  536. {
  537. STARPU_ASSERT(copy_methods->any_to_any);
  538. copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
  539. }
  540. //_starpu_fpga_init_event(&(req->async_channel.event.fpga_event), dst_node);
  541. }
  542. return 0;
  543. }
  544. struct _starpu_driver_ops _starpu_driver_fpga_ops =
  545. {
  546. .init = _starpu_fpga_driver_init,
  547. .run = _starpu_run_fpga,
  548. .run_once = _starpu_fpga_driver_run_once,
  549. .deinit = _starpu_fpga_driver_deinit
  550. };
  551. // TODO: transfers
  552. struct _starpu_node_ops _starpu_driver_fpga_node_ops =
  553. {
  554. //.copy_data_to[STARPU_CPU_RAM] = _starpu_fpga_copy_data_from_fpga_to_cpu,
  555. //.copy_data_to[STARPU_FPGA_RAM] = _starpu_fpga_copy_data_from_fpga_to_fpga,
  556. //.copy_interface_to[STARPU_CPU_RAM] = _starpu_fpga_copy_interface_from_fpga_to_cpu,
  557. .copy_interface_to[STARPU_FPGA_RAM] = NULL,
  558. .wait_request_completion = NULL,
  559. .test_request_completion = NULL,
  560. .is_direct_access_supported = NULL,
  561. .malloc_on_node = _starpu_fpga_allocate_memory,
  562. .free_on_node = NULL,
  563. .name = "fpga driver"
  564. };