simgrid.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012-2014 Université de Bordeaux
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <datawizard/memory_nodes.h>
  18. #include <common/config.h>
  19. #ifdef HAVE_UNISTD_H
  20. #include <unistd.h>
  21. #endif
  22. #include <core/perfmodel/perfmodel.h>
  23. #include <core/workers.h>
  24. #include <core/simgrid.h>
  25. #ifdef STARPU_SIMGRID
  26. #include <msg/msg.h>
  27. #include <smpi/smpif.h>
  28. #include <sys/resource.h>
  29. #define STARPU_MPI_AS_PREFIX "StarPU-MPI"
  30. #pragma weak starpu_main
  31. extern int starpu_main(int argc, char *argv[]);
  32. #pragma weak smpi_main
  33. extern int smpi_main(int (*realmain) (int argc, char *argv[]), int argc, char *argv[]);
  34. #pragma weak smpi_simulated_main_
  35. extern int smpi_simulated_main_(int argc, char *argv[]);
  36. #pragma weak starpu_mpi_world_rank
  37. extern int starpu_mpi_world_rank(void);
  38. #define _starpu_simgrid_running_smpi() (getenv("SMPI_GLOBAL_SIZE") != NULL)
  /* Command line of the real main(), handed over as process data to the
   * simgrid process which runs the application's starpu_main(). */
  struct main_args
  {
      int argc;       /* number of command-line arguments */
      char **argv;    /* command-line argument vector */
  };
  44. int do_starpu_main(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
  45. {
  46. struct main_args *args = MSG_process_get_data(MSG_process_self());
  47. return starpu_main(args->argc, args->argv);
  48. }
  49. #ifdef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT
  50. #ifdef HAVE_MSG_GET_AS_BY_NAME
  51. static msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
  52. {
  53. return MSG_get_as_by_name(name);
  54. }
  55. #else /* HAVE_MSG_GET_AS_BY_NAME */
  56. static msg_as_t __starpu_simgrid_get_as_by_name(msg_as_t root, const char *name)
  57. {
  58. xbt_dict_t dict;
  59. xbt_dict_cursor_t cursor;
  60. const char *key;
  61. msg_as_t as, ret;
  62. dict = MSG_environment_as_get_routing_sons(root);
  63. xbt_dict_foreach(dict, cursor, key, as) {
  64. if (!strcmp(MSG_environment_as_get_name(as), name))
  65. return as;
  66. ret = __starpu_simgrid_get_as_by_name(as, name);
  67. if (ret)
  68. return ret;
  69. }
  70. return NULL;
  71. }
  72. static msg_as_t _starpu_simgrid_get_as_by_name(const char *name)
  73. {
  74. return __starpu_simgrid_get_as_by_name(MSG_environment_get_routing_root(), name);
  75. }
  76. #endif /* HAVE_MSG_GET_AS_BY_NAME */
  77. #endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */
  78. int _starpu_simgrid_get_nbhosts(const char *prefix)
  79. {
  80. int ret;
  81. xbt_dynar_t hosts;
  82. unsigned i, nb;
  83. unsigned len = strlen(prefix);
  84. #ifdef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT
  85. if (_starpu_simgrid_running_smpi())
  86. {
  87. char name[16];
  88. STARPU_ASSERT(starpu_mpi_world_rank);
  89. snprintf(name, sizeof(name), STARPU_MPI_AS_PREFIX"%u", starpu_mpi_world_rank());
  90. hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(name));
  91. }
  92. else
  93. #endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */
  94. hosts = MSG_hosts_as_dynar();
  95. nb = xbt_dynar_length(hosts);
  96. ret = 0;
  97. for (i = 0; i < nb; i++) {
  98. const char *name;
  99. name = MSG_host_get_name(xbt_dynar_get_as(hosts, i, msg_host_t));
  100. if (!strncmp(name, prefix, len))
  101. ret++;
  102. }
  103. xbt_dynar_free(&hosts);
  104. return ret;
  105. }
  106. unsigned long long _starpu_simgrid_get_memsize(const char *prefix, unsigned devid)
  107. {
  108. char name[16];
  109. msg_host_t host;
  110. const char *memsize;
  111. snprintf(name, sizeof(name), "%s%u", prefix, devid);
  112. host = _starpu_simgrid_get_host_by_name(name);
  113. if (!host)
  114. return 0;
  115. if (!MSG_host_get_properties(host))
  116. return 0;
  117. memsize = MSG_host_get_property_value(host, "memsize");
  118. if (!memsize)
  119. return 0;
  120. return atoll(memsize);
  121. }
  122. msg_host_t _starpu_simgrid_get_host_by_name(const char *name)
  123. {
  124. if (_starpu_simgrid_running_smpi())
  125. {
  126. char mpiname[16];
  127. STARPU_ASSERT(starpu_mpi_world_rank);
  128. snprintf(mpiname, sizeof(mpiname), "%d-%s", starpu_mpi_world_rank(), name);
  129. return MSG_get_host_by_name(mpiname);
  130. }
  131. else
  132. return MSG_get_host_by_name(name);
  133. }
  134. #ifdef STARPU_DEVEL
  135. #warning TODO: use another way to start main, when simgrid provides it, and then include the application-provided configuration for platform numbers
  136. #endif
  137. #undef main
  138. int main(int argc, char **argv)
  139. {
  140. char path[256];
  141. if (!starpu_main && !(smpi_main && smpi_simulated_main_))
  142. {
  143. _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h included, to properly rename it into starpu_main\n");
  144. exit(EXIT_FAILURE);
  145. }
  146. if (_starpu_simgrid_running_smpi())
  147. {
  148. /* Oops, we are running SMPI, let it start Simgrid, and we'll
  149. * take back hand in _starpu_simgrid_init from starpu_init() */
  150. return smpi_main(smpi_simulated_main_, argc, argv);
  151. }
  152. MSG_init(&argc, argv);
  153. #if SIMGRID_VERSION_MAJOR < 3 || (SIMGRID_VERSION_MAJOR == 3 && SIMGRID_VERSION_MINOR < 9)
  154. /* Versions earlier than 3.9 didn't support our communication tasks */
  155. MSG_config("workstation/model", "ptask_L07");
  156. #endif
  157. /* Simgrid uses tiny stacks by default. This comes unexpected to our users. */
  158. extern xbt_cfg_t _sg_cfg_set;
  159. unsigned stack_size = 8192;
  160. struct rlimit rlim;
  161. if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur != 0 && rlim.rlim_cur != RLIM_INFINITY)
  162. stack_size = rlim.rlim_cur / 1024;
  163. xbt_cfg_set_int(_sg_cfg_set, "contexts/stack_size", stack_size);
  164. /* Load XML platform */
  165. _starpu_simgrid_get_platform_path(path, sizeof(path));
  166. MSG_create_environment(path);
  167. struct main_args args = { .argc = argc, .argv = argv };
  168. MSG_process_create("main", &do_starpu_main, &args, MSG_get_host_by_name("MAIN"));
  169. MSG_main();
  170. return 0;
  171. }
  172. void _starpu_simgrid_init()
  173. {
  174. xbt_dynar_t hosts;
  175. int i;
  176. if (!starpu_main && !(smpi_main && smpi_simulated_main_))
  177. {
  178. _STARPU_ERROR("In simgrid mode, the file containing the main() function of this application needs to be compiled with starpu.h included, to properly rename it into starpu_main\n");
  179. exit(EXIT_FAILURE);
  180. }
  181. #ifdef HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT
  182. if (_starpu_simgrid_running_smpi())
  183. {
  184. /* Take back hand to create the local platform for this MPI
  185. * node */
  186. char asname[16];
  187. char path[256];
  188. char cmdline[1024];
  189. FILE *in;
  190. int out;
  191. #ifdef HAVE_MKSTEMPS
  192. char template[] = "/tmp/"STARPU_MPI_AS_PREFIX"-platform-XXXXXX.xml";
  193. #else
  194. char template[] = "/tmp/"STARPU_MPI_AS_PREFIX"-platform-XXXXXX";
  195. #endif
  196. int ret;
  197. STARPU_ASSERT(starpu_mpi_world_rank);
  198. snprintf(asname, sizeof(asname), STARPU_MPI_AS_PREFIX"%u", starpu_mpi_world_rank());
  199. /* Get XML platform */
  200. _starpu_simgrid_get_platform_path(path, sizeof(path));
  201. in = fopen(path, "r");
  202. _starpu_frdlock(in);
  203. STARPU_ASSERT_MSG(in, "Could not open platform file %s", path);
  204. #ifdef HAVE_MKSTEMPS
  205. out = mkstemps(template, strlen(".xml"));
  206. #else
  207. out = mkstemp(template);
  208. #endif
  209. /* Generate modified XML platform */
  210. STARPU_ASSERT_MSG(out >= 0, "Could not create temporary file like %s", template);
  211. close(out);
  212. snprintf(cmdline, sizeof(cmdline), "xsltproc --novalid --stringparam ASname %s -o %s "STARPU_DATADIR"/starpu/starpu_smpi.xslt %s", asname, template, path);
  213. ret = system(cmdline);
  214. STARPU_ASSERT_MSG(ret == 0, "running xsltproc to generate SMPI platforms %s from %s failed", template, path);
  215. _starpu_frdunlock(in);
  216. fclose(in);
  217. /* And create it */
  218. MSG_create_environment(template);
  219. unlink(template);
  220. hosts = MSG_environment_as_get_hosts(_starpu_simgrid_get_as_by_name(asname));
  221. }
  222. else
  223. #endif /* HAVE_MSG_ENVIRONMENT_GET_ROUTING_ROOT */
  224. hosts = MSG_hosts_as_dynar();
  225. int nb = xbt_dynar_length(hosts);
  226. for (i = 0; i < nb; i++)
  227. MSG_host_set_data(xbt_dynar_get_as(hosts, i, msg_host_t), calloc(MAX_TSD, sizeof(void*)));
  228. xbt_dynar_free(&hosts);
  229. }
  230. /*
  231. * Tasks
  232. */
  233. struct task {
  234. msg_task_t task;
  235. int workerid;
  236. /* communication termination signalization */
  237. unsigned *finished;
  238. starpu_pthread_mutex_t *mutex;
  239. starpu_pthread_cond_t *cond;
  240. /* Task which waits for this task */
  241. struct task *next;
  242. };
  243. static struct task *last_task[STARPU_NMAXWORKERS];
  244. /* Actually execute the task. */
  245. static int task_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
  246. {
  247. struct task *task = MSG_process_get_data(MSG_process_self());
  248. _STARPU_DEBUG("task %p started\n", task);
  249. MSG_task_execute(task->task);
  250. MSG_task_destroy(task->task);
  251. _STARPU_DEBUG("task %p finished\n", task);
  252. STARPU_PTHREAD_MUTEX_LOCK(task->mutex);
  253. *task->finished = 1;
  254. STARPU_PTHREAD_COND_BROADCAST(task->cond);
  255. STARPU_PTHREAD_MUTEX_UNLOCK(task->mutex);
  256. /* The worker which started this task may be sleeping out of tasks, wake it */
  257. starpu_wake_worker(task->workerid);
  258. if (last_task[task->workerid] == task)
  259. last_task[task->workerid] = NULL;
  260. if (task->next)
  261. MSG_process_create("task", task_execute, task->next, MSG_host_self());
  262. free(task);
  263. return 0;
  264. }
  265. /* Wait for completion of all asynchronous tasks for this worker */
  266. void _starpu_simgrid_wait_tasks(int workerid)
  267. {
  268. struct task *task = last_task[workerid];
  269. if (!task)
  270. return;
  271. unsigned *finished = task->finished;
  272. starpu_pthread_mutex_t *mutex = task->mutex;
  273. starpu_pthread_cond_t *cond = task->cond;
  274. STARPU_PTHREAD_MUTEX_LOCK(mutex);
  275. while (!*finished)
  276. STARPU_PTHREAD_COND_WAIT(cond, mutex);
  277. STARPU_PTHREAD_MUTEX_UNLOCK(mutex);
  278. }
  279. /* Task execution submitted by StarPU */
  280. void _starpu_simgrid_submit_job(int workerid, struct _starpu_job *j, struct starpu_perfmodel_arch* perf_arch, double length, unsigned *finished, starpu_pthread_mutex_t *mutex, starpu_pthread_cond_t *cond)
  281. {
  282. struct starpu_task *starpu_task = j->task;
  283. msg_task_t simgrid_task;
  284. if (j->internal)
  285. /* This is not useful to include in simulation (and probably
  286. * doesn't have a perfmodel anyway) */
  287. return;
  288. if (isnan(length))
  289. {
  290. length = starpu_task_expected_length(starpu_task, perf_arch, j->nimpl);
  291. STARPU_ASSERT_MSG(!_STARPU_IS_ZERO(length) && !isnan(length),
  292. "Codelet %s does not have a perfmodel, or is not calibrated enough, please re-run in non-simgrid mode until it is calibrated",
  293. _starpu_job_get_model_name(j));
  294. }
  295. simgrid_task = MSG_task_create(_starpu_job_get_model_name(j),
  296. length/1000000.0*MSG_get_host_speed(MSG_host_self()),
  297. 0, NULL);
  298. if (finished == NULL)
  299. {
  300. /* Synchronous execution */
  301. /* First wait for previous tasks */
  302. _starpu_simgrid_wait_tasks(workerid);
  303. MSG_task_execute(simgrid_task);
  304. MSG_task_destroy(simgrid_task);
  305. }
  306. else
  307. {
  308. /* Asynchronous execution */
  309. struct task *task = malloc(sizeof(*task));
  310. task->task = simgrid_task;
  311. task->workerid = workerid;
  312. task->finished = finished;
  313. *finished = 0;
  314. task->mutex = mutex;
  315. task->cond = cond;
  316. task->next = NULL;
  317. /* Sleep 10µs for the GPU task queueing */
  318. MSG_process_sleep(0.000010);
  319. if (last_task[workerid])
  320. {
  321. /* Make this task depend on the previous */
  322. last_task[workerid]->next = task;
  323. last_task[workerid] = task;
  324. }
  325. else
  326. {
  327. last_task[workerid] = task;
  328. MSG_process_create("task", task_execute, task, MSG_host_self());
  329. }
  330. }
  331. }
  332. /*
  333. * Transfers
  334. */
  335. /* Note: simgrid is not parallel, so there is no need to hold locks for management of transfers. */
  336. LIST_TYPE(transfer,
  337. msg_task_t task;
  338. int src_node;
  339. int dst_node;
  340. int run_node;
  341. /* communication termination signalization */
  342. unsigned *finished;
  343. starpu_pthread_mutex_t *mutex;
  344. starpu_pthread_cond_t *cond;
  345. /* transfers which wait for this transfer */
  346. struct transfer **wake;
  347. unsigned nwake;
  348. /* Number of transfers that this transfer waits for */
  349. unsigned nwait;
  350. )
  351. struct transfer_list *pending;
  352. /* Tell for two transfers whether they should be handled in sequence */
  353. static int transfers_are_sequential(struct transfer *new_transfer, struct transfer *old_transfer)
  354. {
  355. int new_is_cuda STARPU_ATTRIBUTE_UNUSED, old_is_cuda STARPU_ATTRIBUTE_UNUSED;
  356. int new_is_opencl STARPU_ATTRIBUTE_UNUSED, old_is_opencl STARPU_ATTRIBUTE_UNUSED;
  357. int new_is_gpu_gpu, old_is_gpu_gpu;
  358. new_is_cuda = starpu_node_get_kind(new_transfer->src_node) == STARPU_CUDA_RAM;
  359. new_is_cuda |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_CUDA_RAM;
  360. old_is_cuda = starpu_node_get_kind(old_transfer->src_node) == STARPU_CUDA_RAM;
  361. old_is_cuda |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_CUDA_RAM;
  362. new_is_opencl = starpu_node_get_kind(new_transfer->src_node) == STARPU_OPENCL_RAM;
  363. new_is_opencl |= starpu_node_get_kind(new_transfer->dst_node) == STARPU_OPENCL_RAM;
  364. old_is_opencl = starpu_node_get_kind(old_transfer->src_node) == STARPU_OPENCL_RAM;
  365. old_is_opencl |= starpu_node_get_kind(old_transfer->dst_node) == STARPU_OPENCL_RAM;
  366. new_is_gpu_gpu = new_transfer->src_node && new_transfer->dst_node;
  367. old_is_gpu_gpu = old_transfer->src_node && old_transfer->dst_node;
  368. /* We ignore cuda-opencl transfers, they can not happen */
  369. STARPU_ASSERT(!((new_is_cuda && old_is_opencl) || (old_is_cuda && new_is_opencl)));
  370. /* The following constraints have been observed with CUDA alone */
  371. /* Same source/destination, sequential */
  372. if (new_transfer->src_node == old_transfer->src_node && new_transfer->dst_node == old_transfer->dst_node)
  373. return 1;
  374. /* Crossed GPU-GPU, sequential */
  375. if (new_is_gpu_gpu
  376. && new_transfer->src_node == old_transfer->dst_node
  377. && old_transfer->src_node == new_transfer->dst_node)
  378. return 1;
  379. /* GPU-GPU transfers are sequential with any RAM->GPU transfer */
  380. if (new_is_gpu_gpu
  381. && old_transfer->dst_node == new_transfer->src_node
  382. && old_transfer->dst_node == new_transfer->dst_node)
  383. return 1;
  384. if (old_is_gpu_gpu
  385. && new_transfer->dst_node == old_transfer->src_node
  386. && new_transfer->dst_node == old_transfer->dst_node)
  387. return 1;
  388. /* StarPU's constraint on CUDA transfers is using one stream per
  389. * source/destination pair, which is already handled above */
  390. return 0;
  391. }
  392. /* Actually execute the transfer, and then start transfers waiting for this one. */
  393. static int transfer_execute(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
  394. {
  395. struct transfer *transfer = MSG_process_get_data(MSG_process_self());
  396. unsigned i;
  397. _STARPU_DEBUG("transfer %p started\n", transfer);
  398. MSG_task_execute(transfer->task);
  399. MSG_task_destroy(transfer->task);
  400. _STARPU_DEBUG("transfer %p finished\n", transfer);
  401. STARPU_PTHREAD_MUTEX_LOCK(transfer->mutex);
  402. *transfer->finished = 1;
  403. STARPU_PTHREAD_COND_BROADCAST(transfer->cond);
  404. STARPU_PTHREAD_MUTEX_UNLOCK(transfer->mutex);
  405. /* The workers which started this request may be sleeping out of tasks, wake it */
  406. _starpu_wake_all_blocked_workers_on_node(transfer->run_node);
  407. /* Wake transfers waiting for my termination */
  408. /* Note: due to possible preemption inside process_create, the array
  409. * may grow while doing this */
  410. for (i = 0; i < transfer->nwake; i++)
  411. {
  412. struct transfer *wake = transfer->wake[i];
  413. STARPU_ASSERT(wake->nwait > 0);
  414. wake->nwait--;
  415. if (!wake->nwait)
  416. {
  417. _STARPU_DEBUG("triggering transfer %p\n", wake);
  418. MSG_process_create("transfer task", transfer_execute, wake, _starpu_simgrid_get_host_by_name("MAIN"));
  419. }
  420. }
  421. free(transfer->wake);
  422. transfer_list_erase(pending, transfer);
  423. transfer_delete(transfer);
  424. return 0;
  425. }
  426. /* Look for sequentialization between this transfer and pending transfers, and submit this one */
  427. static void transfer_submit(struct transfer *transfer)
  428. {
  429. struct transfer *old;
  430. if (!pending)
  431. pending = transfer_list_new();
  432. for (old = transfer_list_begin(pending);
  433. old != transfer_list_end(pending);
  434. old = transfer_list_next(old))
  435. {
  436. if (transfers_are_sequential(transfer, old))
  437. {
  438. _STARPU_DEBUG("transfer %p(%d->%d) waits for %p(%d->%d)\n",
  439. transfer, transfer->src_node, transfer->dst_node,
  440. old, old->src_node, old->dst_node);
  441. /* Make new wait for the old */
  442. transfer->nwait++;
  443. /* Make old wake the new */
  444. old->wake = realloc(old->wake, (old->nwake + 1) * sizeof(old->wake));
  445. old->wake[old->nwake] = transfer;
  446. old->nwake++;
  447. }
  448. }
  449. transfer_list_push_front(pending, transfer);
  450. if (!transfer->nwait)
  451. {
  452. _STARPU_DEBUG("transfer %p waits for nobody, starting\n", transfer);
  453. MSG_process_create("transfer task", transfer_execute, transfer, _starpu_simgrid_get_host_by_name("MAIN"));
  454. }
  455. }
  456. /* Data transfer issued by StarPU */
  457. int _starpu_simgrid_transfer(size_t size, unsigned src_node, unsigned dst_node, struct _starpu_data_request *req)
  458. {
  459. msg_task_t task;
  460. msg_host_t *hosts = calloc(2, sizeof(*hosts));
  461. double *computation = calloc(2, sizeof(*computation));
  462. double *communication = calloc(4, sizeof(*communication));
  463. starpu_pthread_mutex_t mutex;
  464. starpu_pthread_cond_t cond;
  465. unsigned finished;
  466. hosts[0] = _starpu_simgrid_memory_node_get_host(src_node);
  467. hosts[1] = _starpu_simgrid_memory_node_get_host(dst_node);
  468. STARPU_ASSERT(hosts[0] != hosts[1]);
  469. communication[1] = size;
  470. task = MSG_parallel_task_create("copy", 2, hosts, computation, communication, NULL);
  471. struct transfer *transfer = transfer_new();
  472. _STARPU_DEBUG("creating transfer %p for %lu bytes\n", transfer, (unsigned long) size);
  473. transfer->task = task;
  474. transfer->src_node = src_node;
  475. transfer->dst_node = dst_node;
  476. transfer->run_node = _starpu_memory_node_get_local_key();
  477. if (req)
  478. {
  479. transfer->finished = &req->async_channel.event.finished;
  480. transfer->mutex = &req->async_channel.event.mutex;
  481. transfer->cond = &req->async_channel.event.cond;
  482. }
  483. else
  484. {
  485. transfer->finished = &finished;
  486. transfer->mutex = &mutex;
  487. transfer->cond = &cond;
  488. }
  489. *transfer->finished = 0;
  490. STARPU_PTHREAD_MUTEX_INIT(transfer->mutex, NULL);
  491. STARPU_PTHREAD_COND_INIT(transfer->cond, NULL);
  492. transfer->wake = NULL;
  493. transfer->nwake = 0;
  494. transfer->nwait = 0;
  495. if (req)
  496. _STARPU_TRACE_START_DRIVER_COPY_ASYNC(src_node, dst_node);
  497. /* Sleep 10µs for the GPU transfer queueing */
  498. MSG_process_sleep(0.000010);
  499. transfer_submit(transfer);
  500. /* Note: from here, transfer might be already freed */
  501. if (req)
  502. {
  503. _STARPU_TRACE_END_DRIVER_COPY_ASYNC(src_node, dst_node);
  504. _STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
  505. return -EAGAIN;
  506. }
  507. else
  508. {
  509. /* this is not associated to a request so it's synchronous */
  510. STARPU_PTHREAD_MUTEX_LOCK(&mutex);
  511. while (!finished)
  512. STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
  513. STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
  514. return 0;
  515. }
  516. }
  517. int
  518. _starpu_simgrid_thread_start(int argc STARPU_ATTRIBUTE_UNUSED, char *argv[] STARPU_ATTRIBUTE_UNUSED)
  519. {
  520. struct _starpu_pthread_args *_args = MSG_process_get_data(MSG_process_self());
  521. struct _starpu_pthread_args args = *_args;
  522. free(_args);
  523. args.f(args.arg);
  524. return 0;
  525. }
  526. #endif