sink_common.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012 Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #include <common/config.h>
  18. #include <common/utils.h>
  19. #include <drivers/mp_common/mp_common.h>
  20. #include <datawizard/interfaces/data_interface.h>
  21. #include <common/barrier.h>
  22. #include <core/workers.h>
  23. #include <common/barrier_counter.h>
  24. #ifdef STARPU_USE_MIC
  25. #include <common/COISysInfo_common.h>
  26. #endif
  27. #include "sink_common.h"
  28. /* Return the sink kind of the running process, based on the value of the
  29. * STARPU_SINK environment variable.
  30. * If there is no valid value retrieved, return STARPU_INVALID_KIND
  31. */
  32. static enum _starpu_mp_node_kind _starpu_sink_common_get_kind(void)
  33. {
  34. /* Environment varible STARPU_SINK must be defined when running on sink
  35. * side : let's use it to get the kind of node we're running on */
  36. char *node_kind = getenv("STARPU_SINK");
  37. STARPU_ASSERT(node_kind);
  38. if (!strcmp(node_kind, "STARPU_MIC"))
  39. return STARPU_MIC_SINK;
  40. else if (!strcmp(node_kind, "STARPU_SCC"))
  41. return STARPU_SCC_SINK;
  42. else if (!strcmp(node_kind, "STARPU_MPI"))
  43. return STARPU_MPI_SINK;
  44. else
  45. return STARPU_INVALID_KIND;
  46. }
  47. /* Send to host the number of cores of the sink device
  48. */
  49. static void _starpu_sink_common_get_nb_cores (struct _starpu_mp_node *node)
  50. {
  51. // Process packet received from `_starpu_src_common_sink_cores'.
  52. _starpu_mp_common_send_command (node, STARPU_ANSWER_SINK_NBCORES,
  53. &node->nb_cores, sizeof (int));
  54. }
  55. /* Send to host the address of the function given in parameter
  56. */
  57. static void _starpu_sink_common_lookup(const struct _starpu_mp_node *node,
  58. char *func_name)
  59. {
  60. void (*func)(void);
  61. func = node->lookup(node,func_name);
  62. //_STARPU_DEBUG("Looked up %s, got %p\n", func_name, func);
  63. /* If we couldn't find the function, let's send an error to the host.
  64. * The user probably made a mistake in the name */
  65. if (func)
  66. _starpu_mp_common_send_command(node, STARPU_ANSWER_LOOKUP,
  67. &func, sizeof(func));
  68. else
  69. _starpu_mp_common_send_command(node, STARPU_ERROR_LOOKUP,
  70. NULL, 0);
  71. }
  72. /* Allocate a memory space and send the address of this space to the host
  73. */
  74. void _starpu_sink_common_allocate(const struct _starpu_mp_node *mp_node,
  75. void *arg, int arg_size)
  76. {
  77. STARPU_ASSERT(arg_size == sizeof(size_t));
  78. void *addr = malloc(*(size_t *)(arg));
  79. /* If the allocation fail, let's send an error to the host.
  80. */
  81. if (addr)
  82. _starpu_mp_common_send_command(mp_node, STARPU_ANSWER_ALLOCATE,
  83. &addr, sizeof(addr));
  84. else
  85. _starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE,
  86. NULL, 0);
  87. }
  88. void _starpu_sink_common_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED,
  89. void *arg, int arg_size)
  90. {
  91. STARPU_ASSERT(arg_size == sizeof(void *));
  92. free(*(void **)(arg));
  93. }
  94. static void _starpu_sink_common_copy_from_host(const struct _starpu_mp_node *mp_node,
  95. void *arg, int arg_size)
  96. {
  97. STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command));
  98. struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
  99. mp_node->dt_recv(mp_node, cmd->addr, cmd->size);
  100. }
  101. static void _starpu_sink_common_copy_to_host(const struct _starpu_mp_node *mp_node,
  102. void *arg, int arg_size)
  103. {
  104. STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command));
  105. struct _starpu_mp_transfer_command *cmd = (struct _starpu_mp_transfer_command *)arg;
  106. mp_node->dt_send(mp_node, cmd->addr, cmd->size);
  107. }
  108. static void _starpu_sink_common_copy_from_sink(const struct _starpu_mp_node *mp_node,
  109. void *arg, int arg_size)
  110. {
  111. STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command_to_device));
  112. struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg;
  113. mp_node->dt_recv_from_device(mp_node, cmd->devid, cmd->addr, cmd->size);
  114. _starpu_mp_common_send_command(mp_node, STARPU_TRANSFER_COMPLETE, NULL, 0);
  115. }
  116. static void _starpu_sink_common_copy_to_sink(const struct _starpu_mp_node *mp_node,
  117. void *arg, int arg_size)
  118. {
  119. STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command_to_device));
  120. struct _starpu_mp_transfer_command_to_device *cmd = (struct _starpu_mp_transfer_command_to_device *)arg;
  121. mp_node->dt_send_to_device(mp_node, cmd->devid, cmd->addr, cmd->size);
  122. }
  123. /* Receive workers and combined workers and store them into the struct config
  124. */
  125. static void _starpu_sink_common_recv_workers(struct _starpu_mp_node * node, void *arg, int arg_size)
  126. {
  127. /* Retrieve information from the message */
  128. STARPU_ASSERT(arg_size == (sizeof(int)*5));
  129. void * arg_ptr = arg;
  130. int i;
  131. int nworkers = *(int *)arg_ptr;
  132. arg_ptr += sizeof(nworkers);
  133. int worker_size = *(int *)arg_ptr;
  134. arg_ptr += sizeof(worker_size);
  135. int combined_worker_size = *(int *)arg_ptr;
  136. arg_ptr += sizeof(combined_worker_size);
  137. int baseworkerid = *(int *)arg_ptr;
  138. arg_ptr += sizeof(baseworkerid);
  139. struct _starpu_machine_config *config = _starpu_get_machine_config();
  140. config->topology.nworkers = *(int *)arg_ptr;
  141. /* Retrieve workers */
  142. struct _starpu_worker * workers = &config->workers[baseworkerid];
  143. node->dt_recv(node,workers,worker_size);
  144. /* Update workers to have coherent field */
  145. for(i=0; i<nworkers; i++)
  146. {
  147. workers[i].config = config;
  148. starpu_pthread_mutex_init(&workers[i].mutex,NULL);
  149. starpu_pthread_mutex_destroy(&workers[i].mutex);
  150. starpu_pthread_cond_init(&workers[i].started_cond,NULL);
  151. starpu_pthread_cond_destroy(&workers[i].started_cond);
  152. starpu_pthread_cond_init(&workers[i].ready_cond,NULL);
  153. starpu_pthread_cond_destroy(&workers[i].ready_cond);
  154. starpu_pthread_mutex_init(&workers[i].sched_mutex,NULL);
  155. starpu_pthread_mutex_destroy(&workers[i].sched_mutex);
  156. starpu_pthread_cond_init(&workers[i].sched_cond,NULL);
  157. starpu_pthread_cond_destroy(&workers[i].sched_cond);
  158. workers[i].current_task = NULL;
  159. workers[i].set = NULL;
  160. workers[i].terminated_jobs = NULL;
  161. workers[i].sched_ctx = NULL;
  162. //_starpu_barrier_counter_init(&workers[i].tasks_barrier, 1);
  163. //_starpu_barrier_counter_destroy(&workers[i].tasks_barrier);
  164. starpu_pthread_mutex_init(&workers[i].parallel_sect_mutex,NULL);
  165. starpu_pthread_mutex_destroy(&workers[i].parallel_sect_mutex);
  166. starpu_pthread_cond_init(&workers[i].parallel_sect_cond,NULL);
  167. starpu_pthread_cond_destroy(&workers[i].parallel_sect_cond);
  168. }
  169. /* Retrieve combined workers */
  170. struct _starpu_combined_worker * combined_workers = config->combined_workers;
  171. node->dt_recv(node, combined_workers, combined_worker_size);
  172. node->baseworkerid = baseworkerid;
  173. STARPU_PTHREAD_BARRIER_WAIT(&node->init_completed_barrier);
  174. }
  175. /* Function looping on the sink, waiting for tasks to execute.
  176. * If the caller is the host, don't do anything.
  177. */
  178. void _starpu_sink_common_worker(void)
  179. {
  180. struct _starpu_mp_node *node = NULL;
  181. enum _starpu_mp_command command = STARPU_EXIT;
  182. int arg_size = 0;
  183. void *arg = NULL;
  184. int exit_starpu = 0;
  185. enum _starpu_mp_node_kind node_kind = _starpu_sink_common_get_kind();
  186. if (node_kind == STARPU_INVALID_KIND)
  187. _STARPU_ERROR("No valid sink kind retrieved, use the"
  188. "STARPU_SINK environment variable to specify"
  189. "this\n");
  190. /* Create and initialize the node */
  191. node = _starpu_mp_common_node_create(node_kind, -1);
  192. starpu_pthread_key_t worker_key;
  193. STARPU_PTHREAD_KEY_CREATE(&worker_key, NULL);
  194. while (!exit_starpu)
  195. {
  196. /* If we have received a message */
  197. if(node->mp_recv_is_ready(node))
  198. {
  199. command = _starpu_mp_common_recv_command(node, &arg, &arg_size);
  200. switch(command)
  201. {
  202. case STARPU_EXIT:
  203. exit_starpu = 1;
  204. break;
  205. case STARPU_EXECUTE:
  206. node->execute(node, arg, arg_size);
  207. break;
  208. case STARPU_SINK_NBCORES:
  209. _starpu_sink_common_get_nb_cores(node);
  210. break;
  211. case STARPU_LOOKUP:
  212. _starpu_sink_common_lookup(node, (char *) arg);
  213. break;
  214. case STARPU_ALLOCATE:
  215. node->allocate(node, arg, arg_size);
  216. break;
  217. case STARPU_FREE:
  218. node->free(node, arg, arg_size);
  219. break;
  220. case STARPU_RECV_FROM_HOST:
  221. _starpu_sink_common_copy_from_host(node, arg, arg_size);
  222. break;
  223. case STARPU_SEND_TO_HOST:
  224. _starpu_sink_common_copy_to_host(node, arg, arg_size);
  225. break;
  226. case STARPU_RECV_FROM_SINK:
  227. _starpu_sink_common_copy_from_sink(node, arg, arg_size);
  228. break;
  229. case STARPU_SEND_TO_SINK:
  230. _starpu_sink_common_copy_to_sink(node, arg, arg_size);
  231. break;
  232. case STARPU_SYNC_WORKERS:
  233. _starpu_sink_common_recv_workers(node, arg, arg_size);
  234. break;
  235. default:
  236. printf("Oops, command %x unrecognized\n", command);
  237. }
  238. }
  239. pthread_mutex_lock(&node->message_queue_mutex);
  240. /* If the list is not empty */
  241. if(!mp_message_list_empty(node->message_queue))
  242. {
  243. /* We pop a message and send it to the host */
  244. struct mp_message * message = mp_message_list_pop_back(node->message_queue);
  245. pthread_mutex_unlock(&node->message_queue_mutex);
  246. //_STARPU_DEBUG("telling host that we have finished the task %p sur %d.\n", task->kernel, task->coreid);
  247. _starpu_mp_common_send_command(node, message->type,
  248. &message->buffer, message->size);
  249. mp_message_delete(message);
  250. }
  251. else
  252. {
  253. pthread_mutex_unlock(&node->message_queue_mutex);
  254. }
  255. }
  256. /* Deinitialize the node and release it */
  257. _starpu_mp_common_node_destroy(node);
  258. exit(0);
  259. }
  260. /* Search for the mp_barrier correspondind to the specified combined worker
  261. * and create it if it doesn't exist
  262. */
  263. static struct mp_barrier * _starpu_sink_common_get_barrier(struct _starpu_mp_node * node, int cb_workerid, int cb_workersize)
  264. {
  265. struct mp_barrier * b = NULL;
  266. STARPU_PTHREAD_MUTEX_LOCK(&node->barrier_mutex);
  267. /* Search if the barrier already exist */
  268. for(b = mp_barrier_list_begin(node->barrier_list);
  269. b != mp_barrier_list_end(node->barrier_list) && b->id != cb_workerid;
  270. b = mp_barrier_list_next(b));
  271. /* If we found the barrier */
  272. if(b != NULL)
  273. {
  274. STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex);
  275. return b;
  276. }
  277. else
  278. {
  279. /* Else we create, initialize and add it to the list*/
  280. b = mp_barrier_new();
  281. b->id = cb_workerid;
  282. STARPU_PTHREAD_BARRIER_INIT(&b->before_work_barrier,NULL,cb_workersize);
  283. STARPU_PTHREAD_BARRIER_INIT(&b->after_work_barrier,NULL,cb_workersize);
  284. mp_barrier_list_push_back(node->barrier_list,b);
  285. STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex);
  286. return b;
  287. }
  288. }
  289. /* Erase for the mp_barrier correspondind to the specified combined worker
  290. */
  291. static void _starpu_sink_common_erase_barrier(struct _starpu_mp_node * node, struct mp_barrier *barrier)
  292. {
  293. STARPU_PTHREAD_MUTEX_LOCK(&node->barrier_mutex);
  294. mp_barrier_list_erase(node->barrier_list,barrier);
  295. STARPU_PTHREAD_MUTEX_UNLOCK(&node->barrier_mutex);
  296. }
  297. /* Append the message given in parameter to the message list
  298. */
  299. static void _starpu_sink_common_append_message(struct _starpu_mp_node *node, struct mp_message * message)
  300. {
  301. STARPU_PTHREAD_MUTEX_LOCK(&node->message_queue_mutex);
  302. mp_message_list_push_front(node->message_queue,message);
  303. STARPU_PTHREAD_MUTEX_UNLOCK(&node->message_queue_mutex);
  304. }
  305. /* Append to the message list a "STARPU_PRE_EXECUTION" message
  306. */
  307. static void _starpu_sink_common_pre_execution_message(struct _starpu_mp_node *node, struct mp_task *task)
  308. {
  309. /* Init message to tell the sink that the execution has begun */
  310. struct mp_message * message = mp_message_new();
  311. message->type = STARPU_PRE_EXECUTION;
  312. *(int *) message->buffer = task->combined_workerid;
  313. message->size = sizeof(task->combined_workerid);
  314. /* Append the message to the queue */
  315. _starpu_sink_common_append_message(node, message);
  316. }
  317. /* Append to the message list a "STARPU_EXECUTION_COMPLETED" message
  318. */
  319. static void _starpu_sink_common_execution_completed_message(struct _starpu_mp_node *node, struct mp_task *task)
  320. {
  321. /* Init message to tell the sink that the execution is completed */
  322. struct mp_message * message = mp_message_new();
  323. message->type = STARPU_EXECUTION_COMPLETED;
  324. message->size = sizeof(task->coreid);
  325. *(int*) message->buffer = task->coreid;
  326. /* Append the message to the queue */
  327. _starpu_sink_common_append_message(node, message);
  328. }
  329. /* Bind the thread which is running on the specified core to the combined worker */
  330. static void _starpu_sink_common_bind_to_combined_worker(struct _starpu_mp_node *node, int coreid, struct _starpu_combined_worker * combined_worker)
  331. {
  332. int i;
  333. int * bind_set = malloc(sizeof(int)*combined_worker->worker_size);
  334. for(i=0;i<combined_worker->worker_size;i++)
  335. bind_set[i] = combined_worker->combined_workerid[i] - node->baseworkerid;
  336. node->bind_thread(node, coreid, bind_set, combined_worker->worker_size);
  337. }
  338. /* Get the current rank of the worker in the combined worker
  339. */
  340. static int _starpu_sink_common_get_current_rank(int workerid, struct _starpu_combined_worker * combined_worker)
  341. {
  342. int i;
  343. for(i=0; i<combined_worker->worker_size; i++)
  344. if(workerid == combined_worker->combined_workerid[i])
  345. return i;
  346. STARPU_ASSERT(0);
  347. }
  348. /* Execute the task
  349. */
  350. static void _starpu_sink_common_execute_kernel(struct _starpu_mp_node *node, int coreid, struct mp_task *task, struct _starpu_worker * worker)
  351. {
  352. struct _starpu_combined_worker * combined_worker = NULL;
  353. /* If it's a parallel task */
  354. if(task->is_parallel_task)
  355. {
  356. combined_worker = _starpu_get_combined_worker_struct(task->combined_workerid);
  357. worker->current_rank = _starpu_sink_common_get_current_rank(worker->workerid, combined_worker);
  358. worker->combined_workerid = task->combined_workerid;
  359. worker->worker_size = combined_worker->worker_size;
  360. /* Synchronize with others threads of the combined worker*/
  361. STARPU_PTHREAD_BARRIER_WAIT(&task->mp_barrier->before_work_barrier);
  362. /* The first thread of the combined worker */
  363. if(worker->current_rank == 0)
  364. {
  365. /* tell the sink that the execution has begun */
  366. _starpu_sink_common_pre_execution_message(node,task);
  367. /* If the mode is FORKJOIN,
  368. * the first thread binds himself
  369. * on all core of the combined worker*/
  370. if(task->type == STARPU_FORKJOIN)
  371. {
  372. _starpu_sink_common_bind_to_combined_worker(node, coreid, combined_worker);
  373. }
  374. }
  375. }
  376. else
  377. {
  378. worker->current_rank = 0;
  379. worker->combined_workerid = 0;
  380. worker->worker_size = 1;
  381. }
  382. if(task->type != STARPU_FORKJOIN || worker->current_rank == 0)
  383. {
  384. /* execute the task */
  385. task->kernel(task->interfaces,task->cl_arg);
  386. }
  387. /* If it's a parallel task */
  388. if(task->is_parallel_task)
  389. {
  390. /* Synchronize with others threads of the combined worker*/
  391. STARPU_PTHREAD_BARRIER_WAIT(&task->mp_barrier->after_work_barrier);
  392. /* The fisrt thread of the combined */
  393. if(worker->current_rank == 0)
  394. {
  395. /* Erase the barrier from the list */
  396. _starpu_sink_common_erase_barrier(node,task->mp_barrier);
  397. /* If the mode is FORKJOIN,
  398. * the first thread rebinds himself on his own core */
  399. if(task->type == STARPU_FORKJOIN)
  400. node->bind_thread(node, coreid, &coreid, 1);
  401. }
  402. }
  403. node->run_table[coreid] = NULL;
  404. /* tell the sink that the execution is completed */
  405. _starpu_sink_common_execution_completed_message(node,task);
  406. free(task);
  407. }
  408. /* The main function executed by the thread
  409. * thread_arg is a structure containing the information needed by the thread
  410. */
  411. void* _starpu_sink_thread(void * thread_arg)
  412. {
  413. /* Retrieve the information from the structure */
  414. struct _starpu_mp_node *node = ((struct arg_sink_thread *)thread_arg)->node;
  415. sem_t * sem = ((struct arg_sink_thread *)thread_arg)->sem;
  416. int coreid =((struct arg_sink_thread *)thread_arg)->coreid;
  417. /* free the structure */
  418. free(thread_arg);
  419. STARPU_PTHREAD_BARRIER_WAIT(&node->init_completed_barrier);
  420. struct _starpu_worker *worker = &_starpu_get_machine_config()->workers[node->baseworkerid + coreid];
  421. _starpu_set_local_worker_key(worker);
  422. while(node->is_running)
  423. {
  424. /*Wait there is a task available */
  425. sem_wait(sem);
  426. if(node->run_table[coreid] != NULL)
  427. _starpu_sink_common_execute_kernel(node,coreid,node->run_table[coreid],worker);
  428. }
  429. pthread_exit(NULL);
  430. }
  431. /* Add the task to the specific thread and wake him up
  432. */
  433. static void _starpu_sink_common_execute_thread(struct _starpu_mp_node *node, struct mp_task *task)
  434. {
  435. /* Add the task to the specific thread */
  436. node->run_table[task->coreid] = task;
  437. /* Unlock the mutex to wake up the thread which will execute the task */
  438. sem_post(&node->sem_run_table[task->coreid]);
  439. }
  440. /* Receive paquet from _starpu_src_common_execute_kernel in the form below :
  441. * [Function pointer on sink, number of interfaces, interfaces
  442. * (union _starpu_interface), cl_arg]
  443. * Then call the function given, passing as argument an array containing the
  444. * addresses of the received interfaces
  445. */
  446. void _starpu_sink_common_execute(struct _starpu_mp_node *node,
  447. void *arg, int arg_size)
  448. {
  449. unsigned nb_interfaces, i;
  450. void *arg_ptr = arg;
  451. struct mp_task *task = malloc(sizeof(struct mp_task));
  452. task->kernel = *(void(**)(void **, void *)) arg_ptr;
  453. arg_ptr += sizeof(task->kernel);
  454. task->type = *(enum starpu_codelet_type *) arg_ptr;
  455. arg_ptr += sizeof(task->type);
  456. task->is_parallel_task = *(int *) arg_ptr;
  457. arg_ptr += sizeof(task->is_parallel_task);
  458. if(task->is_parallel_task)
  459. {
  460. task->combined_workerid= *(int *) arg_ptr;
  461. arg_ptr += sizeof(task->combined_workerid);
  462. task->mp_barrier = _starpu_sink_common_get_barrier(node,task->combined_workerid,_starpu_get_combined_worker_struct(task->combined_workerid)->worker_size);
  463. }
  464. task->coreid = *(unsigned *) arg_ptr;
  465. arg_ptr += sizeof(task->coreid);
  466. nb_interfaces = *(unsigned *) arg_ptr;
  467. arg_ptr += sizeof(nb_interfaces);
  468. /* The function needs an array pointing to each interface it needs
  469. * during execution. As in sink-side there is no mean to know which
  470. * kind of interface to expect, the array is composed of unions of
  471. * interfaces, thus we expect the same size anyway */
  472. for (i = 0; i < nb_interfaces; i++)
  473. {
  474. task->interfaces[i] = arg_ptr;
  475. arg_ptr += sizeof(union _starpu_interface);
  476. }
  477. /* Was cl_arg sent ? */
  478. if (arg_size > arg_ptr - arg)
  479. task->cl_arg = arg_ptr;
  480. else
  481. task->cl_arg = NULL;
  482. //_STARPU_DEBUG("telling host that we have submitted the task %p.\n", task->kernel);
  483. _starpu_mp_common_send_command(node, STARPU_EXECUTION_SUBMITTED,
  484. NULL, 0);
  485. //_STARPU_DEBUG("executing the task %p\n", task->kernel);
  486. _starpu_sink_common_execute_thread(node, task);
  487. }