sched_ctx.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. #include <core/sched_ctx.h>
  2. #include <common/config.h>
  3. #include <common/utils.h>
  4. #include <core/sched_policy.h>
  5. #include <profiling/profiling.h>
  6. static pthread_cond_t blocking_ths_cond = PTHREAD_COND_INITIALIZER;
  7. static pthread_cond_t wakeup_ths_cond = PTHREAD_COND_INITIALIZER;
  8. static pthread_mutex_t blocking_ths_mutex = PTHREAD_MUTEX_INITIALIZER;
  9. static int nblocked_ths = 0;
  10. int _starpu_create_sched_ctx(const char *policy_name, int *workerids_in_ctx,
  11. int nworkerids_in_ctx, unsigned is_initial_sched,
  12. const char *sched_name)
  13. {
  14. struct starpu_machine_config_s *config = (struct starpu_machine_config_s *)_starpu_get_machine_config();
  15. STARPU_ASSERT(config->topology.nsched_ctxs < STARPU_NMAX_SCHED_CTXS - 1);
  16. struct starpu_sched_ctx *sched_ctx = &config->sched_ctxs[config->topology.nsched_ctxs];
  17. int nworkers = config->topology.nworkers;
  18. STARPU_ASSERT(nworkerids_in_ctx <= nworkers);
  19. sched_ctx->nworkers_in_ctx = nworkerids_in_ctx;
  20. sched_ctx->sched_policy = malloc(sizeof(struct starpu_sched_policy_s));
  21. sched_ctx->is_initial_sched = is_initial_sched;
  22. sched_ctx->sched_name = sched_name;
  23. PTHREAD_COND_INIT(&sched_ctx->submitted_cond, NULL);
  24. PTHREAD_MUTEX_INIT(&sched_ctx->submitted_mutex, NULL);
  25. sched_ctx->nsubmitted = 0;
  26. int j;
  27. /*all the workers are in this contex*/
  28. if(workerids_in_ctx == NULL)
  29. {
  30. for(j = 0; j < nworkers; j++)
  31. {
  32. sched_ctx->workerid[j] = j;
  33. struct starpu_worker_s *workerarg = _starpu_get_worker_struct(j);
  34. workerarg->sched_ctx[workerarg->nctxs++] = sched_ctx;
  35. }
  36. sched_ctx->nworkers_in_ctx = nworkers;
  37. }
  38. else
  39. {
  40. int i;
  41. for(i = 0; i < nworkerids_in_ctx; i++)
  42. {
  43. /*take care the user does not ask for a resource that does not exist*/
  44. STARPU_ASSERT( workerids_in_ctx[i] >= 0 && workerids_in_ctx[i] <= nworkers);
  45. sched_ctx->workerid[i] = workerids_in_ctx[i];
  46. for(j = 0; j < nworkers; j++)
  47. {
  48. if(sched_ctx->workerid[i] == j)
  49. {
  50. struct starpu_worker_s *workerarg = _starpu_get_worker_struct(j);
  51. workerarg->sched_ctx[workerarg->nctxs++] = sched_ctx;
  52. }
  53. }
  54. }
  55. }
  56. _starpu_init_sched_policy(config, sched_ctx, policy_name);
  57. sched_ctx->sched_ctx_id = config->topology.nsched_ctxs;
  58. config->topology.nsched_ctxs++;
  59. return sched_ctx->sched_ctx_id;
  60. }
  61. void _starpu_decrement_nblocked_ths(void)
  62. {
  63. PTHREAD_MUTEX_LOCK(&blocking_ths_mutex);
  64. if(--nblocked_ths == 0)
  65. PTHREAD_COND_BROADCAST(&wakeup_ths_cond);
  66. PTHREAD_MUTEX_UNLOCK(&blocking_ths_mutex);
  67. }
  68. void _starpu_increment_nblocked_ths(int nworkers)
  69. {
  70. PTHREAD_MUTEX_LOCK(&blocking_ths_mutex);
  71. if (++nblocked_ths == nworkers)
  72. PTHREAD_COND_BROADCAST(&blocking_ths_cond);
  73. PTHREAD_MUTEX_UNLOCK(&blocking_ths_mutex);
  74. }
  75. static int _starpu_wait_for_all_threads_to_block(int nworkers)
  76. {
  77. PTHREAD_MUTEX_LOCK(&blocking_ths_mutex);
  78. while (nblocked_ths < nworkers)
  79. PTHREAD_COND_WAIT(&blocking_ths_cond, &blocking_ths_mutex);
  80. PTHREAD_MUTEX_UNLOCK(&blocking_ths_mutex);
  81. return 0;
  82. }
  83. static int _starpu_wait_for_all_threads_to_wake_up(void)
  84. {
  85. PTHREAD_MUTEX_LOCK(&blocking_ths_mutex);
  86. while (nblocked_ths > 0)
  87. PTHREAD_COND_WAIT(&wakeup_ths_cond, &blocking_ths_mutex);
  88. PTHREAD_MUTEX_UNLOCK(&blocking_ths_mutex);
  89. return 0;
  90. }
  91. static int set_changing_ctx_flag(starpu_worker_status changing_ctx, int nworkerids_in_ctx, int *workerids_in_ctx)
  92. {
  93. struct starpu_machine_config_s *config = _starpu_get_machine_config();
  94. int i;
  95. int nworkers = nworkerids_in_ctx == -1 ? (int)config->topology.nworkers : nworkerids_in_ctx;
  96. struct starpu_worker_s *worker = NULL;
  97. pthread_mutex_t *changing_ctx_mutex = NULL;
  98. pthread_cond_t *changing_ctx_cond = NULL;
  99. int workerid = -1;
  100. for(i = 0; i < nworkers; i++)
  101. {
  102. workerid = workerids_in_ctx == NULL ? i : workerids_in_ctx[i];
  103. worker = _starpu_get_worker_struct(workerid);
  104. changing_ctx_mutex = &worker->changing_ctx_mutex;
  105. changing_ctx_cond = &worker->changing_ctx_cond;
  106. /*if the status is CHANGING_CTX let the thread know that it must block*/
  107. PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
  108. worker->status = changing_ctx;
  109. worker->nworkers_of_next_ctx = nworkers;
  110. PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
  111. /*if we have finished changing the ctx wake up the blocked threads*/
  112. if(changing_ctx == STATUS_UNKNOWN)
  113. {
  114. PTHREAD_MUTEX_LOCK(changing_ctx_mutex);
  115. PTHREAD_COND_SIGNAL(changing_ctx_cond);
  116. PTHREAD_MUTEX_UNLOCK(changing_ctx_mutex);
  117. }
  118. }
  119. /*after letting know all the concerned threads about the change
  120. wait for them to take into account the info*/
  121. if(changing_ctx == STATUS_CHANGING_CTX)
  122. _starpu_wait_for_all_threads_to_block(nworkers);
  123. else
  124. _starpu_wait_for_all_threads_to_wake_up();
  125. return 0;
  126. }
  127. int starpu_create_sched_ctx(const char *policy_name, int *workerids_in_ctx,
  128. int nworkerids_in_ctx, const char *sched_name)
  129. {
  130. int ret;
  131. /* block the workers until the contex is switched */
  132. set_changing_ctx_flag(STATUS_CHANGING_CTX, nworkerids_in_ctx, workerids_in_ctx);
  133. ret = _starpu_create_sched_ctx(policy_name, workerids_in_ctx, nworkerids_in_ctx, 0, sched_name);
  134. /* also wait the workers to wake up before using the context */
  135. set_changing_ctx_flag(STATUS_UNKNOWN, nworkerids_in_ctx, workerids_in_ctx);
  136. return ret;
  137. }
  138. static unsigned _starpu_worker_belongs_to_ctx(struct starpu_worker_s *workerarg, struct starpu_sched_ctx *sched_ctx)
  139. {
  140. unsigned i;
  141. for(i = 0; i < workerarg->nctxs; i++)
  142. if(sched_ctx != NULL && workerarg->sched_ctx[i] == sched_ctx
  143. && workerarg->status != STATUS_JOINED)
  144. return 1;
  145. return 0;
  146. }
  147. static void _starpu_remove_sched_ctx_from_worker(struct starpu_worker_s *workerarg, struct starpu_sched_ctx *sched_ctx)
  148. {
  149. unsigned i;
  150. unsigned to_remove = 0;
  151. for(i = 0; i < workerarg->nctxs; i++)
  152. {
  153. if(sched_ctx != NULL && workerarg->sched_ctx[i] == sched_ctx
  154. && workerarg->status != STATUS_JOINED)
  155. {
  156. workerarg->sched_ctx[i] = NULL;
  157. to_remove = 1;
  158. }
  159. }
  160. /* if the the worker had belonged to the context it would have been found in the worker's list of sched_ctxs, so it can be removed */
  161. if(to_remove)
  162. workerarg->nctxs--;
  163. return;
  164. }
  165. void starpu_delete_sched_ctx(unsigned sched_ctx_id)
  166. {
  167. if(!starpu_wait_for_all_tasks_of_sched_ctx(sched_ctx_id))
  168. {
  169. struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
  170. int nworkers = sched_ctx->nworkers_in_ctx;
  171. int workerid;
  172. int i;
  173. for(i = 0; i < nworkers; i++)
  174. {
  175. workerid = sched_ctx->workerid[i];
  176. struct starpu_worker_s *workerarg = _starpu_get_worker_struct(workerid);
  177. _starpu_remove_sched_ctx_from_worker(workerarg, sched_ctx);
  178. }
  179. free(sched_ctx->sched_policy);
  180. sched_ctx->sched_policy = NULL;
  181. }
  182. return;
  183. }
  184. void _starpu_delete_all_sched_ctxs()
  185. {
  186. struct starpu_machine_config_s *config = _starpu_get_machine_config();
  187. unsigned nsched_ctxs = config->topology.nsched_ctxs;
  188. unsigned i;
  189. for(i = 0; i < nsched_ctxs; i++)
  190. {
  191. starpu_delete_sched_ctx((int)i);
  192. }
  193. return;
  194. }
  195. int starpu_wait_for_all_tasks_of_worker(int workerid)
  196. {
  197. if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
  198. return -EDEADLK;
  199. struct starpu_worker_s *worker = _starpu_get_worker_struct(workerid);
  200. PTHREAD_MUTEX_LOCK(&worker->submitted_mutex);
  201. while (worker->nsubmitted > 0)
  202. PTHREAD_COND_WAIT(&worker->submitted_cond, &worker->submitted_mutex);
  203. PTHREAD_MUTEX_UNLOCK(&worker->submitted_mutex);
  204. return 0;
  205. }
  206. int starpu_wait_for_all_tasks_of_workers(int *workerids_in_ctx, int nworkerids_in_ctx){
  207. int ret_val = 0;
  208. struct starpu_machine_config_s *config = _starpu_get_machine_config();
  209. int nworkers = nworkerids_in_ctx == -1 ? (int)config->topology.nworkers : nworkerids_in_ctx;
  210. int workerid = -1;
  211. int i, n;
  212. for(i = 0; i < nworkers; i++)
  213. {
  214. workerid = workerids_in_ctx == NULL ? i : workerids_in_ctx[i];
  215. n = starpu_wait_for_all_tasks_of_worker(workerid);
  216. ret_val = (ret_val && n);
  217. }
  218. return ret_val;
  219. }
  220. void _starpu_decrement_nsubmitted_tasks_of_worker(int workerid)
  221. {
  222. struct starpu_worker_s *worker = _starpu_get_worker_struct(workerid);
  223. PTHREAD_MUTEX_LOCK(&worker->submitted_mutex);
  224. if (--worker->nsubmitted == 0)
  225. PTHREAD_COND_BROADCAST(&worker->submitted_cond);
  226. PTHREAD_MUTEX_UNLOCK(&worker->submitted_mutex);
  227. return;
  228. }
  229. void _starpu_increment_nsubmitted_tasks_of_worker(int workerid)
  230. {
  231. struct starpu_worker_s *worker = _starpu_get_worker_struct(workerid);
  232. PTHREAD_MUTEX_LOCK(&worker->submitted_mutex);
  233. worker->nsubmitted++;
  234. PTHREAD_MUTEX_UNLOCK(&worker->submitted_mutex);
  235. return;
  236. }
  237. static void _starpu_add_workers_to_sched_ctx(int *workerids_in_ctx, int nworkerids_in_ctx,
  238. struct starpu_sched_ctx *sched_ctx)
  239. {
  240. struct starpu_machine_config_s *config = (struct starpu_machine_config_s *)_starpu_get_machine_config();
  241. int nworkers = config->topology.nworkers;
  242. STARPU_ASSERT((nworkerids_in_ctx + sched_ctx->nworkers_in_ctx) <= nworkers);
  243. int nworkerids_already_in_ctx = sched_ctx->nworkers_in_ctx;
  244. int j;
  245. /*if null add the rest of the workers which don't already belong to this ctx*/
  246. if(workerids_in_ctx == NULL)
  247. {
  248. for(j = 0; j < nworkers; j++)
  249. {
  250. struct starpu_worker_s *workerarg = _starpu_get_worker_struct(j);
  251. if(!_starpu_worker_belongs_to_ctx(workerarg, sched_ctx))
  252. {
  253. sched_ctx->workerid[++nworkerids_already_in_ctx] = j;
  254. workerarg->sched_ctx[workerarg->nctxs++] = sched_ctx;
  255. }
  256. sched_ctx->nworkers_in_ctx = nworkers;
  257. }
  258. }
  259. else
  260. {
  261. int i;
  262. for(i = 0; i < nworkerids_in_ctx; i++)
  263. {
  264. /*take care the user does not ask for a resource that does not exist*/
  265. STARPU_ASSERT( workerids_in_ctx[i] >= 0 && workerids_in_ctx[i] <= nworkers);
  266. sched_ctx->workerid[ nworkerids_already_in_ctx + i] = workerids_in_ctx[i];
  267. for(j = 0; j < nworkers; j++)
  268. {
  269. if(sched_ctx->workerid[i] == j)
  270. {
  271. struct starpu_worker_s *workerarg = _starpu_get_worker_struct(j);
  272. workerarg->sched_ctx[workerarg->nctxs++] = sched_ctx;
  273. }
  274. }
  275. }
  276. sched_ctx->nworkers_in_ctx = nworkerids_in_ctx;
  277. }
  278. return;
  279. }
  280. void starpu_add_workers_to_sched_ctx(int *workerids_in_ctx, int nworkerids_in_ctx,
  281. unsigned sched_ctx_id)
  282. {
  283. struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
  284. /* block the workers until the contex is switched */
  285. set_changing_ctx_flag(STATUS_CHANGING_CTX, nworkerids_in_ctx, workerids_in_ctx);
  286. _starpu_add_workers_to_sched_ctx(workerids_in_ctx, nworkerids_in_ctx, sched_ctx);
  287. /* also wait the workers to wake up before using the context */
  288. set_changing_ctx_flag(STATUS_UNKNOWN, nworkerids_in_ctx, workerids_in_ctx);
  289. return;
  290. }
  291. static int _starpu_get_first_free_space(int *workerids, int old_nworkerids_in_ctx)
  292. {
  293. int i;
  294. for(i = 0; i < old_nworkerids_in_ctx; i++)
  295. if(workerids[i] == -1)
  296. return i;
  297. return -1;
  298. }
  299. /* rearange array of workerids in order not to have {-1, -1, 5, -1, 7}
  300. and have instead {5, 7, -1, -1, -1}
  301. it is easier afterwards to iterate the array
  302. */
  303. static void _starpu_rearange_sched_ctx_workerids(struct starpu_sched_ctx *sched_ctx, int old_nworkerids_in_ctx)
  304. {
  305. int first_free_id = -1;
  306. int i;
  307. for(i = 0; i < old_nworkerids_in_ctx; i++)
  308. {
  309. if(sched_ctx->workerid[i] != -1)
  310. {
  311. first_free_id = _starpu_get_first_free_space(sched_ctx->workerid,
  312. old_nworkerids_in_ctx);
  313. if(first_free_id != -1)
  314. {
  315. sched_ctx->workerid[first_free_id] = sched_ctx->workerid[i];
  316. sched_ctx->workerid[i] = -1;
  317. }
  318. }
  319. }
  320. }
  321. static void _starpu_remove_workers_from_sched_ctx(int *workerids_in_ctx, int nworkerids_in_ctx,
  322. struct starpu_sched_ctx *sched_ctx)
  323. {
  324. struct starpu_machine_config_s *config = (struct starpu_machine_config_s *)_starpu_get_machine_config();
  325. int nworkers = config->topology.nworkers;
  326. int nworkerids_already_in_ctx = sched_ctx->nworkers_in_ctx;
  327. STARPU_ASSERT(nworkerids_in_ctx <= nworkerids_already_in_ctx);
  328. int i, workerid;
  329. /*if null remove all the workers that belong to this ctx*/
  330. if(workerids_in_ctx == NULL)
  331. {
  332. for(i = 0; i < nworkerids_already_in_ctx; i++)
  333. {
  334. workerid = sched_ctx->workerid[i];
  335. struct starpu_worker_s *workerarg = _starpu_get_worker_struct(workerid);
  336. _starpu_remove_sched_ctx_from_worker(workerarg, sched_ctx);
  337. sched_ctx->workerid[i] = -1;
  338. }
  339. sched_ctx->nworkers_in_ctx = 0;
  340. }
  341. else
  342. {
  343. for(i = 0; i < nworkerids_in_ctx; i++)
  344. {
  345. workerid = workerids_in_ctx[i];
  346. /* take care the user does not ask for a resource that does not exist */
  347. STARPU_ASSERT( workerid >= 0 && workerid <= nworkers);
  348. struct starpu_worker_s *workerarg = _starpu_get_worker_struct(workerid);
  349. _starpu_remove_sched_ctx_from_worker(workerarg, sched_ctx);
  350. int j;
  351. /* don't leave the workerid with a correct value even if we don't use it anymore */
  352. for(j = 0; j < nworkerids_already_in_ctx; j++)
  353. if(sched_ctx->workerid[j] == workerid)
  354. sched_ctx->workerid[j] = -1;
  355. }
  356. sched_ctx->nworkers_in_ctx -= nworkerids_in_ctx;
  357. _starpu_rearange_sched_ctx_workerids(sched_ctx, nworkerids_already_in_ctx);
  358. }
  359. return;
  360. }
  361. void starpu_remove_workers_from_sched_ctx(int *workerids_in_ctx, int nworkerids_in_ctx,
  362. unsigned sched_ctx_id)
  363. {
  364. /* wait for the workers concerned by the change of contex
  365. * to finish their work in the previous context */
  366. if(!starpu_wait_for_all_tasks_of_workers(workerids_in_ctx, nworkerids_in_ctx))
  367. {
  368. struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
  369. /* block the workers until the contex is switched */
  370. set_changing_ctx_flag(STATUS_CHANGING_CTX, nworkerids_in_ctx, workerids_in_ctx);
  371. _starpu_remove_workers_from_sched_ctx(workerids_in_ctx, nworkerids_in_ctx, sched_ctx);
  372. /* also wait the workers to wake up before using the context */
  373. set_changing_ctx_flag(STATUS_UNKNOWN, nworkerids_in_ctx, workerids_in_ctx);
  374. }
  375. return;
  376. }
  377. int starpu_wait_for_all_tasks_of_sched_ctx(unsigned sched_ctx_id)
  378. {
  379. struct starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx(sched_ctx_id);
  380. if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
  381. return -EDEADLK;
  382. PTHREAD_MUTEX_LOCK(&sched_ctx->submitted_mutex);
  383. while (sched_ctx->nsubmitted > 0)
  384. PTHREAD_COND_WAIT(&sched_ctx->submitted_cond, &sched_ctx->submitted_mutex);
  385. PTHREAD_MUTEX_UNLOCK(&sched_ctx->submitted_mutex);
  386. return 0;
  387. }
  388. void _starpu_decrement_nsubmitted_tasks_of_sched_ctx(struct starpu_sched_ctx *sched_ctx)
  389. {
  390. PTHREAD_MUTEX_LOCK(&sched_ctx->submitted_mutex);
  391. if (--sched_ctx->nsubmitted == 0)
  392. PTHREAD_COND_BROADCAST(&sched_ctx->submitted_cond);
  393. PTHREAD_MUTEX_UNLOCK(&sched_ctx->submitted_mutex);
  394. }
  395. void _starpu_increment_nsubmitted_tasks_of_sched_ctx(struct starpu_sched_ctx *sched_ctx)
  396. {
  397. PTHREAD_MUTEX_LOCK(&sched_ctx->submitted_mutex);
  398. sched_ctx->nsubmitted++;
  399. PTHREAD_MUTEX_UNLOCK(&sched_ctx->submitted_mutex);
  400. }