openmp_runtime_support.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2014 Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <starpu.h>
  17. #ifdef STARPU_OPENMP
  18. /*
  19. * locally disable -Wdeprecated-declarations to avoid
  20. * lots of deprecated warnings for ucontext related functions
  21. */
  22. #pragma GCC diagnostic push
  23. #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  24. #include <util/openmp_runtime_support.h>
  25. #include <core/task.h>
  26. #include <stdlib.h>
  27. #include <ctype.h>
  28. #include <strings.h>
  29. #define _STARPU_STACKSIZE 2097152
  30. static struct starpu_omp_global _global_state;
  31. static starpu_pthread_key_t omp_thread_key;
  32. static starpu_pthread_key_t omp_task_key;
  33. struct starpu_omp_global *_starpu_omp_global_state = NULL;
  34. double _starpu_omp_clock_ref = 0.0; /* clock reference for starpu_omp_get_wtick */
  35. static struct starpu_omp_device *create_omp_device_struct(void)
  36. {
  37. struct starpu_omp_device *dev = malloc(sizeof(*dev));
  38. if (dev == NULL)
  39. _STARPU_ERROR("memory allocation failed");
  40. /* TODO: initialize dev->icvs with proper values */
  41. memset(&dev->icvs, 0, sizeof(dev->icvs));
  42. return dev;
  43. }
  44. static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_region *parent_region, struct starpu_omp_device *owner_device, int nb_threads)
  45. {
  46. struct starpu_omp_region *region = malloc(sizeof(*region));
  47. if (region == NULL)
  48. _STARPU_ERROR("memory allocation failed");
  49. region->parent_region = parent_region;
  50. region->initial_nested_region = NULL;
  51. region->owner_device = owner_device;
  52. region->thread_list = starpu_omp_thread_list_new();
  53. region->implicit_task_list = starpu_omp_task_list_new();
  54. region->nb_threads = nb_threads;
  55. region->level = (parent_region != NULL)?parent_region->level+1:0;
  56. return region;
  57. }
  58. static void omp_initial_thread_func(void)
  59. {
  60. struct starpu_omp_region *init_region = _global_state.initial_region;
  61. struct starpu_omp_thread *init_thread = _global_state.initial_thread;
  62. struct starpu_omp_task *init_task = _global_state.initial_task;
  63. struct starpu_task *continuation_task = init_region->initial_nested_region->continuation_starpu_task;
  64. while (1)
  65. {
  66. starpu_driver_run_once(&init_thread->starpu_driver);
  67. /*
  68. * if we are leaving the first nested region we give control back to initial task
  69. * otherwise, we should continue to execute work
  70. */
  71. if (_starpu_task_test_termination(continuation_task))
  72. {
  73. swapcontext(&init_thread->ctx, &init_task->ctx);
  74. }
  75. }
  76. }
  77. /*
  78. * setup the main application thread to handle the possible preemption of the initial task
  79. */
  80. static void omp_initial_thread_setup(void)
  81. {
  82. struct starpu_omp_thread *initial_thread = _global_state.initial_thread;
  83. struct starpu_omp_task *initial_task = _global_state.initial_task;
  84. /* .current_task */
  85. initial_thread->current_task = initial_task;
  86. /* .owner_region already set in create_omp_thread_struct */
  87. /* .initial_thread_stack */
  88. initial_thread->initial_thread_stack = malloc(_STARPU_STACKSIZE);
  89. if (initial_thread->initial_thread_stack == NULL)
  90. _STARPU_ERROR("memory allocation failed");
  91. /* .ctx */
  92. getcontext(&initial_thread->ctx);
  93. /*
  94. * we do not use uc_link, the initial thread always should give hand back to the initial task
  95. */
  96. initial_thread->ctx.uc_link = NULL;
  97. initial_thread->ctx.uc_stack.ss_sp = initial_thread->initial_thread_stack;
  98. initial_thread->ctx.uc_stack.ss_size = _STARPU_STACKSIZE;
  99. makecontext(&initial_thread->ctx, omp_initial_thread_func, 0);
  100. /* .starpu_driver */
  101. /*
  102. * we configure starpu to not launch CPU worker 0
  103. * because we will use the main thread to play the role of worker 0
  104. */
  105. struct starpu_conf conf;
  106. int ret = starpu_conf_init(&conf);
  107. STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init");
  108. initial_thread->starpu_driver.type = STARPU_CPU_WORKER;
  109. initial_thread->starpu_driver.id.cpu_id = 0;
  110. conf.not_launched_drivers = &initial_thread->starpu_driver;
  111. conf.n_not_launched_drivers = 1;
  112. ret = starpu_init(&conf);
  113. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  114. ret = starpu_driver_init(&initial_thread->starpu_driver);
  115. STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init");
  116. }
  117. static void omp_initial_thread_exit()
  118. {
  119. struct starpu_omp_thread *initial_thread = _global_state.initial_thread;
  120. int ret = starpu_driver_deinit(&initial_thread->starpu_driver);
  121. STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit");
  122. starpu_shutdown();
  123. /* TODO: free initial_thread data structures */
  124. }
  125. static struct starpu_omp_thread *create_omp_thread_struct(struct starpu_omp_region *owner_region)
  126. {
  127. struct starpu_omp_thread *thread = malloc(sizeof(*thread));
  128. if (thread == NULL)
  129. _STARPU_ERROR("memory allocation failed");
  130. /* .current_task */
  131. thread->current_task = NULL;
  132. /* .owner_region */
  133. thread->owner_region = owner_region;
  134. /* .primary_task */
  135. thread->primary_task = NULL;
  136. /* .init_thread_stack */
  137. thread->initial_thread_stack = NULL;
  138. /* .ctx */
  139. memset(&thread->ctx, 0, sizeof(thread->ctx));
  140. /* .starpu_driver will be initialized later on */
  141. return thread;
  142. }
  143. static void starpu_omp_task_entry(struct starpu_omp_task *task)
  144. {
  145. task->f(task->starpu_buffers, task->starpu_cl_arg);
  146. task->state = starpu_omp_task_state_terminated;
  147. struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
  148. /*
  149. * the task reached the terminated state, definitively give hand back to the worker code.
  150. *
  151. * about to run on the worker stack...
  152. */
  153. setcontext(&thread->ctx);
  154. STARPU_ASSERT(0); /* unreachable code */
  155. }
  156. /*
  157. * stop executing a task that is about to block
  158. * and give hand back to the thread
  159. */
  160. static void starpu_omp_task_preempt(void)
  161. {
  162. struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
  163. struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
  164. task->state = starpu_omp_task_state_preempted;
  165. /*
  166. * the task reached a blocked state, give hand back to the worker code.
  167. *
  168. * about to run on the worker stack...
  169. */
  170. swapcontext(&task->ctx, &thread->ctx);
  171. /* now running on the task stack again */
  172. }
  173. /*
  174. * wrap a task function to allow the task to be preempted
  175. */
  176. static void starpu_omp_task_exec(void *buffers[], void *cl_arg)
  177. {
  178. struct starpu_omp_task *task = starpu_task_get_current()->omp_task;
  179. STARPU_PTHREAD_SETSPECIFIC(omp_task_key, task);
  180. struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
  181. if (thread == NULL)
  182. {
  183. /*
  184. * this is the first time an omp task is launched on the current worker.
  185. * this first task should be an implicit parallel region task.
  186. */
  187. if (!task->is_implicit)
  188. _STARPU_ERROR("unexpected omp task\n");
  189. thread = task->owner_thread;
  190. STARPU_ASSERT(thread->owner_region != NULL);
  191. STARPU_ASSERT(thread->owner_region == task->owner_region);
  192. thread->primary_task = task;
  193. /*
  194. * make this worker an omp-enabled worker
  195. */
  196. STARPU_PTHREAD_SETSPECIFIC(omp_thread_key, thread);
  197. }
  198. if (task->state != starpu_omp_task_state_preempted)
  199. {
  200. task->starpu_buffers = buffers;
  201. task->starpu_cl_arg = cl_arg;
  202. }
  203. task->state = starpu_omp_task_state_clear;
  204. /*
  205. * start the task execution, or restore a previously preempted task.
  206. * about to run on the task stack...
  207. * */
  208. swapcontext(&thread->ctx, &task->ctx);
  209. /* now running on the worker stack again */
  210. STARPU_ASSERT(task->state == starpu_omp_task_state_preempted
  211. || task->state == starpu_omp_task_state_terminated);
  212. STARPU_PTHREAD_SETSPECIFIC(omp_task_key, NULL);
  213. if (task->state == starpu_omp_task_state_terminated && task == thread->primary_task)
  214. {
  215. /*
  216. * make this worker an omp-disabled worker
  217. */
  218. STARPU_PTHREAD_SETSPECIFIC(omp_thread_key, NULL);
  219. thread->primary_task = NULL;
  220. /*
  221. * make sure this worker wont be used for running omp tasks
  222. * until a new region is created
  223. */
  224. thread->owner_region = NULL;
  225. }
  226. /* TODO: analyse the cause of the return and take appropriate steps */
  227. }
  /*
   * Intended to get the starpu_task fields of a currently running task ready
   * to accept a new set of dependencies ahead of a preemption, so that the
   * preempted task is only queued again once those dependencies are fulfilled.
   *
   * The function is a placeholder for now: it ignores its argument and does
   * nothing.
   */
  static void _starpu_task_prepare_for_preemption(struct starpu_task *starpu_task)
  {
      /* TODO: implement function */
      (void) starpu_task;
  }
  240. static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *parent_task,
  241. struct starpu_omp_thread *owner_thread, struct starpu_omp_region *owner_region, int is_implicit)
  242. {
  243. struct starpu_omp_task *task = malloc(sizeof(*task));
  244. if (task == NULL)
  245. _STARPU_ERROR("memory allocation failed");
  246. task->parent_task = parent_task;
  247. task->owner_thread = owner_thread;
  248. task->owner_region = owner_region;
  249. task->is_implicit = is_implicit;
  250. /* TODO: initialize task->data_env_icvs with proper values */
  251. memset(&task->data_env_icvs, 0, sizeof(task->data_env_icvs));
  252. if (is_implicit)
  253. {
  254. /* TODO: initialize task->implicit_task_icvs with proper values */
  255. memset(&task->implicit_task_icvs, 0, sizeof(task->implicit_task_icvs));
  256. }
  257. task->starpu_task = NULL;
  258. task->starpu_buffers = NULL;
  259. task->starpu_cl_arg = NULL;
  260. task->f = NULL;
  261. task->state = starpu_omp_task_state_clear;
  262. if (parent_task == NULL)
  263. {
  264. /* do not allocate a stack for the initial task */
  265. task->stack = NULL;
  266. memset(&task->ctx, 0, sizeof(task->ctx));
  267. }
  268. else
  269. {
  270. /* TODO: use ICV stack size info instead */
  271. task->stack = malloc(_STARPU_STACKSIZE);
  272. if (task->stack == NULL)
  273. _STARPU_ERROR("memory allocation failed");
  274. getcontext(&task->ctx);
  275. /*
  276. * we do not use uc_link, starpu_omp_task_entry will handle
  277. * the end of the task
  278. */
  279. task->ctx.uc_link = NULL;
  280. task->ctx.uc_stack.ss_sp = task->stack;
  281. task->ctx.uc_stack.ss_size = _STARPU_STACKSIZE;
  282. makecontext(&task->ctx, (void (*) ()) starpu_omp_task_entry, 1, task);
  283. }
  284. return task;
  285. }
  286. /*
  287. * Entry point to be called by the OpenMP runtime constructor
  288. */
  289. int starpu_omp_init(void)
  290. {
  291. _starpu_omp_environment_init();
  292. _global_state.icvs.cancel_var = _starpu_omp_initial_icv_values->cancel_var;
  293. _global_state.initial_device = create_omp_device_struct();
  294. _global_state.initial_region = create_omp_region_struct(NULL, _global_state.initial_device, 1);
  295. _global_state.initial_thread = create_omp_thread_struct(_global_state.initial_region);
  296. starpu_omp_thread_list_push_back(_global_state.initial_region->thread_list,
  297. _global_state.initial_thread);
  298. _global_state.initial_task = create_omp_task_struct(NULL,
  299. _global_state.initial_thread, _global_state.initial_region, 1);
  300. _starpu_omp_global_state = &_global_state;
  301. STARPU_PTHREAD_KEY_CREATE(&omp_thread_key, NULL);
  302. STARPU_PTHREAD_KEY_CREATE(&omp_task_key, NULL);
  303. omp_initial_thread_setup();
  304. /* init clock reference for starpu_omp_get_wtick */
  305. _starpu_omp_clock_ref = starpu_timing_now();
  306. return 0;
  307. }
  308. void starpu_omp_shutdown(void)
  309. {
  310. omp_initial_thread_exit();
  311. STARPU_PTHREAD_KEY_DELETE(omp_task_key);
  312. STARPU_PTHREAD_KEY_DELETE(omp_thread_key);
  313. /* TODO: free ICV variables */
  314. /* TODO: free task/thread/region/device structures */
  315. }
  316. void starpu_parallel_region(struct starpu_codelet *parallel_region_cl, void *parallel_region_cl_arg)
  317. {
  318. struct starpu_omp_thread *master_thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
  319. struct starpu_omp_task *parent_task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
  320. struct starpu_omp_region *parent_region = parent_task->owner_region;
  321. int ret;
  322. /* TODO: compute the proper nb_threads and launch additional workers as needed.
  323. * for now, the level 1 parallel region spans all the threads
  324. * and level >= 2 parallel regions have only one thread */
  325. int nb_threads = (parent_region->level == 0)?starpu_cpu_worker_get_count():1;
  326. struct starpu_omp_region *new_region =
  327. create_omp_region_struct(parent_region, _global_state.initial_device, 1);
  328. int i;
  329. for (i = 0; i < nb_threads; i++)
  330. {
  331. struct starpu_omp_thread *new_thread =
  332. (i == 0) ? master_thread : create_omp_thread_struct(new_region);
  333. /* TODO: specify actual starpu worker */
  334. starpu_omp_thread_list_push_back(new_region->thread_list, new_thread);
  335. struct starpu_omp_task *new_task = create_omp_task_struct(parent_task, new_thread, new_region, 1);
  336. starpu_omp_task_list_push_back(new_region->implicit_task_list, new_task);
  337. }
  338. /*
  339. * if parent_task == initial_task, create a starpu task as a continuation to all the implicit
  340. * tasks of the new region, else prepare the parent_task for preemption,
  341. * to become itself a continuation to the implicit tasks of the new region
  342. */
  343. if (parent_task == _global_state.initial_task)
  344. {
  345. new_region->continuation_starpu_task = starpu_task_create();
  346. /* in that case, the continuation starpu task is only used for synchronisation */
  347. new_region->continuation_starpu_task->cl = NULL;
  348. parent_region->initial_nested_region = new_region;
  349. }
  350. else
  351. {
  352. /* through the preemption, the parent starpu task becomes the continuation task */
  353. _starpu_task_prepare_for_preemption(parent_task->starpu_task);
  354. new_region->continuation_starpu_task = parent_task->starpu_task;
  355. }
  356. /*
  357. * save pointer to the regions user function from the parallel region codelet
  358. *
  359. * TODO: add support for multiple/heterogeneous implementations
  360. */
  361. void (*parallel_region_f)(void **starpu_buffers, void *starpu_cl_arg) = parallel_region_cl->cpu_funcs[0];
  362. /*
  363. * plug the task wrapper into the parallel region codelet instead, to support task preemption
  364. */
  365. parallel_region_cl->cpu_funcs[0] = starpu_omp_task_exec;
  366. /*
  367. * create the starpu tasks for the implicit omp tasks,
  368. * create explicit dependencies between these starpu tasks and the continuation starpu task
  369. */
  370. struct starpu_omp_task * implicit_task;
  371. for (implicit_task = starpu_omp_task_list_begin(new_region->implicit_task_list);
  372. implicit_task != starpu_omp_task_list_end(new_region->implicit_task_list);
  373. implicit_task = starpu_omp_task_list_next(implicit_task))
  374. {
  375. implicit_task->f = parallel_region_f;
  376. implicit_task->starpu_task = starpu_task_create();
  377. implicit_task->starpu_task->cl = parallel_region_cl;
  378. implicit_task->starpu_task->cl_arg = parallel_region_cl_arg;
  379. starpu_task_declare_deps_array(new_region->continuation_starpu_task, 1, &implicit_task->starpu_task);
  380. }
  381. /*
  382. * submit all the region implicit starpu tasks
  383. */
  384. for (implicit_task = starpu_omp_task_list_begin(new_region->implicit_task_list);
  385. implicit_task != starpu_omp_task_list_end(new_region->implicit_task_list);
  386. implicit_task = starpu_omp_task_list_next(implicit_task))
  387. {
  388. ret = starpu_task_submit(implicit_task->starpu_task);
  389. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  390. }
  391. /*
  392. * submit the region continuation starpu task if parent_task == initial_task
  393. */
  394. if (parent_task == _global_state.initial_task)
  395. {
  396. ret = _starpu_task_submit_internally(new_region->continuation_starpu_task);
  397. STARPU_CHECK_RETURN_VALUE(ret, "_starpu_task_submit_internally");
  398. }
  399. /*
  400. * preempt for completion of the region
  401. */
  402. starpu_omp_task_preempt();
  403. /*
  404. * TODO: free region resources
  405. */
  406. }
  407. /*
  408. * restore deprecated diagnostics (-Wdeprecated-declarations)
  409. */
  410. #pragma GCC diagnostic pop
  411. #endif /* STARPU_OPENMP */