openmp_runtime_support.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2014, 2017 INRIA
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <starpu.h>
#ifdef STARPU_OPENMP
/*
 * locally disable -Wdeprecated-declarations to avoid
 * lots of deprecated warnings for ucontext related functions
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#include <util/openmp_runtime_support.h>
#include <core/task.h>
#include <core/workers.h>
#include <common/list.h>
#include <common/starpu_spinlock.h>
#include <common/uthash.h>
#include <datawizard/interfaces/data_interface.h>
#include <stdlib.h>
#include <ctype.h>
#include <strings.h>
#define _STARPU_INITIAL_THREAD_STACKSIZE 2097152
static struct starpu_omp_global _global_state;
static starpu_pthread_key_t omp_thread_key;
static starpu_pthread_key_t omp_task_key;
struct starpu_omp_global *_starpu_omp_global_state = NULL;
double _starpu_omp_clock_ref = 0.0; /* clock reference for starpu_omp_get_wtick */
static struct starpu_omp_critical *create_omp_critical_struct(void);
static void destroy_omp_critical_struct(struct starpu_omp_critical *critical);
static struct starpu_omp_device *create_omp_device_struct(void);
static void destroy_omp_device_struct(struct starpu_omp_device *device);
static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_region *parent_region, struct starpu_omp_device *owner_device);
static void destroy_omp_region_struct(struct starpu_omp_region *region);
static struct starpu_omp_thread *create_omp_thread_struct(struct starpu_omp_region *owner_region);
static void destroy_omp_thread_struct(struct starpu_omp_thread *thread);
static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *parent_task,
		struct starpu_omp_thread *owner_thread, struct starpu_omp_region *owner_region, int is_implicit);
static void destroy_omp_task_struct(struct starpu_omp_task *task);
static void wake_up_and_unlock_task(struct starpu_omp_task *task);
static void wake_up_barrier(struct starpu_omp_region *parallel_region);
static void starpu_omp_task_preempt(void);
struct starpu_omp_thread *_starpu_omp_get_thread(void)
{
	struct starpu_omp_thread *thread = STARPU_PTHREAD_GETSPECIFIC(omp_thread_key);
	return thread;
}
static inline void _starpu_omp_set_thread(struct starpu_omp_thread *thread)
{
	STARPU_PTHREAD_SETSPECIFIC(omp_thread_key, thread);
}
struct starpu_omp_task *_starpu_omp_get_task(void)
{
	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
	return task;
}
static inline void _starpu_omp_set_task(struct starpu_omp_task *task)
{
	STARPU_PTHREAD_SETSPECIFIC(omp_task_key, task);
}
struct starpu_omp_region *_starpu_omp_get_region_at_level(int level)
{
	const struct starpu_omp_task *task = _starpu_omp_get_task();
	struct starpu_omp_region *parallel_region;
	if (!task)
		return NULL;
	parallel_region = task->owner_region;
	if (level < 0 || level > parallel_region->icvs.levels_var)
		return NULL;
	while (level < parallel_region->icvs.levels_var)
	{
		parallel_region = parallel_region->parent_region;
	}
	return parallel_region;
}
int _starpu_omp_get_region_thread_num(const struct starpu_omp_region * const region)
{
	struct starpu_omp_thread *thread = _starpu_omp_get_thread();
	STARPU_ASSERT(thread != NULL);
	if (thread == region->master_thread)
		return 0;
	int tid = starpu_omp_thread_list_member(&region->thread_list, thread);
	if (tid >= 0)
		return tid+1;
	_STARPU_ERROR("unrecognized omp thread\n");
}
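/*
 * Lock an OMP task while tolerating an in-flight "transaction": if the task
 * still has a pending continuation callback (transaction_pending != 0),
 * release the lock and spin until the callback has run, so that the task is
 * never woken up before it has actually finished preempting itself.
 */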
static void weak_task_lock(struct starpu_omp_task *task)
{
	_starpu_spin_lock(&task->lock);
	while (task->transaction_pending)
	{
		_starpu_spin_unlock(&task->lock);
		STARPU_UYIELD();
		_starpu_spin_lock(&task->lock);
	}
}
static void weak_task_unlock(struct starpu_omp_task *task)
{
	_starpu_spin_unlock(&task->lock);
}
static void wake_up_and_unlock_task(struct starpu_omp_task *task)
{
	STARPU_ASSERT(task->transaction_pending == 0);
	if (task->wait_on == 0)
	{
		weak_task_unlock(task);
		int ret = starpu_task_submit(task->starpu_task);
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
	}
	else
	{
		weak_task_unlock(task);
	}
}
static void transaction_callback(void *_task)
{
	struct starpu_omp_task *task = _task;
	_starpu_spin_lock(&task->lock);
	STARPU_ASSERT(task->transaction_pending != 0);
	task->transaction_pending = 0;
	_starpu_spin_unlock(&task->lock);
}
static void condition_init(struct starpu_omp_condition *condition)
{
	condition->contention_list_head = NULL;
}
static void condition_exit(struct starpu_omp_condition *condition)
{
	STARPU_ASSERT(condition->contention_list_head == NULL);
	condition->contention_list_head = NULL;
}
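/*
 * Block the calling OMP task on a condition: the task enqueues itself on the
 * condition's contention list, sets the requested wait flag, registers a
 * StarPU continuation whose callback (transaction_callback) clears
 * transaction_pending, and then preempts itself.  It resumes when a signal
 * or broadcast re-submits its StarPU task.  The caller-supplied lock is
 * released while waiting and re-acquired before returning.
 */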
static void condition_wait(struct starpu_omp_condition *condition, struct _starpu_spinlock *lock, enum starpu_omp_task_wait_on flag)
{
	struct starpu_omp_task *task = _starpu_omp_get_task();
	struct starpu_omp_task_link link;
	_starpu_spin_lock(&task->lock);
	task->wait_on |= flag;
	link.task = task;
	link.next = condition->contention_list_head;
	condition->contention_list_head = &link;
	task->transaction_pending = 1;
	_starpu_spin_unlock(&task->lock);
	_starpu_spin_unlock(lock);
	_starpu_task_prepare_for_continuation_ext(0, transaction_callback, task);
	starpu_omp_task_preempt();
	/* re-acquire the lock released by the callback */
	_starpu_spin_lock(lock);
}
#if 0
/* unused for now */
static void condition_signal(struct starpu_omp_condition *condition)
{
	if (condition->contention_list_head != NULL)
	{
		struct starpu_omp_task *next_task = condition->contention_list_head->task;
		weak_task_lock(next_task);
		condition->contention_list_head = condition->contention_list_head->next;
		STARPU_ASSERT(next_task->wait_on & starpu_omp_task_wait_on_condition);
		next_task->wait_on &= ~starpu_omp_task_wait_on_condition;
		wake_up_and_unlock_task(next_task);
	}
}
#endif
static void condition_broadcast(struct starpu_omp_condition *condition, enum starpu_omp_task_wait_on flag)
{
	while (condition->contention_list_head != NULL)
	{
		struct starpu_omp_task *next_task = condition->contention_list_head->task;
		weak_task_lock(next_task);
		condition->contention_list_head = condition->contention_list_head->next;
		STARPU_ASSERT(next_task->wait_on & flag);
		next_task->wait_on &= ~flag;
		wake_up_and_unlock_task(next_task);
	}
}
static void register_thread_worker(struct starpu_omp_thread *thread)
{
	STARPU_ASSERT(thread->worker != NULL);
	_starpu_spin_lock(&_global_state.hash_workers_lock);
	struct _starpu_worker *check = thread->worker;
	struct starpu_omp_thread *tmp = NULL;
	HASH_FIND_PTR(_global_state.hash_workers, &check, tmp);
	STARPU_ASSERT(tmp == NULL);
	HASH_ADD_PTR(_global_state.hash_workers, worker, thread);
	_starpu_spin_unlock(&_global_state.hash_workers_lock);
}
static struct starpu_omp_thread *get_worker_thread(struct _starpu_worker *starpu_worker)
{
	struct starpu_omp_thread *thread = NULL;
	_starpu_spin_lock(&_global_state.hash_workers_lock);
	HASH_FIND_PTR(_global_state.hash_workers, &starpu_worker, thread);
	_starpu_spin_unlock(&_global_state.hash_workers_lock);
	return thread;
}
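/*
 * Return the OMP thread structure bound to the calling StarPU worker,
 * caching it in the omp_thread_key TLS slot.  CPU workers are expected to
 * always have an associated OMP thread; CUDA/OpenCL workers may legitimately
 * have none yet (one is created lazily when they run an explicit task).
 */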
static struct starpu_omp_thread *get_local_thread(void)
{
	struct starpu_omp_thread *thread = _starpu_omp_get_thread();
	if (thread == NULL)
	{
		struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key();
		STARPU_ASSERT(starpu_worker != NULL);
		thread = get_worker_thread(starpu_worker);
		if (
#ifdef STARPU_USE_CUDA
				(starpu_worker->arch != STARPU_CUDA_WORKER)
				&&
#endif
#ifdef STARPU_USE_OPENCL
				(starpu_worker->arch != STARPU_OPENCL_WORKER)
				&&
#endif
				1
		   )
		{
			STARPU_ASSERT(thread != NULL);
		}
		if (thread != NULL)
		{
			_starpu_omp_set_thread(thread);
		}
	}
	return thread;
}
static struct starpu_omp_critical *create_omp_critical_struct(void)
{
	struct starpu_omp_critical *critical;
	_STARPU_CALLOC(critical, 1, sizeof(*critical));
	_starpu_spin_init(&critical->lock);
	return critical;
}
static void destroy_omp_critical_struct(struct starpu_omp_critical *critical)
{
	STARPU_ASSERT(critical->state == 0);
	STARPU_ASSERT(critical->contention_list_head == NULL);
	_starpu_spin_destroy(&critical->lock);
	critical->name = NULL;
	free(critical);
}
static struct starpu_omp_device *create_omp_device_struct(void)
{
	struct starpu_omp_device *device;
	_STARPU_CALLOC(device, 1, sizeof(*device));
	_starpu_spin_init(&device->atomic_lock);
	return device;
}
static void destroy_omp_device_struct(struct starpu_omp_device *device)
{
	_starpu_spin_destroy(&device->atomic_lock);
	memset(device, 0, sizeof(*device));
	free(device);
}
static struct starpu_omp_device *get_caller_device(void)
{
	struct starpu_omp_task *task = _starpu_omp_get_task();
	struct starpu_omp_device *device;
	if (task)
	{
		STARPU_ASSERT(task->owner_region != NULL);
		device = task->owner_region->owner_device;
	}
	else
	{
		device = _global_state.initial_device;
	}
	STARPU_ASSERT(device != NULL);
	return device;
}
static struct starpu_omp_region *create_omp_region_struct(struct starpu_omp_region *parent_region, struct starpu_omp_device *owner_device)
{
	struct starpu_omp_region *region;
	_STARPU_CALLOC(region, 1, sizeof(*region));
	region->parent_region = parent_region;
	region->owner_device = owner_device;
	starpu_omp_thread_list_init(&region->thread_list);
	_starpu_spin_init(&region->lock);
	_starpu_spin_init(&region->registered_handles_lock);
	region->level = (parent_region != NULL)?parent_region->level+1:0;
	return region;
}
static void destroy_omp_region_struct(struct starpu_omp_region *region)
{
	STARPU_ASSERT(region->nb_threads == 0);
	STARPU_ASSERT(starpu_omp_thread_list_empty(&region->thread_list));
	STARPU_ASSERT(region->continuation_starpu_task == NULL);
	_starpu_spin_destroy(&region->registered_handles_lock);
	_starpu_spin_destroy(&region->lock);
	memset(region, 0, sizeof(*region));
	free(region);
}
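/*
 * Body of the ucontext the initial (main application) thread switches to
 * while a parallel region generated from the initial task is running: it
 * repeatedly runs one step of the StarPU CPU driver, and switches back to
 * the initial task's context once the region's continuation task has
 * terminated.
 */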
static void omp_initial_thread_func(void)
{
	struct starpu_omp_thread *initial_thread = _global_state.initial_thread;
	struct starpu_omp_task *initial_task = _global_state.initial_task;
	while (1)
	{
		struct starpu_task *continuation_starpu_task = initial_task->nested_region->continuation_starpu_task;
		starpu_driver_run_once(&initial_thread->starpu_driver);
		/*
		 * if we are leaving the first nested region, we give control back to the initial task;
		 * otherwise, we should continue to execute work
		 */
		if (_starpu_task_test_termination(continuation_starpu_task))
		{
			initial_task->nested_region->continuation_starpu_task = NULL;
			_starpu_omp_set_task(initial_task);
			swapcontext(&initial_thread->ctx, &initial_task->ctx);
		}
	}
}
static struct starpu_omp_thread *create_omp_thread_struct(struct starpu_omp_region *owner_region)
{
	struct starpu_omp_thread *thread = starpu_omp_thread_new();
	if (thread == NULL)
		_STARPU_ERROR("memory allocation failed");
	memset(thread, 0, sizeof(*thread));
	thread->owner_region = owner_region;
	return thread;
}
static void destroy_omp_thread_struct(struct starpu_omp_thread *thread)
{
	STARPU_ASSERT(thread->current_task == NULL);
	memset(thread, 0, sizeof(*thread));
	starpu_omp_thread_delete(thread);
}
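/*
 * The entry points below run on a task's private ucontext stack (set up by
 * makecontext in the *_task_exec wrappers).  When the task body returns, the
 * task is flagged as terminated and control is handed back to the worker
 * stack with setcontext(); the trampoline itself never returns.
 */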
static void starpu_omp_explicit_task_entry(struct starpu_omp_task *task)
{
	STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT));
	struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key();
	if (starpu_worker->arch == STARPU_CPU_WORKER)
	{
		task->cpu_f(task->starpu_buffers, task->starpu_cl_arg);
	}
#ifdef STARPU_USE_CUDA
	else if (starpu_worker->arch == STARPU_CUDA_WORKER)
	{
		task->cuda_f(task->starpu_buffers, task->starpu_cl_arg);
	}
#endif
#ifdef STARPU_USE_OPENCL
	else if (starpu_worker->arch == STARPU_OPENCL_WORKER)
	{
		task->opencl_f(task->starpu_buffers, task->starpu_cl_arg);
	}
#endif
	else
		_STARPU_ERROR("invalid worker architecture");
	_starpu_omp_unregister_task_handles(task);
	_starpu_spin_lock(&task->lock);
	task->state = starpu_omp_task_state_terminated;
	task->transaction_pending = 1;
	_starpu_spin_unlock(&task->lock);
	struct starpu_omp_thread *thread = _starpu_omp_get_thread();
	/*
	 * the task reached the terminated state, definitively hand control back to the worker code.
	 *
	 * about to run on the worker stack...
	 */
	setcontext(&thread->ctx);
	STARPU_ASSERT(0); /* unreachable code */
}
static void starpu_omp_implicit_task_entry(struct starpu_omp_task *task)
{
	struct starpu_omp_thread *thread = _starpu_omp_get_thread();
	STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT);
	task->cpu_f(task->starpu_buffers, task->starpu_cl_arg);
	starpu_omp_barrier();
	if (thread == task->owner_region->master_thread)
	{
		_starpu_omp_unregister_region_handles(task->owner_region);
	}
	task->state = starpu_omp_task_state_terminated;
	/*
	 * the task reached the terminated state, definitively hand control back to the worker code.
	 *
	 * about to run on the worker stack...
	 */
	setcontext(&thread->ctx);
	STARPU_ASSERT(0); /* unreachable code */
}
/*
 * stop executing a task that is about to block
 * and hand control back to the thread
 */
static void starpu_omp_task_preempt(void)
{
	struct starpu_omp_task *task = _starpu_omp_get_task();
	struct starpu_omp_thread *thread = _starpu_omp_get_thread();
	task->state = starpu_omp_task_state_preempted;
	/*
	 * the task reached a blocked state, hand control back to the worker code.
	 *
	 * about to run on the worker stack...
	 */
	swapcontext(&task->ctx, &thread->ctx);
	/* now running on the task stack again */
}
/*
 * wrap a task function to allow the task to be preempted
 */
static void starpu_omp_implicit_task_exec(void *buffers[], void *cl_arg)
{
	struct starpu_omp_task *task = starpu_task_get_current()->omp_task;
	STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT);
	_starpu_omp_set_task(task);
	struct starpu_omp_thread *thread = get_local_thread();
	if (task->state != starpu_omp_task_state_preempted)
	{
		task->starpu_buffers = buffers;
		task->starpu_cl_arg = cl_arg;
		STARPU_ASSERT(task->stack == NULL);
		STARPU_ASSERT(task->stacksize > 0);
		_STARPU_MALLOC(task->stack, task->stacksize);
		getcontext(&task->ctx);
		/*
		 * we do not use uc_link, starpu_omp_implicit_task_entry will handle
		 * the end of the task
		 */
		task->ctx.uc_link = NULL;
		task->ctx.uc_stack.ss_sp = task->stack;
		task->ctx.uc_stack.ss_size = task->stacksize;
		task->stack_vg_id = VALGRIND_STACK_REGISTER(task->stack, task->stack+task->stacksize);
		makecontext(&task->ctx, (void (*) ()) starpu_omp_implicit_task_entry, 1, task);
	}
	task->state = starpu_omp_task_state_clear;
	/*
	 * start the task execution, or restore a previously preempted task.
	 * about to run on the task stack...
	 */
	swapcontext(&thread->ctx, &task->ctx);
	/* now running on the worker stack again */
	STARPU_ASSERT(task->state == starpu_omp_task_state_preempted
			|| task->state == starpu_omp_task_state_terminated);
	_starpu_omp_set_task(NULL);
	/* TODO: analyse the cause of the return and take appropriate steps */
	if (task->state == starpu_omp_task_state_terminated)
	{
		task->starpu_task->omp_task = NULL;
		task->starpu_task = NULL;
		VALGRIND_STACK_DEREGISTER(task->stack_vg_id);
		task->stack_vg_id = 0;
		free(task->stack);
		task->stack = NULL;
		memset(&task->ctx, 0, sizeof(task->ctx));
	}
	else if (task->state != starpu_omp_task_state_preempted)
		_STARPU_ERROR("invalid omp task state");
}
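/*
 * Propagate the completion of an explicit task: decrement the parent's child
 * count (destroying a zombie parent, or waking a parent that waits on its
 * children), decrement the owner region's bound explicit task count (waking
 * the task that waits on the region's tasks, if any), and decrement the
 * taskgroup's descendent count (waking the group leader if it is waiting on
 * this very taskgroup).
 */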
static void starpu_omp_task_completion_accounting(struct starpu_omp_task *task)
{
	struct starpu_omp_task *parent_task = task->parent_task;
	struct starpu_omp_region *parallel_region = task->owner_region;
	weak_task_lock(parent_task);
	if (STARPU_ATOMIC_ADD(&parent_task->child_task_count, -1) == 0)
	{
		if (parent_task->state == starpu_omp_task_state_zombie)
		{
			STARPU_ASSERT(!(parent_task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT));
			weak_task_unlock(parent_task);
			destroy_omp_task_struct(parent_task);
		}
		else if (parent_task->wait_on & starpu_omp_task_wait_on_task_childs)
		{
			parent_task->wait_on &= ~starpu_omp_task_wait_on_task_childs;
			wake_up_and_unlock_task(parent_task);
		}
		else
		{
			weak_task_unlock(parent_task);
		}
	}
	else
	{
		weak_task_unlock(parent_task);
	}
	_starpu_spin_lock(&parallel_region->lock);
	if (STARPU_ATOMIC_ADD(&parallel_region->bound_explicit_task_count, -1) == 0)
	{
		struct starpu_omp_task *waiting_task = parallel_region->waiting_task;
		_starpu_spin_unlock(&parallel_region->lock);
		if (waiting_task)
		{
			weak_task_lock(waiting_task);
			_starpu_spin_lock(&parallel_region->lock);
			parallel_region->waiting_task = NULL;
			STARPU_ASSERT(waiting_task->wait_on & starpu_omp_task_wait_on_region_tasks);
			waiting_task->wait_on &= ~starpu_omp_task_wait_on_region_tasks;
			_starpu_spin_unlock(&parallel_region->lock);
			wake_up_and_unlock_task(waiting_task);
		}
	}
	else
	{
		_starpu_spin_unlock(&parallel_region->lock);
	}
	if (task->task_group)
	{
		struct starpu_omp_task *leader_task = task->task_group->leader_task;
		STARPU_ASSERT(leader_task != task);
		weak_task_lock(leader_task);
		if (STARPU_ATOMIC_ADD(&task->task_group->descendent_task_count, -1) == 0)
		{
			if (leader_task->wait_on & starpu_omp_task_wait_on_group
					&& task->task_group == leader_task->task_group)
				/* only wake the leader_task if it is actually
				 * waiting for the current task's task_group */
			{
				leader_task->wait_on &= ~starpu_omp_task_wait_on_group;
				wake_up_and_unlock_task(leader_task);
			}
			else
			{
				weak_task_unlock(leader_task);
			}
		}
		else
		{
			weak_task_unlock(leader_task);
		}
	}
}
/*
 * wrap a task function to allow the task to be preempted
 */
static void starpu_omp_explicit_task_exec(void *buffers[], void *cl_arg)
{
	struct starpu_omp_task *task = starpu_task_get_current()->omp_task;
	STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT));
	_starpu_omp_set_task(task);
	struct starpu_omp_thread *thread = get_local_thread();
	if (task->state != starpu_omp_task_state_preempted)
	{
		if (thread == NULL)
		{
			struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key();
			if (starpu_worker->arch != STARPU_CPU_WORKER)
			{
				if (
#ifdef STARPU_USE_CUDA
						(starpu_worker->arch != STARPU_CUDA_WORKER)
						&&
#endif
#ifdef STARPU_USE_OPENCL
						(starpu_worker->arch != STARPU_OPENCL_WORKER)
						&&
#endif
						1
				   )
				{
					_STARPU_ERROR("invalid worker architecture");
				}
				struct starpu_omp_thread *new_thread;
				new_thread = create_omp_thread_struct(NULL);
				new_thread->worker = starpu_worker;
				register_thread_worker(new_thread);
				thread = get_local_thread();
				STARPU_ASSERT(thread == new_thread);
			}
			else
			{
				_STARPU_ERROR("orphaned CPU thread");
			}
		}
		STARPU_ASSERT(thread != NULL);
		if (!(task->flags & STARPU_OMP_TASK_FLAGS_UNTIED))
		{
			struct _starpu_worker *starpu_worker = _starpu_get_local_worker_key();
			task->starpu_task->workerid = starpu_worker->workerid;
			task->starpu_task->execute_on_a_specific_worker = 1;
		}
		task->starpu_buffers = buffers;
		task->starpu_cl_arg = cl_arg;
		STARPU_ASSERT(task->stack == NULL);
		STARPU_ASSERT(task->stacksize > 0);
		_STARPU_MALLOC(task->stack, task->stacksize);
		getcontext(&task->ctx);
		/*
		 * we do not use uc_link, starpu_omp_explicit_task_entry will handle
		 * the end of the task
		 */
		task->ctx.uc_link = NULL;
		task->ctx.uc_stack.ss_sp = task->stack;
		task->ctx.uc_stack.ss_size = task->stacksize;
		makecontext(&task->ctx, (void (*) ()) starpu_omp_explicit_task_entry, 1, task);
	}
	task->state = starpu_omp_task_state_clear;
	/*
	 * start the task execution, or restore a previously preempted task.
	 * about to run on the task stack...
	 */
	swapcontext(&thread->ctx, &task->ctx);
	/* now running on the worker stack again */
	STARPU_ASSERT(task->state == starpu_omp_task_state_preempted
			|| task->state == starpu_omp_task_state_terminated);
	_starpu_omp_set_task(NULL);
	/* TODO: analyse the cause of the return and take appropriate steps */
	if (task->state == starpu_omp_task_state_terminated)
	{
		free(task->stack);
		task->stack = NULL;
		memset(&task->ctx, 0, sizeof(task->ctx));
		starpu_omp_task_completion_accounting(task);
	}
	else if (task->state != starpu_omp_task_state_preempted)
		_STARPU_ERROR("invalid omp task state");
}
static struct starpu_omp_task *create_omp_task_struct(struct starpu_omp_task *parent_task,
		struct starpu_omp_thread *owner_thread, struct starpu_omp_region *owner_region, int is_implicit)
{
	struct starpu_omp_task *task = starpu_omp_task_new();
	if (task == NULL)
		_STARPU_ERROR("memory allocation failed");
	memset(task, 0, sizeof(*task));
	task->parent_task = parent_task;
	task->owner_thread = owner_thread;
	task->owner_region = owner_region;
	if (is_implicit)
	{
		task->flags |= STARPU_OMP_TASK_FLAGS_IMPLICIT;
	}
	_starpu_spin_init(&task->lock);
	/* TODO: initialize task->data_env_icvs with proper values */
	memset(&task->data_env_icvs, 0, sizeof(task->data_env_icvs));
	if (is_implicit)
	{
		/* TODO: initialize task->implicit_task_icvs with proper values */
		memset(&task->implicit_task_icvs, 0, sizeof(task->implicit_task_icvs));
	}
	if (owner_region->level > 0)
	{
		STARPU_ASSERT(owner_region->owner_device->icvs.stacksize_var > 0);
		task->stacksize = owner_region->owner_device->icvs.stacksize_var;
	}
	return task;
}
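/*
 * A task structure may only be freed once it has terminated, once a zombie
 * task (presumably one whose body completed while children were still
 * running) has no children left, or while it is in the target state; in that
 * last case the completion accounting has not been done yet and is performed
 * here.
 */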
static void destroy_omp_task_struct(struct starpu_omp_task *task)
{
	STARPU_ASSERT(task->state == starpu_omp_task_state_terminated || (task->state == starpu_omp_task_state_zombie && task->child_task_count == 0) || task->state == starpu_omp_task_state_target);
	if (task->state == starpu_omp_task_state_target)
	{
		starpu_omp_task_completion_accounting(task);
	}
	STARPU_ASSERT(task->nested_region == NULL);
	STARPU_ASSERT(task->starpu_task == NULL);
	STARPU_ASSERT(task->stack == NULL);
	_starpu_spin_destroy(&task->lock);
	memset(task, 0, sizeof(*task));
	starpu_omp_task_delete(task);
}
/*
 * set up the main application thread to handle the possible preemption of the initial task
 */
static void omp_initial_thread_setup(void)
{
	struct starpu_omp_thread *initial_thread = _global_state.initial_thread;
	struct starpu_omp_task *initial_task = _global_state.initial_task;
	/* .current_task */
	initial_thread->current_task = initial_task;
	/* .owner_region already set in create_omp_thread_struct */
	/* .initial_thread_stack */
	_STARPU_MALLOC(initial_thread->initial_thread_stack, _STARPU_INITIAL_THREAD_STACKSIZE);
	if (initial_thread->initial_thread_stack == NULL)
		_STARPU_ERROR("memory allocation failed");
	/* .ctx */
	getcontext(&initial_thread->ctx);
	/*
	 * we do not use uc_link, the initial thread should always hand control back to the initial task
	 */
	initial_thread->ctx.uc_link = NULL;
	initial_thread->ctx.uc_stack.ss_sp = initial_thread->initial_thread_stack;
	initial_thread->ctx.uc_stack.ss_size = _STARPU_INITIAL_THREAD_STACKSIZE;
	initial_thread->initial_thread_stack_vg_id = VALGRIND_STACK_REGISTER(initial_thread->initial_thread_stack, initial_thread->initial_thread_stack+_STARPU_INITIAL_THREAD_STACKSIZE);
	makecontext(&initial_thread->ctx, omp_initial_thread_func, 0);
	/* .starpu_driver */
	/*
	 * we configure StarPU not to launch CPU worker 0,
	 * because the main thread will play the role of worker 0
	 */
	struct starpu_conf omp_starpu_conf;
	int ret = starpu_conf_init(&omp_starpu_conf);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_conf_init");
	initial_thread->starpu_driver.type = STARPU_CPU_WORKER;
	initial_thread->starpu_driver.id.cpu_id = 0;
	omp_starpu_conf.not_launched_drivers = &initial_thread->starpu_driver;
	omp_starpu_conf.n_not_launched_drivers = 1;
	/* we are now ready to start StarPU */
	ret = starpu_init(&omp_starpu_conf);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
	ret = starpu_driver_init(&initial_thread->starpu_driver);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_init");
	_starpu_omp_set_task(initial_task);
	_global_state.nb_starpu_cpu_workers = starpu_worker_get_count_by_type(STARPU_CPU_WORKER);
	_STARPU_MALLOC(_global_state.starpu_cpu_worker_ids, _global_state.nb_starpu_cpu_workers * sizeof(int));
	if (_global_state.starpu_cpu_worker_ids == NULL)
		_STARPU_ERROR("memory allocation failed");
	ret = starpu_worker_get_ids_by_type(STARPU_CPU_WORKER, _global_state.starpu_cpu_worker_ids, _global_state.nb_starpu_cpu_workers);
	STARPU_ASSERT(ret == _global_state.nb_starpu_cpu_workers);
	initial_thread->worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[0]);
	STARPU_ASSERT(initial_thread->worker);
	STARPU_ASSERT(initial_thread->worker->arch == STARPU_CPU_WORKER);
	_starpu_omp_set_thread(initial_thread);
	register_thread_worker(initial_thread);
}
static void omp_initial_thread_exit()
{
	struct starpu_omp_thread *initial_thread = _global_state.initial_thread;
	int ret = starpu_driver_deinit(&initial_thread->starpu_driver);
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_driver_deinit");
	memset(&initial_thread->starpu_driver, 0, sizeof (initial_thread->starpu_driver));
	/* the driver for the main thread is now de-inited, we can shut down StarPU */
	starpu_shutdown();
	free(_global_state.starpu_cpu_worker_ids);
	_global_state.starpu_cpu_worker_ids = NULL;
	_global_state.nb_starpu_cpu_workers = 0;
	VALGRIND_STACK_DEREGISTER(initial_thread->initial_thread_stack_vg_id);
	free(initial_thread->initial_thread_stack);
	initial_thread->initial_thread_stack = NULL;
	memset(&initial_thread->ctx, 0, sizeof (initial_thread->ctx));
	initial_thread->current_task = NULL;
}
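/*
 * Build the implicit initial region around the application's main thread:
 * start StarPU (through omp_initial_thread_setup), clamp the requested
 * nthreads-var values to the number of CPU workers, and materialize the
 * per-level nthreads_var/bind_var ICV arrays.  These arrays hold at most
 * max-active-levels entries and are terminated by a 0 (respectively
 * starpu_omp_proc_bind_undefined) sentinel.
 */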
static void omp_initial_region_setup(void)
{
	omp_initial_thread_setup();
	const int max_active_levels = _starpu_omp_initial_icv_values->max_active_levels_var;
	const int max_threads = (int)starpu_cpu_worker_get_count();
	/* implementation specific initial ICV values override */
	if (_starpu_omp_initial_icv_values->nthreads_var[0] == 0)
	{
		_starpu_omp_initial_icv_values->nthreads_var[0] = max_threads;
		_starpu_omp_initial_icv_values->nthreads_var[1] = 0;
	}
	else
	{
		int i;
		for (i = 0; i < max_active_levels; i++)
		{
			if (_starpu_omp_initial_icv_values->nthreads_var[i] == 0)
				break;
			if (_starpu_omp_initial_icv_values->nthreads_var[i] > max_threads)
			{
				_starpu_omp_initial_icv_values->nthreads_var[i] = max_threads;
			}
		}
	}
	_starpu_omp_initial_icv_values->dyn_var = 0;
	_starpu_omp_initial_icv_values->nest_var = 0;
	_global_state.initial_device->icvs.max_active_levels_var = max_active_levels;
	_global_state.initial_device->icvs.def_sched_var = _starpu_omp_initial_icv_values->def_sched_var;
	_global_state.initial_device->icvs.def_sched_chunk_var = _starpu_omp_initial_icv_values->def_sched_chunk_var;
	_global_state.initial_device->icvs.stacksize_var = _starpu_omp_initial_icv_values->stacksize_var;
	_global_state.initial_device->icvs.wait_policy_var = _starpu_omp_initial_icv_values->wait_policy_var;
	_global_state.initial_region->master_thread = _global_state.initial_thread;
	_global_state.initial_region->nb_threads++;
	_global_state.initial_region->icvs.dyn_var = _starpu_omp_initial_icv_values->dyn_var;
	_global_state.initial_region->icvs.nest_var = _starpu_omp_initial_icv_values->nest_var;
	if (_starpu_omp_initial_icv_values->nthreads_var[1] != 0)
	{
		_STARPU_MALLOC(_global_state.initial_region->icvs.nthreads_var, (1+max_active_levels-_global_state.initial_region->level) * sizeof(*_global_state.initial_region->icvs.nthreads_var));
		int i,j;
		for (i = _global_state.initial_region->level, j = 0; i < max_active_levels; i++, j++)
		{
			_global_state.initial_region->icvs.nthreads_var[j] = _starpu_omp_initial_icv_values->nthreads_var[j];
		}
		_global_state.initial_region->icvs.nthreads_var[j] = 0;
	}
	else
	{
		_STARPU_MALLOC(_global_state.initial_region->icvs.nthreads_var, 2 * sizeof(*_global_state.initial_region->icvs.nthreads_var));
		_global_state.initial_region->icvs.nthreads_var[0] = _starpu_omp_initial_icv_values->nthreads_var[0];
		_global_state.initial_region->icvs.nthreads_var[1] = 0;
	}
	if (_starpu_omp_initial_icv_values->bind_var[1] != starpu_omp_proc_bind_undefined)
	{
		_STARPU_MALLOC(_global_state.initial_region->icvs.bind_var, (1+max_active_levels-_global_state.initial_region->level) * sizeof(*_global_state.initial_region->icvs.bind_var));
		int i,j;
		for (i = _global_state.initial_region->level, j = 0; i < max_active_levels; i++, j++)
		{
			_global_state.initial_region->icvs.bind_var[j] = _starpu_omp_initial_icv_values->bind_var[j];
		}
		_global_state.initial_region->icvs.bind_var[j] = starpu_omp_proc_bind_undefined;
	}
	else
	{
		_STARPU_MALLOC(_global_state.initial_region->icvs.bind_var, 2 * sizeof(*_global_state.initial_region->icvs.bind_var));
		_global_state.initial_region->icvs.bind_var[0] = _starpu_omp_initial_icv_values->bind_var[0];
		_global_state.initial_region->icvs.bind_var[1] = starpu_omp_proc_bind_undefined;
	}
	_global_state.initial_region->icvs.thread_limit_var = _starpu_omp_initial_icv_values->thread_limit_var;
	_global_state.initial_region->icvs.active_levels_var = 0;
	_global_state.initial_region->icvs.levels_var = 0;
	_global_state.initial_region->icvs.run_sched_var = _starpu_omp_initial_icv_values->run_sched_var;
	_global_state.initial_region->icvs.run_sched_chunk_var = _starpu_omp_initial_icv_values->run_sched_chunk_var;
	_global_state.initial_region->icvs.default_device_var = _starpu_omp_initial_icv_values->default_device_var;
	_global_state.initial_region->icvs.max_task_priority_var = _starpu_omp_initial_icv_values->max_task_priority_var;
	_global_state.initial_region->implicit_task_array = &_global_state.initial_task;
}
static void omp_initial_region_exit(void)
{
	omp_initial_thread_exit();
	_global_state.initial_task->state = starpu_omp_task_state_terminated;
	_global_state.initial_region->implicit_task_array = NULL;
	_global_state.initial_region->master_thread = NULL;
	free(_global_state.initial_region->icvs.nthreads_var);
	free(_global_state.initial_region->icvs.bind_var);
	_global_state.initial_region->nb_threads--;
}
/*
 * If StarPU was compiled with --enable-openmp, but the OpenMP runtime support
 * is not in use, starpu_init() may have been called directly instead of
 * through starpu_omp_init(). However, some starpu_omp functions may still be
 * called, such as _starpu_omp_get_task(). So let's set up a basic environment
 * for them.
 */
void _starpu_omp_dummy_init(void)
{
	if (_starpu_omp_global_state != &_global_state)
	{
		STARPU_PTHREAD_KEY_CREATE(&omp_thread_key, NULL);
		STARPU_PTHREAD_KEY_CREATE(&omp_task_key, NULL);
	}
}
/*
 * Free data structures allocated by _starpu_omp_dummy_init().
 */
void _starpu_omp_dummy_shutdown(void)
{
	if (_starpu_omp_global_state != &_global_state)
	{
		STARPU_PTHREAD_KEY_DELETE(omp_thread_key);
		STARPU_PTHREAD_KEY_DELETE(omp_task_key);
	}
}
/*
 * Entry point to be called by the OpenMP runtime constructor
 */
int starpu_omp_init(void)
{
	_starpu_omp_global_state = &_global_state;
	STARPU_PTHREAD_KEY_CREATE(&omp_thread_key, NULL);
	STARPU_PTHREAD_KEY_CREATE(&omp_task_key, NULL);
	_global_state.initial_device = create_omp_device_struct();
	_global_state.initial_region = create_omp_region_struct(NULL, _global_state.initial_device);
	_global_state.initial_thread = create_omp_thread_struct(_global_state.initial_region);
	_global_state.initial_task = create_omp_task_struct(NULL,
			_global_state.initial_thread, _global_state.initial_region, 1);
	_global_state.default_critical = create_omp_critical_struct();
	_global_state.default_arbiter = starpu_arbiter_create();
	_global_state.named_criticals = NULL;
	_starpu_spin_init(&_global_state.named_criticals_lock);
	_global_state.hash_workers = NULL;
	_starpu_spin_init(&_global_state.hash_workers_lock);
	_starpu_omp_environment_init();
	_global_state.icvs.cancel_var = _starpu_omp_initial_icv_values->cancel_var;
	omp_initial_region_setup();
	/* init clock reference for starpu_omp_get_wtick */
	_starpu_omp_clock_ref = starpu_timing_now();
	return 0;
}
void starpu_omp_shutdown(void)
{
	omp_initial_region_exit();
	/* TODO: free ICV variables */
	/* TODO: free task/thread/region/device structures */
	destroy_omp_task_struct(_global_state.initial_task);
	_global_state.initial_task = NULL;
	_global_state.initial_thread = NULL;
	destroy_omp_region_struct(_global_state.initial_region);
	_global_state.initial_region = NULL;
	destroy_omp_device_struct(_global_state.initial_device);
	_global_state.initial_device = NULL;
	destroy_omp_critical_struct(_global_state.default_critical);
	_global_state.default_critical = NULL;
	starpu_arbiter_destroy(_global_state.default_arbiter);
	_global_state.default_arbiter = NULL;
	_starpu_spin_lock(&_global_state.named_criticals_lock);
	{
		struct starpu_omp_critical *critical, *tmp;
		HASH_ITER(hh, _global_state.named_criticals, critical, tmp)
		{
			STARPU_ASSERT(critical != NULL);
			HASH_DEL(_global_state.named_criticals, critical);
			destroy_omp_critical_struct(critical);
		}
	}
	STARPU_ASSERT(_global_state.named_criticals == NULL);
	_starpu_spin_unlock(&_global_state.named_criticals_lock);
	_starpu_spin_destroy(&_global_state.named_criticals_lock);
	_starpu_spin_lock(&_global_state.hash_workers_lock);
	{
		struct starpu_omp_thread *thread, *tmp;
		HASH_ITER(hh, _global_state.hash_workers, thread, tmp)
		{
			STARPU_ASSERT(thread != NULL);
			HASH_DEL(_global_state.hash_workers, thread);
			destroy_omp_thread_struct(thread);
		}
	}
	STARPU_ASSERT(_global_state.hash_workers == NULL);
	_starpu_spin_unlock(&_global_state.hash_workers_lock);
	_starpu_spin_destroy(&_global_state.hash_workers_lock);
	_starpu_omp_environment_exit();
	STARPU_PTHREAD_KEY_DELETE(omp_task_key);
	STARPU_PTHREAD_KEY_DELETE(omp_thread_key);
}
static void implicit_task__destroy_callback(void *_task)
{
	struct starpu_omp_task *task = _task;
	destroy_omp_task_struct(task);
}
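/*
 * Illustrative usage sketch (not part of this file): a compiler front-end or
 * application typically outlines the body of a parallel construct into a
 * function, describes it in a struct starpu_omp_parallel_region_attr, and
 * calls this entry point.  The names parallel_region_body and body_args
 * below are hypothetical placeholders; only fields actually read by this
 * function (cl, handles, cl_arg, cl_arg_size, cl_arg_free, if_clause,
 * num_threads) are relevant.
 *
 *	struct starpu_omp_parallel_region_attr attr;
 *	memset(&attr, 0, sizeof(attr));
 *	attr.cl.cpu_funcs[0] = parallel_region_body;
 *	attr.cl.nbuffers = 0;
 *	attr.cl_arg = body_args;
 *	attr.if_clause = 1;
 *	attr.num_threads = 0;   (0: fall back to the nthreads-var ICV of the generating region)
 *	starpu_omp_parallel_region(&attr);
 */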
  918. void starpu_omp_parallel_region(const struct starpu_omp_parallel_region_attr *attr)
  919. {
  920. struct starpu_omp_thread *master_thread = _starpu_omp_get_thread();
  921. struct starpu_omp_task *task = _starpu_omp_get_task();
  922. struct starpu_omp_region *generating_region = task->owner_region;
  923. const int max_active_levels = generating_region->owner_device->icvs.max_active_levels_var;
  924. struct starpu_omp_region *new_region =
  925. create_omp_region_struct(generating_region, _global_state.initial_device);
  926. int ret;
  927. int nb_threads = 1;
  928. /* TODO: for now, nested parallel sections are not supported, thus we
  929. * open an active parallel section only if the generating region is the
  930. * initial region */
  931. if (attr->if_clause != 0)
  932. {
  933. const int max_threads = (int)starpu_cpu_worker_get_count();
  934. if (attr->num_threads > 0)
  935. {
  936. nb_threads = attr->num_threads;
  937. }
  938. else
  939. {
  940. nb_threads = generating_region->icvs.nthreads_var[0];
  941. }
  942. if (nb_threads > max_threads)
  943. {
  944. nb_threads = max_threads;
  945. }
  946. if (nb_threads > 1 && generating_region->icvs.active_levels_var+1 > max_active_levels)
  947. {
  948. nb_threads = 1;
  949. }
  950. }
  951. STARPU_ASSERT(nb_threads > 0);
  952. new_region->icvs.dyn_var = generating_region->icvs.dyn_var;
  953. new_region->icvs.nest_var = generating_region->icvs.nest_var;
  954. /* the nthreads_var and bind_var arrays do not hold more than
  955. * max_active_levels entries at most, even if some in-between levels
  956. * are inactive */
  957. if (new_region->level < max_active_levels)
  958. {
  959. if (generating_region->icvs.nthreads_var[1] != 0)
  960. {
  961. _STARPU_MALLOC(new_region->icvs.nthreads_var, (1+max_active_levels-new_region->level) * sizeof(*new_region->icvs.nthreads_var));
  962. int i,j;
  963. for (i = new_region->level, j = 0; i < max_active_levels; i++, j++)
  964. {
  965. new_region->icvs.nthreads_var[j] = generating_region->icvs.nthreads_var[j+1];
  966. }
  967. new_region->icvs.nthreads_var[j] = 0;
  968. }
  969. else
  970. {
  971. _STARPU_MALLOC(new_region->icvs.nthreads_var, 2 * sizeof(*new_region->icvs.nthreads_var));
  972. new_region->icvs.nthreads_var[0] = generating_region->icvs.nthreads_var[0];
  973. new_region->icvs.nthreads_var[1] = 0;
  974. }
  975. if (generating_region->icvs.bind_var[1] != starpu_omp_proc_bind_undefined)
  976. {
  977. _STARPU_MALLOC(new_region->icvs.bind_var, (1+max_active_levels-new_region->level) * sizeof(*new_region->icvs.bind_var));
  978. int i,j;
  979. for (i = new_region->level, j = 0; i < max_active_levels; i++, j++)
  980. {
  981. new_region->icvs.bind_var[j] = generating_region->icvs.bind_var[j+1];
  982. }
  983. new_region->icvs.bind_var[j] = starpu_omp_proc_bind_undefined;
  984. }
  985. else
  986. {
  987. _STARPU_MALLOC(new_region->icvs.bind_var, 2 * sizeof(*new_region->icvs.bind_var));
  988. new_region->icvs.bind_var[0] = generating_region->icvs.bind_var[0];
  989. new_region->icvs.bind_var[1] = starpu_omp_proc_bind_undefined;
  990. }
  991. }
  992. else
  993. {
  994. _STARPU_MALLOC(new_region->icvs.nthreads_var, sizeof(*new_region->icvs.nthreads_var));
  995. new_region->icvs.nthreads_var[0] = generating_region->icvs.nthreads_var[0];
  996. _STARPU_MALLOC(new_region->icvs.bind_var, sizeof(*new_region->icvs.bind_var));
  997. new_region->icvs.bind_var[0] = generating_region->icvs.bind_var[0];
  998. }
  999. new_region->icvs.thread_limit_var = generating_region->icvs.thread_limit_var;
  1000. new_region->icvs.active_levels_var = (nb_threads > 1)?generating_region->icvs.active_levels_var+1:generating_region->icvs.active_levels_var;
  1001. new_region->icvs.levels_var = generating_region->icvs.levels_var+1;
  1002. new_region->icvs.run_sched_var = generating_region->icvs.run_sched_var;
  1003. new_region->icvs.run_sched_chunk_var = generating_region->icvs.run_sched_chunk_var;
  1004. new_region->icvs.default_device_var = generating_region->icvs.default_device_var;
  1005. new_region->icvs.max_task_priority_var = generating_region->icvs.max_task_priority_var;
  1006. _STARPU_CALLOC(new_region->implicit_task_array, nb_threads, sizeof(*new_region->implicit_task_array));
  1007. int i;
  1008. for (i = 0; i < nb_threads; i++)
  1009. {
  1010. struct starpu_omp_thread *new_thread;
  1011. if (i == 0)
  1012. {
  1013. new_thread = master_thread;
  1014. new_region->master_thread = master_thread;
  1015. }
  1016. else
  1017. {
  1018. /* TODO: specify actual starpu worker */
  1019. /* TODO: use a less arbitrary thread/worker mapping scheme */
  1020. if (generating_region->level == 0)
  1021. {
  1022. struct _starpu_worker *worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[i]);
  1023. new_thread = get_worker_thread(worker);
  1024. if (new_thread == NULL)
  1025. {
  1026. new_thread = create_omp_thread_struct(new_region);
  1027. new_thread->worker = _starpu_get_worker_struct(_global_state.starpu_cpu_worker_ids[i]);
  1028. register_thread_worker(new_thread);
  1029. }
  1030. }
  1031. else
  1032. {
  1033. new_thread = master_thread;
  1034. }
  1035. starpu_omp_thread_list_push_back(&new_region->thread_list, new_thread);
  1036. }
  1037. struct starpu_omp_task *new_task = create_omp_task_struct(task, new_thread, new_region, 1);
  1038. new_task->rank = new_region->nb_threads;
  1039. new_region->nb_threads++;
  1040. new_region->implicit_task_array[i] = new_task;
  1041. }
  1042. STARPU_ASSERT(new_region->nb_threads == nb_threads);
  1043. /*
  1044. * if task == initial_task, create a starpu task as a continuation to all the implicit
  1045. * tasks of the new region, else prepare the task for preemption,
  1046. * to become itself a continuation to the implicit tasks of the new region
  1047. */
  1048. if (task == _global_state.initial_task)
  1049. {
  1050. new_region->continuation_starpu_task = starpu_task_create();
  1051. /* in that case, the continuation starpu task is only used for synchronisation */
  1052. new_region->continuation_starpu_task->cl = NULL;
  1053. new_region->continuation_starpu_task->workerid = master_thread->worker->workerid;
  1054. new_region->continuation_starpu_task->execute_on_a_specific_worker = 1;
  1055. /* this sync task will be tested for completion in omp_initial_thread_func() */
  1056. new_region->continuation_starpu_task->detach = 0;
  1057. }
  1058. else
  1059. {
  1060. /* through the preemption, the parent starpu task becomes the continuation task */
  1061. _starpu_task_prepare_for_continuation();
  1062. new_region->continuation_starpu_task = task->starpu_task;
  1063. }
  1064. task->nested_region = new_region;
  1065. /*
  1066. * create the starpu tasks for the implicit omp tasks,
  1067. * create explicit dependencies between these starpu tasks and the continuation starpu task
  1068. */
  1069. for (i = 0; i < nb_threads; i++)
  1070. {
  1071. struct starpu_omp_task * implicit_task = new_region->implicit_task_array[i];
  1072. implicit_task->cl = attr->cl;
  1073. /*
  1074. * save pointer to the regions user function from the parallel region codelet
  1075. *
  1076. * TODO: add support for multiple/heterogeneous implementations
  1077. */
  1078. implicit_task->cpu_f = implicit_task->cl.cpu_funcs[0];
  1079. /*
  1080. * plug the task wrapper into the parallel region codelet instead, to support task preemption
  1081. */
  1082. implicit_task->cl.cpu_funcs[0] = starpu_omp_implicit_task_exec;
  1083. implicit_task->starpu_task = starpu_task_create();
  1084. _starpu_task_set_omp_cleanup_callback(implicit_task->starpu_task, implicit_task__destroy_callback, implicit_task);
  1085. implicit_task->starpu_task->cl = &implicit_task->cl;
  1086. {
  1087. int j;
  1088. for (j = 0; j < implicit_task->cl.nbuffers; j++)
  1089. {
  1090. implicit_task->starpu_task->handles[j] = attr->handles[j];
  1091. }
  1092. }
  1093. implicit_task->starpu_task->cl_arg = attr->cl_arg;
  1094. implicit_task->starpu_task->cl_arg_size = attr->cl_arg_size;
  1095. implicit_task->starpu_task->cl_arg_free = attr->cl_arg_free;
  1096. implicit_task->starpu_task->omp_task = implicit_task;
  1097. implicit_task->starpu_task->workerid = implicit_task->owner_thread->worker->workerid;
  1098. implicit_task->starpu_task->execute_on_a_specific_worker = 1;
  1099. starpu_task_declare_deps_array(new_region->continuation_starpu_task, 1, &implicit_task->starpu_task);
  1100. }
  1101. attr = NULL;
  1102. /*
  1103. * submit all the region implicit starpu tasks
  1104. */
  1105. for (i = 0; i < nb_threads; i++)
  1106. {
  1107. struct starpu_omp_task * implicit_task = new_region->implicit_task_array[i];
  1108. ret = starpu_task_submit(implicit_task->starpu_task);
  1109. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  1110. }
  1111. /*
  1112. * submit the region continuation starpu task if task == initial_task
  1113. */
  1114. if (task == _global_state.initial_task)
  1115. {
  1116. ret = _starpu_task_submit_internally(new_region->continuation_starpu_task);
  1117. STARPU_CHECK_RETURN_VALUE(ret, "_starpu_task_submit_internally");
  1118. }
  1119. /*
  1120. * preempt for completion of the region
  1121. */
  1122. starpu_omp_task_preempt();
  1123. if (task == _global_state.initial_task)
  1124. {
  1125. STARPU_ASSERT(new_region->continuation_starpu_task == NULL);
  1126. }
  1127. else
  1128. {
  1129. STARPU_ASSERT(new_region->continuation_starpu_task != NULL);
  1130. new_region->continuation_starpu_task = NULL;
  1131. }
  1132. /*
  1133. * TODO: free region resources
  1134. */
  1135. for (i = 0; i < nb_threads; i++)
  1136. {
  1137. if (i == 0)
  1138. {
  1139. new_region->master_thread = NULL;
  1140. }
  1141. else
  1142. {
  1143. starpu_omp_thread_list_pop_front(&new_region->thread_list);
1144. /* TODO: clean up unused threads */
  1145. }
  1146. new_region->nb_threads--;
  1147. }
  1148. /* implicit tasks will be freed in implicit_task__destroy_callback() */
  1149. free(new_region->implicit_task_array);
  1150. STARPU_ASSERT(new_region->nb_threads == 0);
  1151. task->nested_region = NULL;
  1152. free(new_region->icvs.bind_var);
  1153. free(new_region->icvs.nthreads_var);
  1154. destroy_omp_region_struct(new_region);
  1155. }
  1156. static void wake_up_barrier(struct starpu_omp_region *parallel_region)
  1157. {
  1158. struct starpu_omp_task *task = _starpu_omp_get_task();
  1159. int i;
  1160. for (i = 0; i < parallel_region->nb_threads; i++)
  1161. {
  1162. struct starpu_omp_task * implicit_task = parallel_region->implicit_task_array[i];
  1163. if (implicit_task == task)
  1164. continue;
  1165. weak_task_lock(implicit_task);
  1166. STARPU_ASSERT(implicit_task->wait_on & starpu_omp_task_wait_on_barrier);
  1167. implicit_task->wait_on &= ~starpu_omp_task_wait_on_barrier;
  1168. wake_up_and_unlock_task(implicit_task);
  1169. }
  1170. }
  1171. void starpu_omp_barrier(void)
  1172. {
  1173. struct starpu_omp_task *task = _starpu_omp_get_task();
1174. /* Assume barriers are performed by the implicit tasks of a parallel_region */
  1175. STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT);
  1176. struct starpu_omp_region *parallel_region = task->owner_region;
  1177. _starpu_spin_lock(&task->lock);
  1178. int inc_barrier_count = STARPU_ATOMIC_ADD(&parallel_region->barrier_count, 1);
  1179. if (inc_barrier_count == parallel_region->nb_threads)
  1180. {
  1181. /* last task reaching the barrier */
  1182. _starpu_spin_lock(&parallel_region->lock);
  1183. ANNOTATE_HAPPENS_AFTER(&parallel_region->barrier_count);
  1184. ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&parallel_region->barrier_count);
  1185. parallel_region->barrier_count = 0;
  1186. ANNOTATE_HAPPENS_AFTER(&parallel_region->barrier_count);
  1187. ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&parallel_region->barrier_count);
  1188. if (parallel_region->bound_explicit_task_count > 0)
  1189. {
  1190. task->wait_on |= starpu_omp_task_wait_on_region_tasks;
  1191. parallel_region->waiting_task = task;
  1192. task->transaction_pending = 1;
  1193. _starpu_spin_unlock(&parallel_region->lock);
  1194. _starpu_spin_unlock(&task->lock);
  1195. _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task);
  1196. starpu_omp_task_preempt();
  1197. }
  1198. else
  1199. {
  1200. _starpu_spin_unlock(&parallel_region->lock);
  1201. _starpu_spin_unlock(&task->lock);
  1202. }
  1203. wake_up_barrier(parallel_region);
  1204. }
  1205. else
  1206. {
  1207. ANNOTATE_HAPPENS_BEFORE(&parallel_region->barrier_count);
  1208. /* not the last task reaching the barrier
  1209. * . prepare for conditional continuation
  1210. * . sleep
  1211. */
  1212. task->wait_on |= starpu_omp_task_wait_on_barrier;
  1213. task->transaction_pending = 1;
  1214. _starpu_spin_unlock(&task->lock);
  1215. _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task);
  1216. starpu_omp_task_preempt();
  1217. STARPU_ASSERT(task->child_task_count == 0);
  1218. }
  1219. }
  1220. void starpu_omp_master(void (*f)(void *arg), void *arg)
  1221. {
  1222. if (starpu_omp_master_inline())
  1223. f(arg);
  1224. }
  1225. /* variant of omp_master for inlined code
  1226. * return !0 for the task that should perform the master section
  1227. * return 0 for the tasks that should not perform the master section */
  1228. int starpu_omp_master_inline(void)
  1229. {
  1230. struct starpu_omp_task *task = _starpu_omp_get_task();
  1231. struct starpu_omp_thread *thread = _starpu_omp_get_thread();
1232. /* Assume master is performed by the implicit tasks of a region */
  1233. STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT);
  1234. struct starpu_omp_region *region = task->owner_region;
  1235. return thread == region->master_thread;
  1236. }
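/*
 * Illustrative usage sketch (not compiled into the runtime): how inlined code
 * running in an implicit task of a parallel region might use
 * starpu_omp_master_inline(); do_master_work() is a hypothetical user function,
 * and the trailing starpu_omp_barrier() is only needed if the other implicit
 * tasks must wait for the master section.
 */
#if 0
static void example_master_section(void)
{
	if (starpu_omp_master_inline())
	{
		/* only the region's master thread executes this */
		do_master_work();
	}
	/* optional: synchronize the whole team afterwards */
	starpu_omp_barrier();
}
#endif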
  1237. void starpu_omp_single(void (*f)(void *arg), void *arg, int nowait)
  1238. {
  1239. if (starpu_omp_single_inline())
  1240. f(arg);
  1241. if (!nowait)
  1242. starpu_omp_barrier();
  1243. }
  1244. /* variant of omp_single for inlined code
  1245. * return !0 for the task that should perform the single section
  1246. * return 0 for the tasks that should not perform the single section
  1247. * wait/nowait should be handled directly by the calling code using starpu_omp_barrier */
  1248. int starpu_omp_single_inline(void)
  1249. {
  1250. struct starpu_omp_task *task = _starpu_omp_get_task();
1251. /* Assume singles are performed by the implicit tasks of a region */
  1252. STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT);
  1253. struct starpu_omp_region *region = task->owner_region;
  1254. int first = STARPU_BOOL_COMPARE_AND_SWAP(&region->single_id, task->single_id, task->single_id+1);
  1255. task->single_id++;
  1256. return first;
  1257. }
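/*
 * Illustrative usage sketch (not compiled into the runtime): an inlined single
 * section. Exactly one implicit task of the region gets a non-zero return
 * value; the explicit barrier reproduces the default (non-nowait) behaviour of
 * starpu_omp_single(). do_single_work() is a hypothetical user function.
 */
#if 0
static void example_single_section(void)
{
	if (starpu_omp_single_inline())
	{
		do_single_work();
	}
	starpu_omp_barrier();
}
#endif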
  1258. void starpu_omp_single_copyprivate(void (*f)(void *arg, void *data, unsigned long long data_size), void *arg, void *data, unsigned long long data_size)
  1259. {
  1260. struct starpu_omp_task *task = _starpu_omp_get_task();
  1261. struct starpu_omp_region *region = task->owner_region;
  1262. int first = starpu_omp_single_inline();
  1263. if (first)
  1264. {
  1265. region->copy_private_data = data;
  1266. f(arg, data, data_size);
  1267. }
  1268. starpu_omp_barrier();
  1269. if (!first)
  1270. memcpy(data, region->copy_private_data, data_size);
  1271. starpu_omp_barrier();
  1272. }
  1273. void *starpu_omp_single_copyprivate_inline_begin(void *data)
  1274. {
  1275. struct starpu_omp_task *task = _starpu_omp_get_task();
  1276. struct starpu_omp_region *region = task->owner_region;
  1277. int first = starpu_omp_single_inline();
  1278. if (first)
  1279. {
  1280. task->single_first = 1;
  1281. region->copy_private_data = data;
  1282. return NULL;
  1283. }
  1284. starpu_omp_barrier();
  1285. return region->copy_private_data;
  1286. }
  1287. void starpu_omp_single_copyprivate_inline_end(void)
  1288. {
  1289. struct starpu_omp_task *task = _starpu_omp_get_task();
1290. /* Assume singles are performed by the implicit tasks of a region */
  1291. STARPU_ASSERT(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT);
  1292. if (task->single_first)
  1293. {
  1294. task->single_first = 0;
  1295. starpu_omp_barrier();
  1296. }
  1297. starpu_omp_barrier();
  1298. }
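/*
 * Illustrative usage sketch (not compiled into the runtime): inlined single +
 * copyprivate. The task elected by starpu_omp_single_copyprivate_inline_begin()
 * fills its local buffer and gets NULL back; the other tasks get a pointer to
 * the elected task's buffer and copy from it before
 * starpu_omp_single_copyprivate_inline_end() releases everyone.
 */
#if 0
static void example_single_copyprivate(void)
{
	int local_value;
	void *src = starpu_omp_single_copyprivate_inline_begin(&local_value);
	if (src == NULL)
	{
		/* elected task: produce the value to broadcast */
		local_value = 42;
	}
	else
	{
		/* other tasks: copy the elected task's value */
		local_value = *(int *) src;
	}
	starpu_omp_single_copyprivate_inline_end();
}
#endif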
  1299. void starpu_omp_critical(void (*f)(void *arg), void *arg, const char *name)
  1300. {
  1301. starpu_omp_critical_inline_begin(name);
  1302. f(arg);
  1303. starpu_omp_critical_inline_end(name);
  1304. }
  1305. void starpu_omp_critical_inline_begin(const char *name)
  1306. {
  1307. struct starpu_omp_task *task = _starpu_omp_get_task();
  1308. struct starpu_omp_critical *critical = NULL;
  1309. struct starpu_omp_task_link link;
  1310. if (name)
  1311. {
  1312. _starpu_spin_lock(&_global_state.named_criticals_lock);
  1313. HASH_FIND_STR(_global_state.named_criticals, name, critical);
  1314. if (critical == NULL)
  1315. {
  1316. critical = create_omp_critical_struct();
  1317. critical->name = name;
  1318. HASH_ADD_STR(_global_state.named_criticals, name, critical);
  1319. }
  1320. _starpu_spin_unlock(&_global_state.named_criticals_lock);
  1321. }
  1322. else
  1323. {
  1324. critical = _global_state.default_critical;
  1325. }
  1326. _starpu_spin_lock(&critical->lock);
  1327. while (critical->state != 0)
  1328. {
  1329. _starpu_spin_lock(&task->lock);
  1330. task->wait_on |= starpu_omp_task_wait_on_critical;
  1331. task->transaction_pending = 1;
  1332. link.task = task;
  1333. link.next = critical->contention_list_head;
  1334. critical->contention_list_head = &link;
  1335. _starpu_spin_unlock(&task->lock);
  1336. _starpu_spin_unlock(&critical->lock);
  1337. _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task);
  1338. starpu_omp_task_preempt();
  1339. /* re-acquire the spin lock */
  1340. _starpu_spin_lock(&critical->lock);
  1341. }
  1342. critical->state = 1;
  1343. _starpu_spin_unlock(&critical->lock);
  1344. }
  1345. void starpu_omp_critical_inline_end(const char *name)
  1346. {
  1347. struct starpu_omp_critical *critical = NULL;
  1348. if (name)
  1349. {
  1350. _starpu_spin_lock(&_global_state.named_criticals_lock);
  1351. HASH_FIND_STR(_global_state.named_criticals, name, critical);
  1352. _starpu_spin_unlock(&_global_state.named_criticals_lock);
  1353. }
  1354. else
  1355. {
  1356. critical = _global_state.default_critical;
  1357. }
  1358. STARPU_ASSERT(critical != NULL);
  1359. _starpu_spin_lock(&critical->lock);
  1360. STARPU_ASSERT(critical->state == 1);
  1361. critical->state = 0;
  1362. if (critical->contention_list_head != NULL)
  1363. {
  1364. struct starpu_omp_task *next_task = critical->contention_list_head->task;
  1365. weak_task_lock(next_task);
  1366. critical->contention_list_head = critical->contention_list_head->next;
  1367. STARPU_ASSERT(next_task->wait_on & starpu_omp_task_wait_on_critical);
  1368. next_task->wait_on &= ~starpu_omp_task_wait_on_critical;
  1369. wake_up_and_unlock_task(next_task);
  1370. }
  1371. _starpu_spin_unlock(&critical->lock);
  1372. }
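/*
 * Illustrative usage sketch (not compiled into the runtime): a named critical
 * section written with the inline begin/end pair instead of the function-based
 * starpu_omp_critical(). update_shared_counter() is a hypothetical user
 * function.
 */
#if 0
static void example_named_critical(void)
{
	starpu_omp_critical_inline_begin("counter");
	/* at most one task of the whole program is here for a given name */
	update_shared_counter();
	starpu_omp_critical_inline_end("counter");
}
#endif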
  1373. static void explicit_task__destroy_callback(void *_task)
  1374. {
  1375. struct starpu_omp_task *task = _task;
  1376. STARPU_ASSERT(!(task->flags & STARPU_OMP_TASK_FLAGS_IMPLICIT));
  1377. task->starpu_task->omp_task = NULL;
  1378. task->starpu_task = NULL;
  1379. _starpu_spin_lock(&task->lock);
  1380. if (task->state != starpu_omp_task_state_target)
  1381. {
  1382. STARPU_ASSERT(task->transaction_pending == 1);
  1383. task->transaction_pending = 0;
  1384. if (task->child_task_count != 0)
  1385. {
  1386. task->state = starpu_omp_task_state_zombie;
  1387. _starpu_spin_unlock(&task->lock);
  1388. return;
  1389. }
  1390. }
  1391. _starpu_spin_unlock(&task->lock);
  1392. destroy_omp_task_struct(task);
  1393. }
  1394. void starpu_omp_task_region(const struct starpu_omp_task_region_attr *attr)
  1395. {
  1396. struct starpu_omp_task *generating_task = _starpu_omp_get_task();
  1397. struct starpu_omp_region *parallel_region = generating_task->owner_region;
  1398. int is_undeferred = 0;
  1399. int is_final = 0;
  1400. int is_included = 0;
  1401. int is_merged = 0;
  1402. int ret;
  1403. if (generating_task == _global_state.initial_task)
  1404. {
  1405. is_undeferred = 1;
  1406. is_final = 1;
  1407. is_included = 1;
  1408. }
  1409. else
  1410. {
  1411. if (!attr->if_clause)
  1412. {
  1413. is_undeferred = 1;
  1414. }
  1415. if (generating_task->flags & STARPU_OMP_TASK_FLAGS_FINAL)
  1416. {
  1417. is_final = 1;
  1418. is_included = 1;
  1419. }
  1420. else if (attr->final_clause)
  1421. {
  1422. is_final = 1;
  1423. }
  1424. if (is_included)
  1425. {
  1426. is_undeferred = 1;
  1427. }
1428. if ((is_undeferred || is_included) && attr->mergeable_clause)
  1429. {
  1430. is_merged = 1;
  1431. }
  1432. }
  1433. if (is_merged || is_included)
  1434. {
  1435. if (is_included)
  1436. {
1437. /* TODO: back up current ICVs and set up new ICVs for the included task */
  1438. }
  1439. int i;
  1440. unsigned n = attr->cl.nbuffers;
  1441. if (n == 0)
  1442. n = 1;
  1443. void *data_interfaces[n];
  1444. for (i = 0; i < attr->cl.nbuffers; i++)
  1445. {
  1446. starpu_data_handle_t handle = attr->handles[i];
  1447. ret = starpu_data_acquire(handle, attr->cl.modes[i]);
  1448. STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
  1449. data_interfaces[i] = starpu_data_get_interface_on_node(handle, handle->home_node);
  1450. }
  1451. void (*f)(void **starpu_buffers, void *starpu_cl_arg) = attr->cl.cpu_funcs[0];
  1452. f(data_interfaces, attr->cl_arg);
  1453. for (i = 0; i < attr->cl.nbuffers; i++)
  1454. {
  1455. starpu_data_release(attr->handles[i]);
  1456. }
  1457. if (attr->cl_arg_free)
  1458. {
  1459. free(attr->cl_arg);
  1460. }
  1461. if (is_included)
  1462. {
1463. /* TODO: restore backed-up ICVs */
  1464. }
  1465. }
  1466. else
  1467. {
  1468. struct starpu_omp_task *generated_task =
  1469. create_omp_task_struct(generating_task, NULL, parallel_region, 0);
  1470. generated_task->cl = attr->cl;
  1471. if (attr->untied_clause)
  1472. {
  1473. generated_task->flags |= STARPU_OMP_TASK_FLAGS_UNTIED;
  1474. }
  1475. if (is_final)
  1476. {
  1477. generated_task->flags |= STARPU_OMP_TASK_FLAGS_FINAL;
  1478. }
  1479. if (is_undeferred)
  1480. {
  1481. generated_task->flags |= STARPU_OMP_TASK_FLAGS_UNDEFERRED;
  1482. }
  1483. generated_task->task_group = generating_task->task_group;
  1484. generated_task->rank = -1;
  1485. /*
1486. * save pointer to the region's user function from the task region codelet
  1487. *
  1488. * TODO: add support for multiple/heterogeneous implementations
  1489. */
  1490. if (generated_task->cl.cpu_funcs[0])
  1491. {
  1492. generated_task->cpu_f = generated_task->cl.cpu_funcs[0];
  1493. /*
  1494. * plug the task wrapper into the task region codelet instead, to support task preemption
  1495. */
  1496. generated_task->cl.cpu_funcs[0] = starpu_omp_explicit_task_exec;
  1497. }
  1498. #ifdef STARPU_USE_CUDA
  1499. if (generated_task->cl.cuda_funcs[0])
  1500. {
  1501. generated_task->cuda_f = generated_task->cl.cuda_funcs[0];
  1502. #if 1
1503. /* we assume for now that CUDA tasks won't block, thus we don't need
1504. * to initialize the StarPU OpenMP Runtime Support context for enabling
1505. * continuations on CUDA tasks */
  1506. generated_task->state = starpu_omp_task_state_target;
  1507. #else
  1508. generated_task->cl.cuda_funcs[0] = starpu_omp_explicit_task_exec;
  1509. #endif
  1510. }
  1511. #endif
  1512. #ifdef STARPU_USE_OPENCL
  1513. if (generated_task->cl.opencl_funcs[0])
  1514. {
  1515. generated_task->opencl_f = generated_task->cl.opencl_funcs[0];
  1516. #if 1
1517. /* we assume for now that OpenCL tasks won't block, thus we don't need
1518. * to initialize the StarPU OpenMP Runtime Support context for enabling
1519. * continuations on OpenCL tasks */
  1520. generated_task->state = starpu_omp_task_state_target;
  1521. #else
  1522. generated_task->cl.opencl_funcs[0] = starpu_omp_explicit_task_exec;
  1523. #endif
  1524. }
  1525. #endif
  1526. /* TODO: add other accelerator support */
  1527. generated_task->starpu_task = starpu_task_create();
  1528. generated_task->starpu_task->cl = &generated_task->cl;
  1529. generated_task->starpu_task->cl_arg = attr->cl_arg;
  1530. generated_task->starpu_task->cl_arg_size = attr->cl_arg_size;
  1531. generated_task->starpu_task->cl_arg_free = attr->cl_arg_free;
  1532. generated_task->starpu_task->priority = attr->priority;
  1533. {
  1534. int i;
  1535. for (i = 0; i < generated_task->cl.nbuffers; i++)
  1536. {
  1537. generated_task->starpu_task->handles[i] = attr->handles[i];
  1538. }
  1539. }
  1540. generated_task->starpu_task->omp_task = generated_task;
  1541. _starpu_task_set_omp_cleanup_callback(generated_task->starpu_task, explicit_task__destroy_callback, generated_task);
  1542. /* if the task is tied, execute_on_a_specific_worker will be changed to 1
  1543. * upon the first preemption of the generated task, once we know
  1544. * which worker thread has been selected */
  1545. generated_task->starpu_task->execute_on_a_specific_worker = 0;
  1546. (void)STARPU_ATOMIC_ADD(&generating_task->child_task_count, 1);
  1547. (void)STARPU_ATOMIC_ADD(&parallel_region->bound_explicit_task_count, 1);
  1548. if (generated_task->task_group)
  1549. {
  1550. (void)STARPU_ATOMIC_ADD(&generated_task->task_group->descendent_task_count, 1);
  1551. }
  1552. /* do not use the attribute struct afterward as it may become out of scope */
  1553. attr = NULL;
  1554. if (is_undeferred)
  1555. {
  1556. _starpu_task_prepare_for_continuation();
  1557. starpu_task_declare_deps_array(generating_task->starpu_task, 1,
  1558. &generated_task->starpu_task);
  1559. }
  1560. ret = starpu_task_submit(generated_task->starpu_task);
  1561. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  1562. if (is_undeferred)
  1563. {
  1564. starpu_omp_task_preempt();
  1565. }
  1566. }
  1567. }
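/*
 * Illustrative usage sketch (not compiled into the runtime): generating an
 * explicit OpenMP task the way a compiler front-end would, using only the
 * attribute fields referenced above. task_body() is a hypothetical user
 * function with the StarPU codelet signature; a CPU-only task with no data
 * handle is assumed.
 */
#if 0
static void task_body(void *buffers[], void *cl_arg)
{
	(void) buffers;
	(void) cl_arg;
	/* deferred work goes here */
}

static void example_explicit_task(void)
{
	struct starpu_omp_task_region_attr attr;
	memset(&attr, 0, sizeof(attr));
	attr.cl.cpu_funcs[0]  = task_body;
	attr.cl.nbuffers      = 0;
	attr.cl_arg           = NULL;
	attr.cl_arg_size      = 0;
	attr.cl_arg_free      = 0;
	attr.if_clause        = 1;
	attr.final_clause     = 0;
	attr.untied_clause    = 1;
	attr.mergeable_clause = 0;
	starpu_omp_task_region(&attr);
	/* wait for the generated child task before continuing */
	starpu_omp_taskwait();
}
#endif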
  1568. void starpu_omp_taskwait(void)
  1569. {
  1570. struct starpu_omp_task *task = _starpu_omp_get_task();
  1571. _starpu_spin_lock(&task->lock);
  1572. if (task->child_task_count > 0)
  1573. {
  1574. task->wait_on |= starpu_omp_task_wait_on_task_childs;
  1575. task->transaction_pending = 1;
  1576. _starpu_spin_unlock(&task->lock);
  1577. _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task);
  1578. starpu_omp_task_preempt();
  1579. STARPU_ASSERT(task->child_task_count == 0);
  1580. }
  1581. else
  1582. {
  1583. _starpu_spin_unlock(&task->lock);
  1584. }
  1585. }
  1586. void starpu_omp_taskgroup(void (*f)(void *arg), void *arg)
  1587. {
  1588. struct starpu_omp_task *task = _starpu_omp_get_task();
  1589. struct starpu_omp_task_group task_group;
  1590. task_group.p_previous_task_group = task->task_group;
  1591. task_group.descendent_task_count = 0;
  1592. task_group.leader_task = task;
  1593. task->task_group = &task_group;
  1594. f(arg);
  1595. _starpu_spin_lock(&task->lock);
  1596. if (task_group.descendent_task_count > 0)
  1597. {
  1598. task->wait_on |= starpu_omp_task_wait_on_group;
  1599. task->transaction_pending = 1;
  1600. _starpu_spin_unlock(&task->lock);
  1601. _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task);
  1602. starpu_omp_task_preempt();
  1603. STARPU_ASSERT(task_group.descendent_task_count == 0);
  1604. }
  1605. else
  1606. {
  1607. _starpu_spin_unlock(&task->lock);
  1608. }
  1609. task->task_group = task_group.p_previous_task_group;
  1610. }
  1611. void starpu_omp_taskgroup_inline_begin(void)
  1612. {
  1613. struct starpu_omp_task *task = _starpu_omp_get_task();
  1614. struct starpu_omp_task_group *p_task_group;
  1615. _STARPU_MALLOC(p_task_group, sizeof(*p_task_group));
  1616. p_task_group->p_previous_task_group = task->task_group;
  1617. p_task_group->descendent_task_count = 0;
  1618. p_task_group->leader_task = task;
  1619. task->task_group = p_task_group;
  1620. }
  1621. void starpu_omp_taskgroup_inline_end(void)
  1622. {
  1623. struct starpu_omp_task *task = _starpu_omp_get_task();
  1624. _starpu_spin_lock(&task->lock);
  1625. struct starpu_omp_task_group *p_task_group = task->task_group;
  1626. if (p_task_group->descendent_task_count > 0)
  1627. {
  1628. task->wait_on |= starpu_omp_task_wait_on_group;
  1629. task->transaction_pending = 1;
  1630. _starpu_spin_unlock(&task->lock);
  1631. _starpu_task_prepare_for_continuation_ext(0, transaction_callback, task);
  1632. starpu_omp_task_preempt();
  1633. STARPU_ASSERT(p_task_group->descendent_task_count == 0);
  1634. }
  1635. else
  1636. {
  1637. _starpu_spin_unlock(&task->lock);
  1638. }
  1639. task->task_group = p_task_group->p_previous_task_group;
  1640. free(p_task_group);
  1641. }
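/*
 * Illustrative usage sketch (not compiled into the runtime): an inlined
 * taskgroup. Tasks generated between the begin/end calls (for instance through
 * starpu_omp_task_region()) are waited for in
 * starpu_omp_taskgroup_inline_end(). generate_child_tasks() is a hypothetical
 * user function.
 */
#if 0
static void example_taskgroup(void)
{
	starpu_omp_taskgroup_inline_begin();
	generate_child_tasks();
	/* blocks until all tasks of the group have completed */
	starpu_omp_taskgroup_inline_end();
}
#endif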
  1642. static inline void _starpu_omp_for_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task,
  1643. struct starpu_omp_loop *loop, int first_call,
  1644. unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
  1645. {
  1646. *_nb_i = 0;
  1647. if (schedule == starpu_omp_sched_undefined)
  1648. {
  1649. schedule = parallel_region->owner_device->icvs.def_sched_var;
  1650. chunk = parallel_region->owner_device->icvs.def_sched_chunk_var;
  1651. }
  1652. else if (schedule == starpu_omp_sched_runtime)
  1653. {
  1654. schedule = parallel_region->icvs.run_sched_var;
  1655. chunk = parallel_region->icvs.run_sched_chunk_var;
  1656. }
  1657. STARPU_ASSERT( schedule == starpu_omp_sched_static
  1658. || schedule == starpu_omp_sched_dynamic
  1659. || schedule == starpu_omp_sched_guided
  1660. || schedule == starpu_omp_sched_auto);
  1661. if (schedule == starpu_omp_sched_auto)
  1662. {
  1663. schedule = starpu_omp_sched_static;
  1664. chunk = 0;
  1665. }
  1666. if (schedule == starpu_omp_sched_static)
  1667. {
  1668. if (chunk > 0)
  1669. {
  1670. if (first_call)
  1671. {
  1672. *_first_i = task->rank * chunk;
  1673. }
  1674. else
  1675. {
  1676. *_first_i += parallel_region->nb_threads * chunk;
  1677. }
  1678. if (*_first_i < nb_iterations)
  1679. {
  1680. if (*_first_i + chunk > nb_iterations)
  1681. {
  1682. *_nb_i = nb_iterations - *_first_i;
  1683. }
  1684. else
  1685. {
  1686. *_nb_i = chunk;
  1687. }
  1688. }
  1689. }
  1690. else
  1691. {
  1692. if (first_call)
  1693. {
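/* static schedule without a chunk size: give each thread one contiguous
 * block of nb_iterations / nb_threads iterations; the first
 * (nb_iterations % nb_threads) threads get one extra iteration each, and
 * the starting index of the remaining threads is shifted accordingly */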
  1694. *_nb_i = nb_iterations / parallel_region->nb_threads;
  1695. *_first_i = (unsigned)task->rank * (*_nb_i);
  1696. unsigned long long remainder = nb_iterations % parallel_region->nb_threads;
  1697. if (remainder > 0)
  1698. {
  1699. if ((unsigned)task->rank < remainder)
  1700. {
  1701. (*_nb_i)++;
  1702. *_first_i += (unsigned)task->rank;
  1703. }
  1704. else
  1705. {
  1706. *_first_i += remainder;
  1707. }
  1708. }
  1709. }
  1710. }
  1711. }
  1712. else if (schedule == starpu_omp_sched_dynamic)
  1713. {
  1714. if (chunk == 0)
  1715. {
  1716. chunk = 1;
  1717. }
  1718. if (first_call)
  1719. {
  1720. *_first_i = 0;
  1721. }
  1722. _starpu_spin_lock(&parallel_region->lock);
  1723. if (loop->next_iteration < nb_iterations)
  1724. {
  1725. *_first_i = loop->next_iteration;
  1726. if (*_first_i + chunk > nb_iterations)
  1727. {
  1728. *_nb_i = nb_iterations - *_first_i;
  1729. }
  1730. else
  1731. {
  1732. *_nb_i = chunk;
  1733. }
  1734. loop->next_iteration += *_nb_i;
  1735. }
  1736. _starpu_spin_unlock(&parallel_region->lock);
  1737. }
  1738. else if (schedule == starpu_omp_sched_guided)
  1739. {
  1740. if (chunk == 0)
  1741. {
  1742. chunk = 1;
  1743. }
  1744. if (first_call)
  1745. {
  1746. *_first_i = 0;
  1747. }
  1748. _starpu_spin_lock(&parallel_region->lock);
  1749. if (loop->next_iteration < nb_iterations)
  1750. {
  1751. *_first_i = loop->next_iteration;
  1752. *_nb_i = (nb_iterations - *_first_i)/parallel_region->nb_threads;
  1753. if (*_nb_i < chunk)
  1754. {
  1755. if (*_first_i+chunk > nb_iterations)
  1756. {
  1757. *_nb_i = nb_iterations - *_first_i;
  1758. }
  1759. else
  1760. {
  1761. *_nb_i = chunk;
  1762. }
  1763. }
  1764. loop->next_iteration += *_nb_i;
  1765. }
  1766. _starpu_spin_unlock(&parallel_region->lock);
  1767. }
  1768. if (ordered)
  1769. {
  1770. task->ordered_first_i = *_first_i;
  1771. task->ordered_nb_i = *_nb_i;
  1772. }
  1773. }
  1774. static inline struct starpu_omp_loop *_starpu_omp_for_get_loop(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task)
  1775. {
  1776. struct starpu_omp_loop *loop;
  1777. loop = parallel_region->loop_list;
  1778. while (loop && loop->id != task->loop_id)
  1779. {
  1780. loop = loop->next_loop;
  1781. }
  1782. return loop;
  1783. }
  1784. static inline struct starpu_omp_loop *_starpu_omp_for_loop_begin(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task,
  1785. int ordered)
  1786. {
  1787. struct starpu_omp_loop *loop;
  1788. _starpu_spin_lock(&parallel_region->lock);
  1789. loop = _starpu_omp_for_get_loop(parallel_region, task);
  1790. if (!loop)
  1791. {
  1792. _STARPU_MALLOC(loop, sizeof(*loop));
  1793. loop->id = task->loop_id;
  1794. loop->next_iteration = 0;
  1795. loop->nb_completed_threads = 0;
  1796. loop->next_loop = parallel_region->loop_list;
  1797. parallel_region->loop_list = loop;
  1798. if (ordered)
  1799. {
  1800. loop->ordered_iteration = 0;
  1801. _starpu_spin_init(&loop->ordered_lock);
  1802. condition_init(&loop->ordered_cond);
  1803. }
  1804. }
  1805. _starpu_spin_unlock(&parallel_region->lock);
  1806. return loop;
  1807. }
  1808. static inline void _starpu_omp_for_loop_end(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task,
  1809. struct starpu_omp_loop *loop, int ordered)
  1810. {
  1811. _starpu_spin_lock(&parallel_region->lock);
  1812. loop->nb_completed_threads++;
  1813. if (loop->nb_completed_threads == parallel_region->nb_threads)
  1814. {
  1815. struct starpu_omp_loop **p_loop;
  1816. if (ordered)
  1817. {
  1818. loop->ordered_iteration = 0;
  1819. condition_exit(&loop->ordered_cond);
  1820. _starpu_spin_destroy(&loop->ordered_lock);
  1821. }
  1822. STARPU_ASSERT(loop->next_loop == NULL);
  1823. p_loop = &(parallel_region->loop_list);
  1824. while (*p_loop != loop)
  1825. {
  1826. p_loop = &((*p_loop)->next_loop);
  1827. }
  1828. *p_loop = NULL;
  1829. free(loop);
  1830. }
  1831. _starpu_spin_unlock(&parallel_region->lock);
  1832. task->loop_id++;
  1833. }
  1834. int starpu_omp_for_inline_first(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
  1835. {
  1836. struct starpu_omp_task *task = _starpu_omp_get_task();
  1837. struct starpu_omp_region *parallel_region = task->owner_region;
  1838. struct starpu_omp_loop *loop = _starpu_omp_for_loop_begin(parallel_region, task, ordered);
  1839. _starpu_omp_for_loop(parallel_region, task, loop, 1, nb_iterations, chunk, schedule, ordered, _first_i, _nb_i);
  1840. if (*_nb_i == 0)
  1841. {
  1842. _starpu_omp_for_loop_end(parallel_region, task, loop, ordered);
  1843. }
  1844. return (*_nb_i != 0);
  1845. }
  1846. int starpu_omp_for_inline_next(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_first_i, unsigned long long *_nb_i)
  1847. {
  1848. struct starpu_omp_task *task = _starpu_omp_get_task();
  1849. struct starpu_omp_region *parallel_region = task->owner_region;
  1850. struct starpu_omp_loop *loop = _starpu_omp_for_loop_begin(parallel_region, task, ordered);
  1851. _starpu_omp_for_loop(parallel_region, task, loop, 0, nb_iterations, chunk, schedule, ordered, _first_i, _nb_i);
  1852. if (*_nb_i == 0)
  1853. {
  1854. _starpu_omp_for_loop_end(parallel_region, task, loop, ordered);
  1855. }
  1856. return (*_nb_i != 0);
  1857. }
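/*
 * Illustrative usage sketch (not compiled into the runtime): driving a
 * worksharing loop by hand with the first/next pair, the way starpu_omp_for()
 * below does. The schedule and chunk values mirror a "schedule(dynamic, 4)"
 * clause; process_iteration() is a hypothetical user function.
 */
#if 0
static void example_inline_for(unsigned long long nb_iterations)
{
	unsigned long long first_i, nb_i, i;
	if (starpu_omp_for_inline_first(nb_iterations, 4, starpu_omp_sched_dynamic, 0, &first_i, &nb_i))
	{
		do
		{
			for (i = first_i; i < first_i + nb_i; i++)
				process_iteration(i);
		}
		while (starpu_omp_for_inline_next(nb_iterations, 4, starpu_omp_sched_dynamic, 0, &first_i, &nb_i));
	}
	/* the implicit barrier of a non-nowait loop must be added explicitly */
	starpu_omp_barrier();
}
#endif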
  1858. int starpu_omp_for_inline_first_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
  1859. {
  1860. unsigned long long nb_i;
  1861. int end = starpu_omp_for_inline_first(nb_iterations, chunk, schedule, ordered, _begin_i, &nb_i);
  1862. *_end_i = *_begin_i + nb_i;
  1863. return end;
  1864. }
  1865. int starpu_omp_for_inline_next_alt(unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, unsigned long long *_begin_i, unsigned long long *_end_i)
  1866. {
  1867. unsigned long long nb_i;
  1868. int end = starpu_omp_for_inline_next(nb_iterations, chunk, schedule, ordered, _begin_i, &nb_i);
  1869. *_end_i = *_begin_i + nb_i;
  1870. return end;
  1871. }
  1872. void starpu_omp_for(void (*f)(unsigned long long _first_i, unsigned long long _nb_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
  1873. {
  1874. unsigned long long _first_i = 0;
  1875. unsigned long long _nb_i = 0;
  1876. if (starpu_omp_for_inline_first(nb_iterations, chunk, schedule, ordered, &_first_i, &_nb_i))
  1877. {
  1878. do
  1879. {
  1880. f(_first_i, _nb_i, arg);
  1881. }
  1882. while (starpu_omp_for_inline_next(nb_iterations, chunk, schedule, ordered, &_first_i, &_nb_i));
  1883. }
  1884. if (!nowait)
  1885. {
  1886. starpu_omp_barrier();
  1887. }
  1888. }
  1889. void starpu_omp_for_alt(void (*f)(unsigned long long _begin_i, unsigned long long _end_i, void *arg), void *arg, unsigned long long nb_iterations, unsigned long long chunk, int schedule, int ordered, int nowait)
  1890. {
  1891. unsigned long long _begin_i = 0;
  1892. unsigned long long _end_i = 0;
  1893. if (starpu_omp_for_inline_first_alt(nb_iterations, chunk, schedule, ordered, &_begin_i, &_end_i))
  1894. {
  1895. do
  1896. {
  1897. f(_begin_i, _end_i, arg);
  1898. }
  1899. while (starpu_omp_for_inline_next_alt(nb_iterations, chunk, schedule, ordered, &_begin_i, &_end_i));
  1900. }
  1901. if (!nowait)
  1902. {
  1903. starpu_omp_barrier();
  1904. }
  1905. }
  1906. void starpu_omp_ordered(void (*f)(void *arg), void *arg)
  1907. {
  1908. starpu_omp_ordered_inline_begin();
  1909. f(arg);
  1910. starpu_omp_ordered_inline_end();
  1911. }
  1912. void starpu_omp_ordered_inline_begin(void)
  1913. {
  1914. struct starpu_omp_task *task = _starpu_omp_get_task();
  1915. struct starpu_omp_region *parallel_region = task->owner_region;
  1916. struct starpu_omp_loop *loop = _starpu_omp_for_get_loop(parallel_region, task);
  1917. unsigned long long i;
  1918. STARPU_ASSERT(task->ordered_nb_i > 0);
  1919. i = task->ordered_first_i;
  1920. task->ordered_first_i++;
  1921. task->ordered_nb_i--;
  1922. _starpu_spin_lock(&loop->ordered_lock);
  1923. while (i != loop->ordered_iteration)
  1924. {
  1925. STARPU_ASSERT(i > loop->ordered_iteration);
  1926. condition_wait(&loop->ordered_cond, &loop->ordered_lock, starpu_omp_task_wait_on_ordered);
  1927. }
  1928. }
  1929. void starpu_omp_ordered_inline_end(void)
  1930. {
  1931. struct starpu_omp_task *task = _starpu_omp_get_task();
  1932. struct starpu_omp_region *parallel_region = task->owner_region;
  1933. struct starpu_omp_loop *loop = _starpu_omp_for_get_loop(parallel_region, task);
  1934. loop->ordered_iteration++;
  1935. condition_broadcast(&loop->ordered_cond, starpu_omp_task_wait_on_ordered);
  1936. _starpu_spin_unlock(&loop->ordered_lock);
  1937. }
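/*
 * Illustrative usage sketch (not compiled into the runtime): an ordered region
 * inside a worksharing loop. The loop must have been started with ordered != 0
 * so that task->ordered_first_i / ordered_nb_i are maintained; emit_in_order()
 * is a hypothetical user function executed once per iteration, in iteration
 * order.
 */
#if 0
static void ordered_loop_body(unsigned long long first_i, unsigned long long nb_i, void *arg)
{
	unsigned long long i;
	(void) arg;
	for (i = first_i; i < first_i + nb_i; i++)
	{
		starpu_omp_ordered_inline_begin();
		emit_in_order(i);
		starpu_omp_ordered_inline_end();
	}
}

static void example_ordered_loop(unsigned long long nb_iterations)
{
	starpu_omp_for(ordered_loop_body, NULL, nb_iterations, 1, starpu_omp_sched_dynamic, 1, 0);
}
#endif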
  1938. static inline struct starpu_omp_sections *_starpu_omp_get_sections(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task)
  1939. {
  1940. struct starpu_omp_sections *sections;
  1941. sections = parallel_region->sections_list;
  1942. while (sections && sections->id != task->sections_id)
  1943. {
  1944. sections = sections->next_sections;
  1945. }
  1946. return sections;
  1947. }
  1948. static inline struct starpu_omp_sections *_starpu_omp_sections_begin(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task)
  1949. {
  1950. struct starpu_omp_sections *sections;
  1951. _starpu_spin_lock(&parallel_region->lock);
  1952. sections = _starpu_omp_get_sections(parallel_region, task);
  1953. if (!sections)
  1954. {
  1955. _STARPU_MALLOC(sections, sizeof(*sections));
  1956. sections->id = task->sections_id;
  1957. sections->next_section_num = 0;
  1958. sections->nb_completed_threads = 0;
  1959. sections->next_sections = parallel_region->sections_list;
  1960. parallel_region->sections_list = sections;
  1961. }
  1962. _starpu_spin_unlock(&parallel_region->lock);
  1963. return sections;
  1964. }
  1965. static inline void _starpu_omp_sections_end(struct starpu_omp_region *parallel_region, struct starpu_omp_task *task,
  1966. struct starpu_omp_sections *sections)
  1967. {
  1968. _starpu_spin_lock(&parallel_region->lock);
  1969. sections->nb_completed_threads++;
  1970. if (sections->nb_completed_threads == parallel_region->nb_threads)
  1971. {
  1972. struct starpu_omp_sections **p_sections;
  1973. STARPU_ASSERT(sections->next_sections == NULL);
  1974. p_sections = &(parallel_region->sections_list);
  1975. while (*p_sections != sections)
  1976. {
  1977. p_sections = &((*p_sections)->next_sections);
  1978. }
  1979. *p_sections = NULL;
  1980. free(sections);
  1981. }
  1982. _starpu_spin_unlock(&parallel_region->lock);
  1983. task->sections_id++;
  1984. }
  1985. void starpu_omp_sections(unsigned long long nb_sections, void (**section_f)(void *arg), void **section_arg, int nowait)
  1986. {
  1987. struct starpu_omp_task *task = _starpu_omp_get_task();
  1988. struct starpu_omp_region *parallel_region = task->owner_region;
  1989. struct starpu_omp_sections *sections = _starpu_omp_sections_begin(parallel_region, task);
  1990. for (;;)
  1991. {
  1992. void (*f)(void *arg) = NULL;
  1993. void *arg = NULL;
  1994. _starpu_spin_lock(&parallel_region->lock);
  1995. if (sections->next_section_num < nb_sections)
  1996. {
  1997. f = section_f[sections->next_section_num];
  1998. arg = section_arg[sections->next_section_num];
  1999. sections->next_section_num ++;
  2000. }
  2001. _starpu_spin_unlock(&parallel_region->lock);
  2002. if (f == NULL)
  2003. break;
  2004. f(arg);
  2005. }
  2006. _starpu_omp_sections_end(parallel_region, task, sections);
  2007. if (!nowait)
  2008. {
  2009. starpu_omp_barrier();
  2010. }
  2011. }
  2012. void starpu_omp_sections_combined(unsigned long long nb_sections, void (*section_f)(unsigned long long section_num, void *arg), void *section_arg, int nowait)
  2013. {
  2014. struct starpu_omp_task *task = _starpu_omp_get_task();
  2015. struct starpu_omp_region *parallel_region = task->owner_region;
  2016. struct starpu_omp_sections *sections = _starpu_omp_sections_begin(parallel_region, task);
  2017. for (;;)
  2018. {
  2019. unsigned long long section_num;
  2020. void *arg = NULL;
  2021. _starpu_spin_lock(&parallel_region->lock);
  2022. if (sections->next_section_num < nb_sections)
  2023. {
  2024. section_num = sections->next_section_num;
  2025. arg = section_arg;
  2026. sections->next_section_num ++;
  2027. }
  2028. else
  2029. {
  2030. _starpu_spin_unlock(&parallel_region->lock);
  2031. break;
  2032. }
  2033. _starpu_spin_unlock(&parallel_region->lock);
  2034. section_f(section_num, arg);
  2035. }
  2036. _starpu_omp_sections_end(parallel_region, task, sections);
  2037. if (!nowait)
  2038. {
  2039. starpu_omp_barrier();
  2040. }
  2041. }
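/*
 * Illustrative usage sketch (not compiled into the runtime): a sections
 * construct with two section functions, dispatched dynamically among the
 * implicit tasks of the region. section_a() and section_b() are hypothetical
 * user functions.
 */
#if 0
static void section_a(void *arg) { (void) arg; /* first section */ }
static void section_b(void *arg) { (void) arg; /* second section */ }

static void example_sections(void)
{
	void (*section_f[2])(void *arg) = { section_a, section_b };
	void *section_arg[2] = { NULL, NULL };
	/* nowait == 0: an implicit barrier ends the construct */
	starpu_omp_sections(2, section_f, section_arg, 0);
}
#endif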
  2042. static void _starpu_omp_lock_init(void **_internal)
  2043. {
  2044. struct _starpu_omp_lock_internal *_lock;
  2045. _STARPU_CALLOC(_lock, 1, sizeof(*_lock));
  2046. _starpu_spin_init(&_lock->lock);
  2047. condition_init(&_lock->cond);
  2048. *_internal = _lock;
  2049. }
  2050. static void _starpu_omp_lock_destroy(void **_internal)
  2051. {
  2052. struct _starpu_omp_lock_internal * const _lock = *_internal;
  2053. STARPU_ASSERT(_lock->state == 0);
  2054. condition_exit(&_lock->cond);
  2055. _starpu_spin_destroy(&_lock->lock);
  2056. memset(_lock, 0, sizeof(*_lock));
  2057. free(_lock);
  2058. *_internal = NULL;
  2059. }
  2060. static void _starpu_omp_lock_set(void **_internal)
  2061. {
  2062. struct _starpu_omp_lock_internal * const _lock = *_internal;
  2063. _starpu_spin_lock(&_lock->lock);
  2064. while (_lock->state != 0)
  2065. {
  2066. condition_wait(&_lock->cond, &_lock->lock, starpu_omp_task_wait_on_lock);
  2067. }
  2068. _lock->state = 1;
  2069. _starpu_spin_unlock(&_lock->lock);
  2070. }
  2071. static void _starpu_omp_lock_unset(void **_internal)
  2072. {
  2073. struct _starpu_omp_lock_internal * const _lock = *_internal;
  2074. _starpu_spin_lock(&_lock->lock);
  2075. STARPU_ASSERT(_lock->state == 1);
  2076. _lock->state = 0;
  2077. condition_broadcast(&_lock->cond, starpu_omp_task_wait_on_lock);
  2078. _starpu_spin_unlock(&_lock->lock);
  2079. }
  2080. static int _starpu_omp_lock_test(void **_internal)
  2081. {
  2082. struct _starpu_omp_lock_internal * const _lock = *_internal;
  2083. int ret = 0;
  2084. _starpu_spin_lock(&_lock->lock);
  2085. if (_lock->state == 0)
  2086. {
  2087. _lock->state = 1;
  2088. ret = 1;
  2089. }
  2090. _starpu_spin_unlock(&_lock->lock);
  2091. return ret;
  2092. }
  2093. static void _starpu_omp_nest_lock_init(void **_internal)
  2094. {
  2095. struct _starpu_omp_nest_lock_internal *_nest_lock;
  2096. _STARPU_CALLOC(_nest_lock, 1, sizeof(*_nest_lock));
  2097. _starpu_spin_init(&_nest_lock->lock);
  2098. condition_init(&_nest_lock->cond);
  2099. *_internal = _nest_lock;
  2100. }
  2101. static void _starpu_omp_nest_lock_destroy(void **_internal)
  2102. {
  2103. struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal;
  2104. STARPU_ASSERT(_nest_lock->state == 0);
  2105. STARPU_ASSERT(_nest_lock->nesting == 0);
  2106. STARPU_ASSERT(_nest_lock->owner_task == NULL);
  2107. condition_exit(&_nest_lock->cond);
  2108. _starpu_spin_destroy(&_nest_lock->lock);
  2109. memset(_nest_lock, 0, sizeof(*_nest_lock));
  2110. free(_nest_lock);
  2111. *_internal = NULL;
  2112. }
  2113. static void _starpu_omp_nest_lock_set(void **_internal)
  2114. {
  2115. struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal;
  2116. struct starpu_omp_task * const task = _starpu_omp_get_task();
  2117. _starpu_spin_lock(&_nest_lock->lock);
  2118. if (_nest_lock->owner_task == task)
  2119. {
  2120. STARPU_ASSERT(_nest_lock->state == 1);
  2121. STARPU_ASSERT(_nest_lock->nesting > 0);
  2122. _nest_lock->nesting++;
  2123. }
  2124. else
  2125. {
  2126. while (_nest_lock->state != 0)
  2127. {
  2128. condition_wait(&_nest_lock->cond, &_nest_lock->lock, starpu_omp_task_wait_on_nest_lock);
  2129. }
  2130. STARPU_ASSERT(_nest_lock->nesting == 0);
  2131. STARPU_ASSERT(_nest_lock->owner_task == NULL);
  2132. _nest_lock->state = 1;
  2133. _nest_lock->owner_task = task;
  2134. _nest_lock->nesting = 1;
  2135. }
  2136. _starpu_spin_unlock(&_nest_lock->lock);
  2137. }
  2138. static void _starpu_omp_nest_lock_unset(void **_internal)
  2139. {
  2140. struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal;
  2141. struct starpu_omp_task * const task = _starpu_omp_get_task();
  2142. _starpu_spin_lock(&_nest_lock->lock);
  2143. STARPU_ASSERT(_nest_lock->owner_task == task);
  2144. STARPU_ASSERT(_nest_lock->state == 1);
  2145. STARPU_ASSERT(_nest_lock->nesting > 0);
  2146. _nest_lock->nesting--;
  2147. if (_nest_lock->nesting == 0)
  2148. {
  2149. _nest_lock->state = 0;
  2150. _nest_lock->owner_task = NULL;
  2151. condition_broadcast(&_nest_lock->cond, starpu_omp_task_wait_on_nest_lock);
  2152. }
  2153. _starpu_spin_unlock(&_nest_lock->lock);
  2154. }
  2155. static int _starpu_omp_nest_lock_test(void **_internal)
  2156. {
  2157. struct _starpu_omp_nest_lock_internal * const _nest_lock = *_internal;
  2158. struct starpu_omp_task * const task = _starpu_omp_get_task();
  2159. int ret = 0;
  2160. _starpu_spin_lock(&_nest_lock->lock);
  2161. if (_nest_lock->state == 0)
  2162. {
  2163. STARPU_ASSERT(_nest_lock->nesting == 0);
  2164. STARPU_ASSERT(_nest_lock->owner_task == NULL);
  2165. _nest_lock->state = 1;
  2166. _nest_lock->owner_task = task;
  2167. _nest_lock->nesting = 1;
  2168. ret = _nest_lock->nesting;
  2169. }
  2170. else if (_nest_lock->owner_task == task)
  2171. {
  2172. STARPU_ASSERT(_nest_lock->state == 1);
  2173. STARPU_ASSERT(_nest_lock->nesting > 0);
  2174. _nest_lock->nesting++;
  2175. ret = _nest_lock->nesting;
  2176. }
  2177. _starpu_spin_unlock(&_nest_lock->lock);
  2178. return ret;
  2179. }
  2180. void starpu_omp_init_lock (starpu_omp_lock_t *lock)
  2181. {
  2182. _starpu_omp_lock_init(&lock->internal);
  2183. }
  2184. void starpu_omp_destroy_lock (starpu_omp_lock_t *lock)
  2185. {
  2186. _starpu_omp_lock_destroy(&lock->internal);
  2187. }
  2188. void starpu_omp_set_lock (starpu_omp_lock_t *lock)
  2189. {
  2190. _starpu_omp_lock_set(&lock->internal);
  2191. }
  2192. void starpu_omp_unset_lock (starpu_omp_lock_t *lock)
  2193. {
  2194. _starpu_omp_lock_unset(&lock->internal);
  2195. }
  2196. int starpu_omp_test_lock (starpu_omp_lock_t *lock)
  2197. {
  2198. return _starpu_omp_lock_test(&lock->internal);
  2199. }
  2200. void starpu_omp_init_nest_lock (starpu_omp_nest_lock_t *nest_lock)
  2201. {
  2202. _starpu_omp_nest_lock_init(&nest_lock->internal);
  2203. }
  2204. void starpu_omp_destroy_nest_lock (starpu_omp_nest_lock_t *nest_lock)
  2205. {
  2206. _starpu_omp_nest_lock_destroy(&nest_lock->internal);
  2207. }
  2208. void starpu_omp_set_nest_lock (starpu_omp_nest_lock_t *nest_lock)
  2209. {
  2210. _starpu_omp_nest_lock_set(&nest_lock->internal);
  2211. }
  2212. void starpu_omp_unset_nest_lock (starpu_omp_nest_lock_t *nest_lock)
  2213. {
  2214. _starpu_omp_nest_lock_unset(&nest_lock->internal);
  2215. }
  2216. int starpu_omp_test_nest_lock (starpu_omp_nest_lock_t *nest_lock)
  2217. {
  2218. return _starpu_omp_nest_lock_test(&nest_lock->internal);
  2219. }
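/*
 * Illustrative usage sketch (not compiled into the runtime): simple and
 * nestable StarPU OpenMP locks. Unlike starpu_omp_lock_t, a
 * starpu_omp_nest_lock_t may be re-acquired by the task that already owns it;
 * each set call must be matched by an unset call.
 */
#if 0
static starpu_omp_lock_t plain_lock;
static starpu_omp_nest_lock_t nest_lock;

static void example_locks(void)
{
	starpu_omp_init_lock(&plain_lock);
	starpu_omp_set_lock(&plain_lock);
	/* ... critical work ... */
	starpu_omp_unset_lock(&plain_lock);
	starpu_omp_destroy_lock(&plain_lock);

	starpu_omp_init_nest_lock(&nest_lock);
	starpu_omp_set_nest_lock(&nest_lock);
	starpu_omp_set_nest_lock(&nest_lock);	/* same task: nesting goes to 2 */
	starpu_omp_unset_nest_lock(&nest_lock);
	starpu_omp_unset_nest_lock(&nest_lock);	/* nesting back to 0: lock released */
	starpu_omp_destroy_nest_lock(&nest_lock);
}
#endif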
  2220. void starpu_omp_atomic_fallback_inline_begin(void)
  2221. {
  2222. struct starpu_omp_device *device = get_caller_device();
  2223. _starpu_spin_lock(&device->atomic_lock);
  2224. }
  2225. void starpu_omp_atomic_fallback_inline_end(void)
  2226. {
  2227. struct starpu_omp_device *device = get_caller_device();
  2228. _starpu_spin_unlock(&device->atomic_lock);
  2229. }
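/*
 * Illustrative usage sketch (not compiled into the runtime): atomic fallback
 * path. When an atomic update cannot be mapped to a hardware primitive, the
 * update is wrapped between the begin/end calls, which take the per-device
 * atomic_lock. shared_accumulator is a hypothetical shared variable.
 */
#if 0
static double shared_accumulator;

static void example_atomic_update(double contribution)
{
	starpu_omp_atomic_fallback_inline_begin();
	shared_accumulator += contribution;
	starpu_omp_atomic_fallback_inline_end();
}
#endif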
  2230. void starpu_omp_vector_annotate(starpu_data_handle_t handle, uint32_t slice_base)
  2231. {
  2232. /* FIXME Oli: rather iterate over all nodes? */
  2233. int node = starpu_data_get_home_node(handle);
  2234. if (node < 0 || (starpu_node_get_kind(node) != STARPU_CPU_RAM))
  2235. node = STARPU_MAIN_RAM;
  2236. struct starpu_vector_interface *vector_interface = (struct starpu_vector_interface *)
  2237. starpu_data_get_interface_on_node(handle, node);
  2238. assert(vector_interface->id == STARPU_VECTOR_INTERFACE_ID);
  2239. vector_interface->slice_base = slice_base;
  2240. }
  2241. struct starpu_arbiter *starpu_omp_get_default_arbiter(void)
  2242. {
  2243. return _global_state.default_arbiter;
  2244. }
  2245. /*
  2246. * restore deprecated diagnostics (-Wdeprecated-declarations)
  2247. */
  2248. #pragma GCC diagnostic pop
  2249. #endif /* STARPU_OPENMP */