workers.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  /*
   * StarPU
   * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU Lesser General Public License as published by
   * the Free Software Foundation; either version 2.1 of the License, or (at
   * your option) any later version.
   *
   * This program is distributed in the hope that it will be useful, but
   * WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   *
   * See the GNU Lesser General Public License in COPYING.LGPL for more details.
   */
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <common/config.h>
  19. #include <core/workers.h>
  20. #include <core/debug.h>
  21. #ifdef __MINGW32__
  22. #include <windows.h>
  23. #endif
  24. static pthread_key_t worker_key;
  25. static struct machine_config_s config;
  26. struct machine_config_s *_starpu_get_machine_config(void)
  27. {
  28. return &config;
  29. }
  30. /* in case a task is submitted, we may check whether there exists a worker
  31. that may execute the task or not */
  32. inline uint32_t _starpu_worker_exists(uint32_t task_mask)
  33. {
  34. return (task_mask & config.worker_mask);
  35. }
  36. inline uint32_t may_submit_cuda_task(void)
  37. {
  38. return (STARPU_CUDA & config.worker_mask);
  39. }
  40. inline uint32_t may_submit_cpu_task(void)
  41. {
  42. return (STARPU_CPU & config.worker_mask);
  43. }
  44. inline uint32_t _starpu_worker_may_execute_task(unsigned workerid, uint32_t where)
  45. {
  46. return (where & config.workers[workerid].worker_mask);
  47. }
  /*
   * Runtime initialization methods
   */
  #ifdef USE_GORDON
  /* The single worker set shared by all SPU workers; it is handed to the
   * Gordon driver thread started from _starpu_init_workers(). */
  static struct worker_set_s gordon_worker_set;

  /* Becomes non-zero once the Gordon driver thread has been launched
   * (static storage, hence zero-initialized). */
  static unsigned gordon_inited;
  #endif
  55. static void _starpu_init_worker_queue(struct worker_s *workerarg)
  56. {
  57. struct jobq_s *jobq = workerarg->jobq;
  58. /* warning : in case there are multiple workers on the same
  59. queue, we overwrite this value so that it is meaningless */
  60. jobq->arch = workerarg->perf_arch;
  61. jobq->who |= workerarg->worker_mask;
  62. switch (workerarg->arch) {
  63. case STARPU_CPU_WORKER:
  64. jobq->alpha = STARPU_CPU_ALPHA;
  65. break;
  66. case STARPU_CUDA_WORKER:
  67. jobq->alpha = STARPU_CUDA_ALPHA;
  68. break;
  69. case STARPU_GORDON_WORKER:
  70. jobq->alpha = STARPU_GORDON_ALPHA;
  71. break;
  72. default:
  73. STARPU_ABORT();
  74. }
  75. memory_node_attach_queue(jobq, workerarg->memory_node);
  76. }
  77. static void _starpu_init_workers(struct machine_config_s *config)
  78. {
  79. config->running = 1;
  80. pthread_key_create(&worker_key, NULL);
  81. /* Launch workers asynchronously (except for SPUs) */
  82. unsigned worker;
  83. for (worker = 0; worker < config->nworkers; worker++)
  84. {
  85. struct worker_s *workerarg = &config->workers[worker];
  86. workerarg->config = config;
  87. pthread_mutex_init(&workerarg->mutex, NULL);
  88. pthread_cond_init(&workerarg->ready_cond, NULL);
  89. workerarg->workerid = (int)worker;
  90. /* if some codelet's termination cannot be handled directly :
  91. * for instance in the Gordon driver, Gordon tasks' callbacks
  92. * may be executed by another thread than that of the Gordon
  93. * driver so that we cannot call the push_codelet_output method
  94. * directly */
  95. workerarg->terminated_jobs = job_list_new();
  96. workerarg->local_jobs = job_list_new();
  97. pthread_mutex_init(&workerarg->local_jobs_mutex, NULL);
  98. workerarg->status = STATUS_INITIALIZING;
  99. _starpu_init_worker_queue(workerarg);
  100. switch (workerarg->arch) {
  101. #ifdef USE_CPUS
  102. case STARPU_CPU_WORKER:
  103. workerarg->set = NULL;
  104. workerarg->worker_is_initialized = 0;
  105. pthread_create(&workerarg->worker_thread,
  106. NULL, _starpu_cpu_worker, workerarg);
  107. break;
  108. #endif
  109. #ifdef USE_CUDA
  110. case STARPU_CUDA_WORKER:
  111. workerarg->set = NULL;
  112. workerarg->worker_is_initialized = 0;
  113. pthread_create(&workerarg->worker_thread,
  114. NULL, _starpu_cuda_worker, workerarg);
  115. break;
  116. #endif
  117. #ifdef USE_GORDON
  118. case STARPU_GORDON_WORKER:
  119. /* we will only launch gordon once, but it will handle
  120. * the different SPU workers */
  121. if (!gordon_inited)
  122. {
  123. gordon_worker_set.nworkers = config->ngordon_spus;
  124. gordon_worker_set.workers = &config->workers[worker];
  125. gordon_worker_set.set_is_initialized = 0;
  126. pthread_create(&gordon_worker_set.worker_thread, NULL,
  127. gordon_worker, &gordon_worker_set);
  128. pthread_mutex_lock(&gordon_worker_set.mutex);
  129. if (!gordon_worker_set.set_is_initialized)
  130. pthread_cond_wait(&gordon_worker_set.ready_cond,
  131. &gordon_worker_set.mutex);
  132. pthread_mutex_unlock(&gordon_worker_set.mutex);
  133. gordon_inited = 1;
  134. }
  135. workerarg->set = &gordon_worker_set;
  136. gordon_worker_set.joined = 0;
  137. workerarg->worker_is_running = 1;
  138. break;
  139. #endif
  140. default:
  141. STARPU_ABORT();
  142. }
  143. }
  144. for (worker = 0; worker < config->nworkers; worker++)
  145. {
  146. struct worker_s *workerarg = &config->workers[worker];
  147. switch (workerarg->arch) {
  148. case STARPU_CPU_WORKER:
  149. case STARPU_CUDA_WORKER:
  150. pthread_mutex_lock(&workerarg->mutex);
  151. if (!workerarg->worker_is_initialized)
  152. pthread_cond_wait(&workerarg->ready_cond, &workerarg->mutex);
  153. pthread_mutex_unlock(&workerarg->mutex);
  154. break;
  155. #ifdef USE_GORDON
  156. case STARPU_GORDON_WORKER:
  157. /* the initialization of Gordon worker is
  158. * synchronous for now */
  159. break;
  160. #endif
  161. default:
  162. STARPU_ABORT();
  163. }
  164. }
  165. }
  166. void _starpu_set_local_worker_key(struct worker_s *worker)
  167. {
  168. pthread_setspecific(worker_key, worker);
  169. }
  170. struct worker_s *_starpu_get_local_worker_key(void)
  171. {
  172. return pthread_getspecific(worker_key);
  173. }
  174. int starpu_init(struct starpu_conf *user_conf)
  175. {
  176. int ret;
  177. #ifdef __MINGW32__
  178. WSADATA wsadata;
  179. WSAStartup(MAKEWORD(1,0), &wsadata);
  180. #endif
  181. srand(2008);
  182. #ifdef USE_FXT
  183. start_fxt_profiling();
  184. #endif
  185. _starpu_open_debug_logfile();
  186. timing_init();
  187. load_bus_performance_files();
  188. /* store the pointer to the user explicit configuration during the
  189. * initialization */
  190. config.user_conf = user_conf;
  191. ret = starpu_build_topology(&config);
  192. if (ret)
  193. return ret;
  194. /* initialize the scheduler */
  195. /* initialize the queue containing the jobs */
  196. init_sched_policy(&config);
  197. _starpu_init_workers(&config);
  198. return 0;
  199. }
  /*
   * Handle runtime termination
   */
  203. static void _starpu_terminate_workers(struct machine_config_s *config)
  204. {
  205. int status;
  206. unsigned workerid;
  207. for (workerid = 0; workerid < config->nworkers; workerid++)
  208. {
  209. starpu_wake_all_blocked_workers();
  210. #ifdef VERBOSE
  211. fprintf(stderr, "wait for worker %d\n", workerid);
  212. #endif
  213. struct worker_set_s *set = config->workers[workerid].set;
  214. struct worker_s *worker = &config->workers[workerid];
  215. /* in case StarPU termination code is called from a callback,
  216. * we have to check if pthread_self() is the worker itself */
  217. if (set){
  218. if (!set->joined) {
  219. if (!pthread_equal(pthread_self(), set->worker_thread))
  220. {
  221. status = pthread_join(set->worker_thread, NULL);
  222. #ifdef VERBOSE
  223. if (status)
  224. fprintf(stderr, "pthread_join -> %d\n", status);
  225. #endif
  226. }
  227. set->joined = 1;
  228. }
  229. }
  230. else {
  231. if (!pthread_equal(pthread_self(), worker->worker_thread))
  232. {
  233. status = pthread_join(worker->worker_thread, NULL);
  234. #ifdef VERBOSE
  235. if (status)
  236. fprintf(stderr, "pthread_join -> %d\n", status);
  237. #endif
  238. }
  239. }
  240. job_list_delete(worker->local_jobs);
  241. job_list_delete(worker->terminated_jobs);
  242. }
  243. }
  244. unsigned _starpu_machine_is_running(void)
  245. {
  246. return config.running;
  247. }
  /*
   * Decide whether a worker attached to memory node `memnode` may block.
   *
   * Returns 1 only when all three of the following hold, 0 otherwise:
   *   - check_that_no_data_request_exists(memnode) is non-zero,
   *   - the machine is still running,
   *   - _starpu_execute_registered_progression_hooks() returns non-zero.
   *
   * The three checks are always evaluated, in this order, whatever the
   * outcome of the previous ones (the hooks call may well have side effects).
   */
  unsigned _starpu_worker_can_block(unsigned memnode)
  {
      const int requests_veto = !check_that_no_data_request_exists(memnode);
      const int shutdown_veto = !_starpu_machine_is_running();
      const int hooks_veto = !_starpu_execute_registered_progression_hooks();

      if (requests_veto || shutdown_veto || hooks_veto)
      {
          return 0;
      }

      return 1;
  }
  /* Operation applied to each job queue by the _starpu_operate_on_all_queues*
   * helpers. */
  typedef enum {
      BROADCAST = 0,  /* pthread_cond_broadcast() on the queue's activity_cond */
      LOCK = 1,       /* pthread_mutex_lock() on the queue's activity_mutex */
      UNLOCK = 2      /* pthread_mutex_unlock() on the queue's activity_mutex */
  } queue_op;
  264. static void _starpu_operate_on_all_queues_attached_to_node(unsigned nodeid, queue_op op)
  265. {
  266. unsigned q_id;
  267. struct jobq_s *q;
  268. mem_node_descr * const descr = get_memory_node_description();
  269. pthread_rwlock_rdlock(&descr->attached_queues_rwlock);
  270. unsigned nqueues = descr->queues_count[nodeid];
  271. for (q_id = 0; q_id < nqueues; q_id++)
  272. {
  273. q = descr->attached_queues_per_node[nodeid][q_id];
  274. switch (op) {
  275. case BROADCAST:
  276. pthread_cond_broadcast(&q->activity_cond);
  277. break;
  278. case LOCK:
  279. pthread_mutex_lock(&q->activity_mutex);
  280. break;
  281. case UNLOCK:
  282. pthread_mutex_unlock(&q->activity_mutex);
  283. break;
  284. }
  285. }
  286. pthread_rwlock_unlock(&descr->attached_queues_rwlock);
  287. }
  288. inline void _starpu_lock_all_queues_attached_to_node(unsigned node)
  289. {
  290. _starpu_operate_on_all_queues_attached_to_node(node, LOCK);
  291. }
  292. inline void _starpu_unlock_all_queues_attached_to_node(unsigned node)
  293. {
  294. _starpu_operate_on_all_queues_attached_to_node(node, UNLOCK);
  295. }
  296. inline void _starpu_broadcast_all_queues_attached_to_node(unsigned node)
  297. {
  298. _starpu_operate_on_all_queues_attached_to_node(node, BROADCAST);
  299. }
  300. static void _starpu_operate_on_all_queues(queue_op op)
  301. {
  302. unsigned q_id;
  303. struct jobq_s *q;
  304. mem_node_descr * const descr = get_memory_node_description();
  305. pthread_rwlock_rdlock(&descr->attached_queues_rwlock);
  306. unsigned nqueues = descr->total_queues_count;
  307. for (q_id = 0; q_id < nqueues; q_id++)
  308. {
  309. q = descr->attached_queues_all[q_id];
  310. switch (op) {
  311. case BROADCAST:
  312. pthread_cond_broadcast(&q->activity_cond);
  313. break;
  314. case LOCK:
  315. pthread_mutex_lock(&q->activity_mutex);
  316. break;
  317. case UNLOCK:
  318. pthread_mutex_unlock(&q->activity_mutex);
  319. break;
  320. }
  321. }
  322. pthread_rwlock_unlock(&descr->attached_queues_rwlock);
  323. }
  324. static void _starpu_kill_all_workers(struct machine_config_s *config)
  325. {
  326. /* lock all workers and the scheduler (in the proper order) to make
  327. sure everyone will notice the termination */
  328. /* WARNING: here we make the asumption that a queue is not attached to
  329. * different memory nodes ! */
  330. struct sched_policy_s *sched = get_sched_policy();
  331. _starpu_operate_on_all_queues(LOCK);
  332. pthread_mutex_lock(&sched->sched_activity_mutex);
  333. /* set the flag which will tell workers to stop */
  334. config->running = 0;
  335. _starpu_operate_on_all_queues(BROADCAST);
  336. pthread_cond_broadcast(&sched->sched_activity_cond);
  337. pthread_mutex_unlock(&sched->sched_activity_mutex);
  338. _starpu_operate_on_all_queues(UNLOCK);
  339. }
  340. void starpu_shutdown(void)
  341. {
  342. display_msi_stats();
  343. display_alloc_cache_stats();
  344. /* tell all workers to shutdown */
  345. _starpu_kill_all_workers(&config);
  346. #ifdef DATA_STATS
  347. display_comm_ammounts();
  348. #endif
  349. if (starpu_get_env_number("CALIBRATE") != -1)
  350. dump_registered_models();
  351. /* wait for their termination */
  352. _starpu_terminate_workers(&config);
  353. deinit_sched_policy(&config);
  354. starpu_destroy_topology(&config);
  355. #ifdef USE_FXT
  356. stop_fxt_profiling();
  357. #endif
  358. _starpu_close_debug_logfile();
  359. }
  360. unsigned starpu_get_worker_count(void)
  361. {
  362. return config.nworkers;
  363. }
  364. unsigned starpu_get_cpu_worker_count(void)
  365. {
  366. return config.ncpus;
  367. }
  368. unsigned starpu_get_cuda_worker_count(void)
  369. {
  370. return config.ncudagpus;
  371. }
  372. unsigned starpu_get_spu_worker_count(void)
  373. {
  374. return config.ngordon_spus;
  375. }
  376. /* When analyzing performance, it is useful to see what is the processing unit
  377. * that actually performed the task. This function returns the id of the
  378. * processing unit actually executing it, therefore it makes no sense to use it
  379. * within the callbacks of SPU functions for instance. If called by some thread
  380. * that is not controlled by StarPU, starpu_get_worker_id returns -1. */
  381. int starpu_get_worker_id(void)
  382. {
  383. struct worker_s * worker;
  384. worker = _starpu_get_local_worker_key();
  385. if (worker)
  386. {
  387. return worker->workerid;
  388. }
  389. else {
  390. /* there is no worker associated to that thread, perhaps it is
  391. * a thread from the application or this is some SPU worker */
  392. return -1;
  393. }
  394. }
  395. struct worker_s *_starpu_get_worker_struct(unsigned id)
  396. {
  397. return &config.workers[id];
  398. }
  399. enum starpu_archtype starpu_get_worker_type(int id)
  400. {
  401. return config.workers[id].arch;
  402. }
  403. void starpu_get_worker_name(int id, char *dst, size_t maxlen)
  404. {
  405. char *name = config.workers[id].name;
  406. snprintf(dst, maxlen, "%s", name);
  407. }