workers.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. /*
  2. * StarPU
  3. * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <common/config.h>
  19. #include <common/utils.h>
  20. #include <core/workers.h>
  21. #include <core/debug.h>
  22. #include <core/task.h>
  23. #ifdef __MINGW32__
  24. #include <windows.h>
  25. #endif
  26. /* acquire/release semantic for concurrent initialization/de-initialization */
  27. static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
  28. static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER;
  29. static int init_count;
  30. static enum { UNINITIALIZED, CHANGING, INITIALIZED } initialized = UNINITIALIZED;
  31. static pthread_key_t worker_key;
  32. static struct starpu_machine_config_s config;
  33. struct starpu_machine_config_s *_starpu_get_machine_config(void)
  34. {
  35. return &config;
  36. }
  37. /* in case a task is submitted, we may check whether there exists a worker
  38. that may execute the task or not */
  39. inline uint32_t _starpu_worker_exists(uint32_t task_mask)
  40. {
  41. return (task_mask & config.worker_mask);
  42. }
  43. inline uint32_t _starpu_may_submit_cuda_task(void)
  44. {
  45. return (STARPU_CUDA & config.worker_mask);
  46. }
  47. inline uint32_t _starpu_may_submit_cpu_task(void)
  48. {
  49. return (STARPU_CPU & config.worker_mask);
  50. }
  51. inline uint32_t _starpu_may_submit_opencl_task(void)
  52. {
  53. return (STARPU_OPENCL & config.worker_mask);
  54. }
  55. inline uint32_t _starpu_worker_may_execute_task(unsigned workerid, uint32_t where)
  56. {
  57. return (where & config.workers[workerid].worker_mask);
  58. }
  59. /*
  60. * Runtime initialization methods
  61. */
  #ifdef STARPU_USE_GORDON
  /* Worker set shared by all the Gordon workers: a single thread is launched
   * for the whole set. */
  static struct starpu_worker_set_s gordon_worker_set;

  /* Becomes 1 once the thread of gordon_worker_set has been launched. */
  static unsigned gordon_inited = 0;
  #endif
  66. static void _starpu_init_worker_queue(struct starpu_worker_s *workerarg)
  67. {
  68. struct starpu_jobq_s *jobq = workerarg->jobq;
  69. PTHREAD_MUTEX_LOCK(&jobq->activity_mutex);
  70. /* warning : in case there are multiple workers on the same
  71. queue, we overwrite this value so that it is meaningless */
  72. jobq->arch = workerarg->perf_arch;
  73. jobq->who |= workerarg->worker_mask;
  74. switch (workerarg->arch) {
  75. case STARPU_CPU_WORKER:
  76. jobq->alpha = STARPU_CPU_ALPHA;
  77. break;
  78. case STARPU_CUDA_WORKER:
  79. jobq->alpha = STARPU_CUDA_ALPHA;
  80. break;
  81. case STARPU_OPENCL_WORKER:
  82. jobq->alpha = STARPU_OPENCL_ALPHA;
  83. break;
  84. case STARPU_GORDON_WORKER:
  85. jobq->alpha = STARPU_GORDON_ALPHA;
  86. break;
  87. default:
  88. STARPU_ABORT();
  89. }
  90. PTHREAD_MUTEX_UNLOCK(&jobq->activity_mutex);
  91. _starpu_memory_node_attach_queue(jobq, workerarg->memory_node);
  92. }
  93. static void _starpu_init_workers(struct starpu_machine_config_s *config)
  94. {
  95. config->running = 1;
  96. pthread_key_create(&worker_key, NULL);
  97. /* Launch workers asynchronously (except for SPUs) */
  98. unsigned worker;
  99. for (worker = 0; worker < config->nworkers; worker++)
  100. {
  101. struct starpu_worker_s *workerarg = &config->workers[worker];
  102. workerarg->config = config;
  103. PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
  104. PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
  105. workerarg->workerid = (int)worker;
  106. /* if some codelet's termination cannot be handled directly :
  107. * for instance in the Gordon driver, Gordon tasks' callbacks
  108. * may be executed by another thread than that of the Gordon
  109. * driver so that we cannot call the push_codelet_output method
  110. * directly */
  111. workerarg->terminated_jobs = starpu_job_list_new();
  112. workerarg->local_jobs = starpu_job_list_new();
  113. PTHREAD_MUTEX_INIT(&workerarg->local_jobs_mutex, NULL);
  114. workerarg->status = STATUS_INITIALIZING;
  115. _starpu_init_worker_queue(workerarg);
  116. switch (workerarg->arch) {
  117. #ifdef STARPU_USE_CPU
  118. case STARPU_CPU_WORKER:
  119. workerarg->set = NULL;
  120. workerarg->worker_is_initialized = 0;
  121. pthread_create(&workerarg->worker_thread,
  122. NULL, _starpu_cpu_worker, workerarg);
  123. break;
  124. #endif
  125. #ifdef STARPU_USE_CUDA
  126. case STARPU_CUDA_WORKER:
  127. workerarg->set = NULL;
  128. workerarg->worker_is_initialized = 0;
  129. pthread_create(&workerarg->worker_thread,
  130. NULL, _starpu_cuda_worker, workerarg);
  131. break;
  132. #endif
  133. #ifdef STARPU_USE_OPENCL
  134. case STARPU_OPENCL_WORKER:
  135. workerarg->set = NULL;
  136. workerarg->worker_is_initialized = 0;
  137. pthread_create(&workerarg->worker_thread,
  138. NULL, _starpu_opencl_worker, workerarg);
  139. break;
  140. #endif
  141. #ifdef STARPU_USE_GORDON
  142. case STARPU_GORDON_WORKER:
  143. /* we will only launch gordon once, but it will handle
  144. * the different SPU workers */
  145. if (!gordon_inited)
  146. {
  147. gordon_worker_set.nworkers = config->ngordon_spus;
  148. gordon_worker_set.workers = &config->workers[worker];
  149. gordon_worker_set.set_is_initialized = 0;
  150. pthread_create(&gordon_worker_set.worker_thread, NULL,
  151. _starpu_gordon_worker, &gordon_worker_set);
  152. PTHREAD_MUTEX_LOCK(&gordon_worker_set.mutex);
  153. while (!gordon_worker_set.set_is_initialized)
  154. PTHREAD_COND_WAIT(&gordon_worker_set.ready_cond,
  155. &gordon_worker_set.mutex);
  156. PTHREAD_MUTEX_UNLOCK(&gordon_worker_set.mutex);
  157. gordon_inited = 1;
  158. }
  159. workerarg->set = &gordon_worker_set;
  160. gordon_worker_set.joined = 0;
  161. workerarg->worker_is_running = 1;
  162. break;
  163. #endif
  164. default:
  165. STARPU_ABORT();
  166. }
  167. }
  168. for (worker = 0; worker < config->nworkers; worker++)
  169. {
  170. struct starpu_worker_s *workerarg = &config->workers[worker];
  171. switch (workerarg->arch) {
  172. case STARPU_CPU_WORKER:
  173. case STARPU_CUDA_WORKER:
  174. case STARPU_OPENCL_WORKER:
  175. PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  176. while (!workerarg->worker_is_initialized)
  177. PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  178. PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  179. break;
  180. #ifdef STARPU_USE_GORDON
  181. case STARPU_GORDON_WORKER:
  182. /* the initialization of Gordon worker is
  183. * synchronous for now */
  184. break;
  185. #endif
  186. default:
  187. STARPU_ABORT();
  188. }
  189. }
  190. }
  191. void _starpu_set_local_worker_key(struct starpu_worker_s *worker)
  192. {
  193. pthread_setspecific(worker_key, worker);
  194. }
  195. struct starpu_worker_s *_starpu_get_local_worker_key(void)
  196. {
  197. return pthread_getspecific(worker_key);
  198. }
  199. int starpu_init(struct starpu_conf *user_conf)
  200. {
  201. int ret;
  202. PTHREAD_MUTEX_LOCK(&init_mutex);
  203. while (initialized == CHANGING)
  204. /* Wait for the other one changing it */
  205. PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  206. init_count++;
  207. if (initialized == INITIALIZED)
  208. /* He initialized it, don't do it again */
  209. return 0;
  210. /* initialized == UNINITIALIZED */
  211. initialized = CHANGING;
  212. PTHREAD_MUTEX_UNLOCK(&init_mutex);
  213. #ifdef __MINGW32__
  214. WSADATA wsadata;
  215. WSAStartup(MAKEWORD(1,0), &wsadata);
  216. #endif
  217. srand(2008);
  218. #ifdef STARPU_USE_FXT
  219. _starpu_start_fxt_profiling();
  220. #endif
  221. _starpu_open_debug_logfile();
  222. _starpu_timing_init();
  223. _starpu_load_bus_performance_files();
  224. /* store the pointer to the user explicit configuration during the
  225. * initialization */
  226. config.user_conf = user_conf;
  227. ret = _starpu_build_topology(&config);
  228. if (ret) {
  229. PTHREAD_MUTEX_LOCK(&init_mutex);
  230. init_count--;
  231. initialized = UNINITIALIZED;
  232. /* Let somebody else try to do it */
  233. PTHREAD_COND_SIGNAL(&init_cond);
  234. PTHREAD_MUTEX_UNLOCK(&init_mutex);
  235. return ret;
  236. }
  237. /* We need to store the current task handled by the different
  238. * threads */
  239. _starpu_initialize_current_task_key();
  240. /* initialize the scheduler */
  241. /* initialize the queue containing the jobs */
  242. _starpu_init_sched_policy(&config);
  243. _starpu_init_workers(&config);
  244. PTHREAD_MUTEX_LOCK(&init_mutex);
  245. initialized = INITIALIZED;
  246. /* Tell everybody that we initialized */
  247. PTHREAD_COND_BROADCAST(&init_cond);
  248. PTHREAD_MUTEX_UNLOCK(&init_mutex);
  249. return 0;
  250. }
  251. /*
  252. * Handle runtime termination
  253. */
  254. static void _starpu_terminate_workers(struct starpu_machine_config_s *config)
  255. {
  256. int status;
  257. unsigned workerid;
  258. for (workerid = 0; workerid < config->nworkers; workerid++)
  259. {
  260. starpu_wake_all_blocked_workers();
  261. #ifdef STARPU_VERBOSE
  262. fprintf(stderr, "wait for worker %d\n", workerid);
  263. #endif
  264. struct starpu_worker_set_s *set = config->workers[workerid].set;
  265. struct starpu_worker_s *worker = &config->workers[workerid];
  266. /* in case StarPU termination code is called from a callback,
  267. * we have to check if pthread_self() is the worker itself */
  268. if (set){
  269. if (!set->joined) {
  270. if (!pthread_equal(pthread_self(), set->worker_thread))
  271. {
  272. status = pthread_join(set->worker_thread, NULL);
  273. #ifdef STARPU_VERBOSE
  274. if (status)
  275. fprintf(stderr, "pthread_join -> %d\n", status);
  276. #endif
  277. }
  278. set->joined = 1;
  279. }
  280. }
  281. else {
  282. if (!pthread_equal(pthread_self(), worker->worker_thread))
  283. {
  284. status = pthread_join(worker->worker_thread, NULL);
  285. #ifdef STARPU_VERBOSE
  286. if (status)
  287. fprintf(stderr, "pthread_join -> %d\n", status);
  288. #endif
  289. }
  290. }
  291. starpu_job_list_delete(worker->local_jobs);
  292. starpu_job_list_delete(worker->terminated_jobs);
  293. }
  294. }
  295. unsigned _starpu_machine_is_running(void)
  296. {
  297. return config.running;
  298. }
  /* Tell whether a worker attached to memory node memnode may block.
   *
   * Returns 1 only if the three checks below all succeed, 0 otherwise. The
   * three of them are always evaluated, in this order, whatever the result of
   * the previous ones. */
  unsigned _starpu_worker_can_block(unsigned memnode)
  {
      const int no_request = !!_starpu_check_that_no_data_request_exists(memnode);
      const int running = !!_starpu_machine_is_running();
      const int hooks_ok = !!_starpu_execute_registered_progression_hooks();

      return (no_request && running && hooks_ok) ? 1 : 0;
  }
  /* Operations that can be applied to a set of job queues. */
  typedef enum {
      BROADCAST = 0,  /* broadcast on the activity condition of the queue */
      LOCK = 1,       /* lock the activity mutex of the queue */
      UNLOCK = 2      /* unlock the activity mutex of the queue */
  } queue_op;
  315. static void _starpu_operate_on_all_queues_attached_to_node(unsigned nodeid, queue_op op)
  316. {
  317. unsigned q_id;
  318. struct starpu_jobq_s *q;
  319. starpu_mem_node_descr * const descr = _starpu_get_memory_node_description();
  320. pthread_rwlock_rdlock(&descr->attached_queues_rwlock);
  321. unsigned nqueues = descr->queues_count[nodeid];
  322. for (q_id = 0; q_id < nqueues; q_id++)
  323. {
  324. q = descr->attached_queues_per_node[nodeid][q_id];
  325. switch (op) {
  326. case BROADCAST:
  327. PTHREAD_COND_BROADCAST(&q->activity_cond);
  328. break;
  329. case LOCK:
  330. PTHREAD_MUTEX_LOCK(&q->activity_mutex);
  331. break;
  332. case UNLOCK:
  333. PTHREAD_MUTEX_UNLOCK(&q->activity_mutex);
  334. break;
  335. }
  336. }
  337. pthread_rwlock_unlock(&descr->attached_queues_rwlock);
  338. }
  339. inline void _starpu_lock_all_queues_attached_to_node(unsigned node)
  340. {
  341. _starpu_operate_on_all_queues_attached_to_node(node, LOCK);
  342. }
  343. inline void _starpu_unlock_all_queues_attached_to_node(unsigned node)
  344. {
  345. _starpu_operate_on_all_queues_attached_to_node(node, UNLOCK);
  346. }
  347. inline void _starpu_broadcast_all_queues_attached_to_node(unsigned node)
  348. {
  349. _starpu_operate_on_all_queues_attached_to_node(node, BROADCAST);
  350. }
  351. static void _starpu_operate_on_all_queues(queue_op op)
  352. {
  353. unsigned q_id;
  354. struct starpu_jobq_s *q;
  355. starpu_mem_node_descr * const descr = _starpu_get_memory_node_description();
  356. pthread_rwlock_rdlock(&descr->attached_queues_rwlock);
  357. unsigned nqueues = descr->total_queues_count;
  358. for (q_id = 0; q_id < nqueues; q_id++)
  359. {
  360. q = descr->attached_queues_all[q_id];
  361. switch (op) {
  362. case BROADCAST:
  363. PTHREAD_COND_BROADCAST(&q->activity_cond);
  364. break;
  365. case LOCK:
  366. PTHREAD_MUTEX_LOCK(&q->activity_mutex);
  367. break;
  368. case UNLOCK:
  369. PTHREAD_MUTEX_UNLOCK(&q->activity_mutex);
  370. break;
  371. }
  372. }
  373. pthread_rwlock_unlock(&descr->attached_queues_rwlock);
  374. }
  375. static void _starpu_kill_all_workers(struct starpu_machine_config_s *config)
  376. {
  377. /* lock all workers and the scheduler (in the proper order) to make
  378. sure everyone will notice the termination */
  379. /* WARNING: here we make the asumption that a queue is not attached to
  380. * different memory nodes ! */
  381. struct starpu_sched_policy_s *sched = _starpu_get_sched_policy();
  382. _starpu_operate_on_all_queues(LOCK);
  383. PTHREAD_MUTEX_LOCK(&sched->sched_activity_mutex);
  384. /* set the flag which will tell workers to stop */
  385. config->running = 0;
  386. _starpu_operate_on_all_queues(BROADCAST);
  387. PTHREAD_COND_BROADCAST(&sched->sched_activity_cond);
  388. PTHREAD_MUTEX_UNLOCK(&sched->sched_activity_mutex);
  389. _starpu_operate_on_all_queues(UNLOCK);
  390. }
  391. void starpu_shutdown(void)
  392. {
  393. PTHREAD_MUTEX_LOCK(&init_mutex);
  394. init_count--;
  395. if (init_count)
  396. /* Still somebody needing StarPU, don't deinitialize */
  397. return;
  398. /* We're last */
  399. initialized = CHANGING;
  400. PTHREAD_MUTEX_UNLOCK(&init_mutex);
  401. _starpu_display_msi_stats();
  402. _starpu_display_alloc_cache_stats();
  403. /* tell all workers to shutdown */
  404. _starpu_kill_all_workers(&config);
  405. #ifdef STARPU_DATA_STATS
  406. _starpu_display_comm_amounts();
  407. #endif
  408. if (_starpu_get_calibrate_flag())
  409. _starpu_dump_registered_models();
  410. /* wait for their termination */
  411. _starpu_terminate_workers(&config);
  412. _starpu_deinit_sched_policy(&config);
  413. _starpu_destroy_topology(&config);
  414. #ifdef STARPU_USE_FXT
  415. _starpu_stop_fxt_profiling();
  416. #endif
  417. _starpu_close_debug_logfile();
  418. PTHREAD_MUTEX_LOCK(&init_mutex);
  419. initialized = UNINITIALIZED;
  420. /* Let someone else that wants to initialize it again do it */
  421. pthread_cond_signal(&init_cond);
  422. PTHREAD_MUTEX_UNLOCK(&init_mutex);
  423. }
  424. unsigned starpu_worker_get_count(void)
  425. {
  426. return config.nworkers;
  427. }
  428. unsigned starpu_cpu_worker_get_count(void)
  429. {
  430. return config.ncpus;
  431. }
  432. unsigned starpu_cuda_worker_get_count(void)
  433. {
  434. return config.ncudagpus;
  435. }
  436. unsigned starpu_opencl_worker_get_count(void)
  437. {
  438. return config.nopenclgpus;
  439. }
  440. unsigned starpu_spu_worker_get_count(void)
  441. {
  442. return config.ngordon_spus;
  443. }
  444. /* When analyzing performance, it is useful to see what is the processing unit
  445. * that actually performed the task. This function returns the id of the
  446. * processing unit actually executing it, therefore it makes no sense to use it
  447. * within the callbacks of SPU functions for instance. If called by some thread
  448. * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
  449. int starpu_worker_get_id(void)
  450. {
  451. struct starpu_worker_s * worker;
  452. worker = _starpu_get_local_worker_key();
  453. if (worker)
  454. {
  455. return worker->workerid;
  456. }
  457. else {
  458. /* there is no worker associated to that thread, perhaps it is
  459. * a thread from the application or this is some SPU worker */
  460. return -1;
  461. }
  462. }
  463. int starpu_worker_get_devid(int id)
  464. {
  465. return config.workers[id].devid;
  466. }
  467. struct starpu_worker_s *_starpu_get_worker_struct(unsigned id)
  468. {
  469. return &config.workers[id];
  470. }
  471. enum starpu_archtype starpu_worker_get_type(int id)
  472. {
  473. return config.workers[id].arch;
  474. }
  475. void starpu_worker_get_name(int id, char *dst, size_t maxlen)
  476. {
  477. char *name = config.workers[id].name;
  478. snprintf(dst, maxlen, "%s", name);
  479. }