workers.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2012 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2010, 2011 Institut National de Recherche en Informatique et Automatique
  6. * Copyright (C) 2011 Télécom-SudParis
  7. * Copyright (C) 2011 INRIA
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/workers.h>
  25. #include <core/debug.h>
  26. #include <core/task.h>
  27. #include <profiling/profiling.h>
  28. #include <starpu_task_list.h>
  29. #ifdef __MINGW32__
  30. #include <windows.h>
  31. #endif
  32. /* acquire/release semantic for concurrent initialization/de-initialization */
  33. static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
  34. static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER;
  35. static int init_count = 0;
  36. static enum { UNINITIALIZED, CHANGING, INITIALIZED } initialized = UNINITIALIZED;
  37. static pthread_key_t worker_key;
  38. static struct _starpu_machine_config config;
  39. int _starpu_is_initialized(void)
  40. {
  41. return initialized == INITIALIZED;
  42. }
  43. struct _starpu_machine_config *_starpu_get_machine_config(void)
  44. {
  45. return &config;
  46. }
  47. /* Makes sure that at least one of the workers of type <arch> can execute
  48. * <task>*/
  49. static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
  50. enum starpu_archtype arch)
  51. {
  52. int i;
  53. int nworkers = starpu_worker_get_count_by_type(arch);
  54. int workers[nworkers];
  55. STARPU_ASSERT(nworkers != -EINVAL);
  56. (void) starpu_worker_get_ids_by_type(arch, workers, nworkers);
  57. for (i = 0; i < nworkers; i++)
  58. if (task->cl->can_execute(workers[i], task, 0))
  59. return 1;
  60. return 0;
  61. }
  62. /* in case a task is submitted, we may check whether there exists a worker
  63. that may execute the task or not */
  64. uint32_t _starpu_worker_exists(struct starpu_task *task)
  65. {
  66. if (!(task->cl->where & config.worker_mask))
  67. return 0;
  68. if (!task->cl->can_execute)
  69. return 1;
  70. #ifdef STARPU_USE_CPU
  71. if ((task->cl->where & STARPU_CPU) &&
  72. _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
  73. return 1;
  74. #endif
  75. #ifdef STARPU_USE_CUDA
  76. if ((task->cl->where & STARPU_CUDA) &&
  77. _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
  78. return 1;
  79. #endif
  80. #ifdef STARPU_USE_OPENCL
  81. if ((task->cl->where & STARPU_OPENCL) &&
  82. _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
  83. return 1;
  84. #endif
  85. return 0;
  86. }
  87. uint32_t _starpu_can_submit_cuda_task(void)
  88. {
  89. return (STARPU_CUDA & config.worker_mask);
  90. }
  91. uint32_t _starpu_can_submit_cpu_task(void)
  92. {
  93. return (STARPU_CPU & config.worker_mask);
  94. }
  95. uint32_t _starpu_can_submit_opencl_task(void)
  96. {
  97. return (STARPU_OPENCL & config.worker_mask);
  98. }
  99. static int _starpu_can_use_nth_implementation(enum starpu_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
  100. {
  101. switch(arch)
  102. {
  103. case STARPU_CPU_WORKER:
  104. {
  105. starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  106. return func != NULL;
  107. }
  108. case STARPU_CUDA_WORKER:
  109. {
  110. starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  111. return func != NULL;
  112. }
  113. case STARPU_OPENCL_WORKER:
  114. {
  115. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  116. return func != NULL;
  117. }
  118. case STARPU_GORDON_WORKER:
  119. {
  120. starpu_gordon_func_t func = _starpu_task_get_gordon_nth_implementation(cl, nimpl);
  121. return func != 0;
  122. }
  123. default:
  124. STARPU_ASSERT_MSG(0, "Unknown arch type");
  125. }
  126. return 0;
  127. }
  128. int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  129. {
  130. /* TODO: check that the task operand sizes will fit on that device */
  131. return (task->cl->where & config.workers[workerid].worker_mask) &&
  132. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
  133. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
  134. }
  135. int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  136. {
  137. /* TODO: check that the task operand sizes will fit on that device */
  138. /* TODO: call application-provided function for various cases like
  139. * double support, shared memory size limit, etc. */
  140. struct starpu_codelet *cl = task->cl;
  141. unsigned nworkers = config.topology.nworkers;
  142. /* Is this a parallel worker ? */
  143. if (workerid < nworkers)
  144. {
  145. return !!((task->cl->where & config.workers[workerid].worker_mask) &&
  146. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
  147. }
  148. else
  149. {
  150. if ((cl->type == STARPU_SPMD)
  151. #ifdef STARPU_HAVE_HWLOC
  152. || (cl->type == STARPU_FORKJOIN)
  153. #endif
  154. )
  155. {
  156. /* TODO we should add other types of constraints */
  157. /* Is the worker larger than requested ? */
  158. int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
  159. return !!((worker_size <= task->cl->max_parallelism) &&
  160. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
  161. }
  162. else
  163. {
  164. /* We have a sequential task but a parallel worker */
  165. return 0;
  166. }
  167. }
  168. }
  169. /*
  170. * Runtime initialization methods
  171. */
  172. #ifdef STARPU_USE_GORDON
  173. static unsigned gordon_inited = 0;
  174. static struct _starpu_worker_set gordon_worker_set;
  175. #endif
  176. static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
  177. {
  178. pthread_cond_t *cond = &workerarg->sched_cond;
  179. pthread_mutex_t *mutex = &workerarg->sched_mutex;
  180. unsigned memory_node = workerarg->memory_node;
  181. _starpu_memory_node_register_condition(cond, mutex, memory_node);
  182. }
  183. /*
  184. * Returns 0 if the given driver is one of the drivers that must be launched by
  185. * the application itself, and not by StarPU, 1 otherwise.
  186. */
  187. static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
  188. struct starpu_driver *d)
  189. {
  190. if (conf->n_not_launched_drivers == 0 ||
  191. conf->not_launched_drivers == NULL)
  192. return 1;
  193. /* Is <d> in conf->not_launched_drivers ? */
  194. unsigned i;
  195. for (i = 0; i < conf->n_not_launched_drivers; i++)
  196. {
  197. if (d->type != conf->not_launched_drivers[i].type)
  198. continue;
  199. switch (d->type)
  200. {
  201. case STARPU_CUDA_WORKER:
  202. if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
  203. return 0;
  204. break;
  205. #ifdef STARPU_USE_OPENCL
  206. case STARPU_OPENCL_WORKER:
  207. if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
  208. return 0;
  209. break;
  210. #endif
  211. default:
  212. STARPU_ABORT();
  213. }
  214. }
  215. return 1;
  216. }
  217. static void _starpu_launch_drivers(struct _starpu_machine_config *config)
  218. {
  219. config->running = 1;
  220. config->submitting = 1;
  221. pthread_key_create(&worker_key, NULL);
  222. unsigned nworkers = config->topology.nworkers;
  223. /* Launch workers asynchronously (except for SPUs) */
  224. unsigned cuda = 0;
  225. unsigned worker;
  226. for (worker = 0; worker < nworkers; worker++)
  227. {
  228. struct _starpu_worker *workerarg = &config->workers[worker];
  229. workerarg->config = config;
  230. _starpu_barrier_counter_init(&workerarg->tasks_barrier, 0);
  231. _STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
  232. _STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
  233. workerarg->worker_size = 1;
  234. workerarg->combined_workerid = workerarg->workerid;
  235. workerarg->current_rank = 0;
  236. workerarg->has_prev_init = 0;
  237. /* mutex + cond only for the local list */
  238. /* we have a single local list */
  239. /* afterwards there would be a mutex + cond for the list of each strategy */
  240. _STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
  241. _STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
  242. /* if some codelet's termination cannot be handled directly :
  243. * for instance in the Gordon driver, Gordon tasks' callbacks
  244. * may be executed by another thread than that of the Gordon
  245. * driver so that we cannot call the push_codelet_output method
  246. * directly */
  247. workerarg->terminated_jobs = _starpu_job_list_new();
  248. starpu_task_list_init(&workerarg->local_tasks);
  249. workerarg->status = STATUS_INITIALIZING;
  250. _STARPU_DEBUG("initialising worker %u\n", worker);
  251. _starpu_init_worker_queue(workerarg);
  252. struct starpu_driver driver;
  253. driver.type = workerarg->arch;
  254. switch (workerarg->arch)
  255. {
  256. #ifdef STARPU_USE_CPU
  257. case STARPU_CPU_WORKER:
  258. workerarg->set = NULL;
  259. workerarg->worker_is_initialized = 0;
  260. pthread_create(&workerarg->worker_thread,
  261. NULL, _starpu_cpu_worker, workerarg);
  262. break;
  263. #endif
  264. #ifdef STARPU_USE_CUDA
  265. case STARPU_CUDA_WORKER:
  266. workerarg->set = NULL;
  267. workerarg->worker_is_initialized = 0;
  268. driver.id.cuda_id = cuda;
  269. if (_starpu_may_launch_driver(config->conf, &driver))
  270. {
  271. pthread_create(&workerarg->worker_thread,
  272. NULL, _starpu_cuda_worker, workerarg);
  273. }
  274. cuda++;
  275. break;
  276. #endif
  277. #ifdef STARPU_USE_OPENCL
  278. case STARPU_OPENCL_WORKER:
  279. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  280. if (!_starpu_may_launch_driver(config->conf, &driver))
  281. break;
  282. workerarg->set = NULL;
  283. workerarg->worker_is_initialized = 0;
  284. pthread_create(&workerarg->worker_thread,
  285. NULL, _starpu_opencl_worker, workerarg);
  286. break;
  287. #endif
  288. #ifdef STARPU_USE_GORDON
  289. case STARPU_GORDON_WORKER:
  290. /* we will only launch gordon once, but it will handle
  291. * the different SPU workers */
  292. if (!gordon_inited)
  293. {
  294. gordon_worker_set.nworkers = config->ngordon_spus;
  295. gordon_worker_set.workers = &config->workers[worker];
  296. gordon_worker_set.set_is_initialized = 0;
  297. pthread_create(&gordon_worker_set.worker_thread, NULL,
  298. _starpu_gordon_worker, &gordon_worker_set);
  299. _STARPU_PTHREAD_MUTEX_LOCK(&gordon_worker_set.mutex);
  300. while (!gordon_worker_set.set_is_initialized)
  301. _STARPU_PTHREAD_COND_WAIT(&gordon_worker_set.ready_cond,
  302. &gordon_worker_set.mutex);
  303. _STARPU_PTHREAD_MUTEX_UNLOCK(&gordon_worker_set.mutex);
  304. gordon_inited = 1;
  305. }
  306. workerarg->set = &gordon_worker_set;
  307. gordon_worker_set.joined = 0;
  308. workerarg->worker_is_running = 1;
  309. break;
  310. #endif
  311. default:
  312. STARPU_ABORT();
  313. }
  314. }
  315. cuda = 0;
  316. for (worker = 0; worker < nworkers; worker++)
  317. {
  318. struct _starpu_worker *workerarg = &config->workers[worker];
  319. struct starpu_driver driver;
  320. driver.type = workerarg->arch;
  321. switch (workerarg->arch)
  322. {
  323. case STARPU_CPU_WORKER:
  324. _STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  325. while (!workerarg->worker_is_initialized)
  326. _STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  327. _STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  328. break;
  329. case STARPU_CUDA_WORKER:
  330. driver.id.cuda_id = cuda;
  331. if (!_starpu_may_launch_driver(config->conf, &driver))
  332. {
  333. cuda++;
  334. break;
  335. }
  336. _STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  337. while (!workerarg->worker_is_initialized)
  338. _STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  339. _STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  340. cuda++;
  341. break;
  342. #ifdef STARPU_USE_OPENCL
  343. case STARPU_OPENCL_WORKER:
  344. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  345. if (!_starpu_may_launch_driver(config->conf, &driver))
  346. break;
  347. _STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  348. while (!workerarg->worker_is_initialized)
  349. _STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  350. _STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  351. break;
  352. #endif
  353. #ifdef STARPU_USE_GORDON
  354. case STARPU_GORDON_WORKER:
  355. /* the initialization of Gordon worker is
  356. * synchronous for now */
  357. break;
  358. #endif
  359. default:
  360. STARPU_ABORT();
  361. }
  362. }
  363. }
  364. void _starpu_set_local_worker_key(struct _starpu_worker *worker)
  365. {
  366. pthread_setspecific(worker_key, worker);
  367. }
  368. struct _starpu_worker *_starpu_get_local_worker_key(void)
  369. {
  370. return (struct _starpu_worker *) pthread_getspecific(worker_key);
  371. }
  372. /* Initialize the starpu_conf with default values */
  373. int starpu_conf_init(struct starpu_conf *conf)
  374. {
  375. if (!conf)
  376. return -EINVAL;
  377. memset(conf, 0, sizeof(*conf));
  378. conf->magic = 42;
  379. conf->sched_policy_name = getenv("STARPU_SCHED");
  380. conf->sched_policy = NULL;
  381. /* Note that starpu_get_env_number returns -1 in case the variable is
  382. * not defined */
  383. conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
  384. conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
  385. conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
  386. conf->nspus = starpu_get_env_number("STARPU_NGORDON");
  387. conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
  388. if (conf->calibrate == -1)
  389. conf->calibrate = 0;
  390. conf->use_explicit_workers_bindid = 0; /* TODO */
  391. conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
  392. conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
  393. conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
  394. if (conf->single_combined_worker == -1)
  395. conf->single_combined_worker = 0;
  396. conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
  397. if (conf->disable_asynchronous_copy == -1)
  398. conf->disable_asynchronous_copy = 0;
  399. return 0;
  400. }
  401. static void _starpu_conf_set_value_against_environment(char *name, int *value)
  402. {
  403. int number;
  404. number = starpu_get_env_number(name);
  405. if (number != -1)
  406. {
  407. *value = number;
  408. }
  409. }
  410. static void _starpu_conf_check_environment(struct starpu_conf *conf)
  411. {
  412. char *sched = getenv("STARPU_SCHED");
  413. if (sched)
  414. {
  415. conf->sched_policy_name = sched;
  416. }
  417. _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus);
  418. _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda);
  419. _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl);
  420. _starpu_conf_set_value_against_environment("STARPU_NGORDON", &conf->nspus);
  421. _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate);
  422. _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker);
  423. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
  424. }
  425. int starpu_init(struct starpu_conf *user_conf)
  426. {
  427. int ret;
  428. #ifdef __GNUC__
  429. #ifndef __OPTIMIZE__
  430. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
  431. #endif
  432. #endif
  433. #if 0
  434. #ifndef STARPU_NO_ASSERT
  435. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured without --enable-fast\n");
  436. #endif
  437. #endif
  438. #ifdef STARPU_MEMORY_STATUS
  439. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-memory-status, which slows down a bit\n");
  440. #endif
  441. #ifdef STARPU_VERBOSE
  442. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
  443. #endif
  444. #ifdef STARPU_USE_FXT
  445. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --with-fxt, which slows down a bit\n");
  446. #endif
  447. #ifdef STARPU_PERF_DEBUG
  448. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n");
  449. #endif
  450. #ifdef STARPU_MODEL_DEBUG
  451. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
  452. #endif
  453. #ifdef STARPU_DATA_STATS
  454. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-stats, which slows down a bit\n");
  455. #endif
  456. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  457. while (initialized == CHANGING)
  458. /* Wait for the other one changing it */
  459. _STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  460. init_count++;
  461. if (initialized == INITIALIZED)
  462. {
  463. /* He initialized it, don't do it again, and let the others get the mutex */
  464. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  465. return 0;
  466. }
  467. /* initialized == UNINITIALIZED */
  468. initialized = CHANGING;
  469. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  470. #ifdef __MINGW32__
  471. WSADATA wsadata;
  472. WSAStartup(MAKEWORD(1,0), &wsadata);
  473. #endif
  474. srand(2008);
  475. #ifdef STARPU_USE_FXT
  476. _starpu_start_fxt_profiling();
  477. #endif
  478. _starpu_open_debug_logfile();
  479. _starpu_data_interface_init();
  480. _starpu_timing_init();
  481. // _starpu_profiling_init();
  482. _starpu_load_bus_performance_files();
  483. /* store the pointer to the user explicit configuration during the
  484. * initialization */
  485. if (user_conf == NULL)
  486. {
  487. struct starpu_conf *conf = malloc(sizeof(struct starpu_conf));
  488. starpu_conf_init(conf);
  489. config.conf = conf;
  490. config.default_conf = 1;
  491. }
  492. else
  493. {
  494. if (user_conf->magic != 42) {
  495. fprintf(stderr, "starpu_conf structure needs to be initialized with starpu_conf_init\n");
  496. return -EINVAL;
  497. }
  498. config.conf = user_conf;
  499. config.default_conf = 0;
  500. }
  501. _starpu_conf_check_environment(config.conf);
  502. _starpu_init_all_sched_ctxs(&config);
  503. ret = _starpu_build_topology(&config);
  504. if (ret)
  505. {
  506. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  507. init_count--;
  508. initialized = UNINITIALIZED;
  509. /* Let somebody else try to do it */
  510. _STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  511. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  512. return ret;
  513. }
  514. /* We need to store the current task handled by the different
  515. * threads */
  516. _starpu_initialize_current_task_key();
  517. struct _starpu_sched_ctx *sched_ctx;
  518. if(user_conf == NULL)
  519. sched_ctx = _starpu_create_sched_ctx(NULL, NULL, -1, 1, "init");
  520. else
  521. sched_ctx = _starpu_create_sched_ctx(user_conf->sched_policy_name, NULL, -1, 1, "init");
  522. starpu_set_sched_ctx(&sched_ctx->id);
  523. _starpu_initialize_registered_performance_models();
  524. /* Launch "basic" workers (ie. non-combined workers) */
  525. _starpu_launch_drivers(&config);
  526. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  527. initialized = INITIALIZED;
  528. /* Tell everybody that we initialized */
  529. _STARPU_PTHREAD_COND_BROADCAST(&init_cond);
  530. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  531. _STARPU_DEBUG("Initialisation finished\n");
  532. return 0;
  533. }
  534. void starpu_profiling_init()
  535. {
  536. _starpu_profiling_init();
  537. }
  538. /*
  539. * Handle runtime termination
  540. */
  541. static void _starpu_terminate_workers(struct _starpu_machine_config *config)
  542. {
  543. int status STARPU_ATTRIBUTE_UNUSED;
  544. unsigned workerid;
  545. for (workerid = 0; workerid < config->topology.nworkers; workerid++)
  546. {
  547. starpu_wake_all_blocked_workers();
  548. _STARPU_DEBUG("wait for worker %u\n", workerid);
  549. struct _starpu_worker_set *set = config->workers[workerid].set;
  550. struct _starpu_worker *worker = &config->workers[workerid];
  551. /* in case StarPU termination code is called from a callback,
  552. * we have to check if pthread_self() is the worker itself */
  553. if (set)
  554. {
  555. if (!set->joined)
  556. {
  557. if (!pthread_equal(pthread_self(), set->worker_thread))
  558. {
  559. status = pthread_join(set->worker_thread, NULL);
  560. #ifdef STARPU_VERBOSE
  561. if (status)
  562. {
  563. _STARPU_DEBUG("pthread_join -> %d\n", status);
  564. }
  565. #endif
  566. }
  567. set->joined = 1;
  568. }
  569. }
  570. else
  571. {
  572. if (!pthread_equal(pthread_self(), worker->worker_thread))
  573. {
  574. status = pthread_join(worker->worker_thread, NULL);
  575. #ifdef STARPU_VERBOSE
  576. if (status)
  577. {
  578. _STARPU_DEBUG("pthread_join -> %d\n", status);
  579. }
  580. #endif
  581. }
  582. }
  583. STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
  584. _starpu_job_list_delete(worker->terminated_jobs);
  585. }
  586. }
  587. unsigned _starpu_machine_is_running(void)
  588. {
  589. /* running is just protected by a memory barrier */
  590. STARPU_SYNCHRONIZE();
  591. return config.running;
  592. }
  593. unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED)
  594. {
  595. #ifdef STARPU_NON_BLOCKING_DRIVERS
  596. return 0;
  597. #else
  598. unsigned can_block = 1;
  599. if (!_starpu_check_that_no_data_request_exists(memnode))
  600. can_block = 0;
  601. if (!_starpu_machine_is_running())
  602. can_block = 0;
  603. if (!_starpu_execute_registered_progression_hooks())
  604. can_block = 0;
  605. return can_block;
  606. #endif
  607. }
  608. static void _starpu_kill_all_workers(struct _starpu_machine_config *config)
  609. {
  610. /* set the flag which will tell workers to stop */
  611. config->running = 0;
  612. /* running is just protected by a memory barrier */
  613. STARPU_SYNCHRONIZE();
  614. starpu_wake_all_blocked_workers();
  615. }
  616. void starpu_display_stats()
  617. {
  618. const char *stats;
  619. if ((stats = getenv("STARPU_BUS_STATS")) && atoi(stats))
  620. starpu_bus_profiling_helper_display_summary();
  621. if ((stats = getenv("STARPU_WORKER_STATS")) && atoi(stats))
  622. starpu_worker_profiling_helper_display_summary();
  623. }
  624. void starpu_shutdown(void)
  625. {
  626. const char *stats;
  627. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  628. init_count--;
  629. if (init_count)
  630. {
  631. _STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
  632. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  633. return;
  634. }
  635. /* We're last */
  636. initialized = CHANGING;
  637. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  638. starpu_task_wait_for_no_ready();
  639. _starpu_display_msi_stats();
  640. _starpu_display_alloc_cache_stats();
  641. /* tell all workers to shutdown */
  642. _starpu_kill_all_workers(&config);
  643. #ifdef STARPU_MEMORY_STATUS
  644. if ((stats = getenv("STARPU_MEMORY_STATS")) && atoi(stats))
  645. _starpu_display_data_stats();
  646. #endif
  647. #ifdef STARPU_DATA_STATS
  648. _starpu_display_comm_amounts();
  649. #endif
  650. if ((stats = getenv("STARPU_BUS_STATS")) && atoi(stats))
  651. starpu_bus_profiling_helper_display_summary();
  652. if ((stats = getenv("STARPU_WORKER_STATS")) && atoi(stats))
  653. starpu_worker_profiling_helper_display_summary();
  654. _starpu_deinitialize_registered_performance_models();
  655. /* wait for their termination */
  656. _starpu_terminate_workers(&config);
  657. _starpu_delete_all_sched_ctxs();
  658. _starpu_destroy_topology(&config);
  659. #ifdef STARPU_USE_FXT
  660. _starpu_stop_fxt_profiling();
  661. #endif
  662. _starpu_data_interface_shutdown();
  663. /* Drop all remaining tags */
  664. _starpu_tag_clear();
  665. _starpu_close_debug_logfile();
  666. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  667. initialized = UNINITIALIZED;
  668. /* Let someone else that wants to initialize it again do it */
  669. _STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  670. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  671. /* Clear memory if it was allocated by StarPU */
  672. if (config.default_conf)
  673. free(config.conf);
  674. _STARPU_DEBUG("Shutdown finished\n");
  675. }
  676. unsigned starpu_worker_get_count(void)
  677. {
  678. return config.topology.nworkers;
  679. }
  680. int starpu_worker_get_count_by_type(enum starpu_archtype type)
  681. {
  682. switch (type)
  683. {
  684. case STARPU_CPU_WORKER:
  685. return config.topology.ncpus;
  686. case STARPU_CUDA_WORKER:
  687. return config.topology.ncudagpus;
  688. case STARPU_OPENCL_WORKER:
  689. return config.topology.nopenclgpus;
  690. case STARPU_GORDON_WORKER:
  691. return config.topology.ngordon_spus;
  692. default:
  693. return -EINVAL;
  694. }
  695. }
  696. unsigned starpu_combined_worker_get_count(void)
  697. {
  698. return config.topology.ncombinedworkers;
  699. }
  700. unsigned starpu_cpu_worker_get_count(void)
  701. {
  702. return config.topology.ncpus;
  703. }
  704. unsigned starpu_cuda_worker_get_count(void)
  705. {
  706. return config.topology.ncudagpus;
  707. }
  708. unsigned starpu_opencl_worker_get_count(void)
  709. {
  710. return config.topology.nopenclgpus;
  711. }
  712. unsigned starpu_spu_worker_get_count(void)
  713. {
  714. return config.topology.ngordon_spus;
  715. }
  716. int starpu_asynchronous_copy_disabled()
  717. {
  718. return config.conf->disable_asynchronous_copy;
  719. }
  720. /* When analyzing performance, it is useful to see what is the processing unit
  721. * that actually performed the task. This function returns the id of the
  722. * processing unit actually executing it, therefore it makes no sense to use it
  723. * within the callbacks of SPU functions for instance. If called by some thread
  724. * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
  725. int starpu_worker_get_id(void)
  726. {
  727. struct _starpu_worker * worker;
  728. worker = _starpu_get_local_worker_key();
  729. if (worker)
  730. {
  731. return worker->workerid;
  732. }
  733. else
  734. {
  735. /* there is no worker associated to that thread, perhaps it is
  736. * a thread from the application or this is some SPU worker */
  737. return -1;
  738. }
  739. }
  740. int starpu_combined_worker_get_id(void)
  741. {
  742. struct _starpu_worker *worker;
  743. worker = _starpu_get_local_worker_key();
  744. if (worker)
  745. {
  746. return worker->combined_workerid;
  747. }
  748. else
  749. {
  750. /* there is no worker associated to that thread, perhaps it is
  751. * a thread from the application or this is some SPU worker */
  752. return -1;
  753. }
  754. }
  755. int starpu_combined_worker_get_size(void)
  756. {
  757. struct _starpu_worker *worker;
  758. worker = _starpu_get_local_worker_key();
  759. if (worker)
  760. {
  761. return worker->worker_size;
  762. }
  763. else
  764. {
  765. /* there is no worker associated to that thread, perhaps it is
  766. * a thread from the application or this is some SPU worker */
  767. return -1;
  768. }
  769. }
  770. int starpu_combined_worker_get_rank(void)
  771. {
  772. struct _starpu_worker *worker;
  773. worker = _starpu_get_local_worker_key();
  774. if (worker)
  775. {
  776. return worker->current_rank;
  777. }
  778. else
  779. {
  780. /* there is no worker associated to that thread, perhaps it is
  781. * a thread from the application or this is some SPU worker */
  782. return -1;
  783. }
  784. }
  785. int starpu_worker_get_devid(int id)
  786. {
  787. return config.workers[id].devid;
  788. }
  789. struct _starpu_worker *_starpu_get_worker_struct(unsigned id)
  790. {
  791. return &config.workers[id];
  792. }
  793. unsigned starpu_worker_is_combined_worker(int id)
  794. {
  795. return id >= (int)config.topology.nworkers;
  796. }
  797. struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id)
  798. {
  799. STARPU_ASSERT(id <= STARPU_NMAX_SCHED_CTXS);
  800. return &config.sched_ctxs[id];
  801. }
  802. struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id)
  803. {
  804. unsigned basic_worker_count = starpu_worker_get_count();
  805. STARPU_ASSERT(id >= basic_worker_count);
  806. return &config.combined_workers[id - basic_worker_count];
  807. }
  808. enum starpu_archtype starpu_worker_get_type(int id)
  809. {
  810. return config.workers[id].arch;
  811. }
  812. int starpu_worker_get_ids_by_type(enum starpu_archtype type, int *workerids, int maxsize)
  813. {
  814. unsigned nworkers = starpu_worker_get_count();
  815. int cnt = 0;
  816. unsigned id;
  817. for (id = 0; id < nworkers; id++)
  818. {
  819. if (starpu_worker_get_type(id) == type)
  820. {
  821. /* Perhaps the array is too small ? */
  822. if (cnt >= maxsize)
  823. return -ERANGE;
  824. workerids[cnt++] = id;
  825. }
  826. }
  827. return cnt;
  828. }
  829. int starpu_worker_get_nids_by_type(enum starpu_archtype type, int *workerids, int maxsize)
  830. {
  831. unsigned nworkers = starpu_worker_get_count();
  832. int cnt = 0;
  833. unsigned id;
  834. for (id = 0; id < nworkers; id++)
  835. {
  836. if (starpu_worker_get_type(id) == type)
  837. {
  838. /* Perhaps the array is too small ? */
  839. if (cnt >= maxsize)
  840. return cnt;
  841. workerids[cnt++] = id;
  842. }
  843. }
  844. return cnt;
  845. }
  846. int starpu_worker_get_available_ids_by_type(enum starpu_archtype type, int *workerids, int maxsize)
  847. {
  848. unsigned nworkers = starpu_worker_get_count();
  849. int cnt = 0;
  850. unsigned id, worker;
  851. unsigned found = 0;
  852. for (id = 0; id < nworkers; id++)
  853. {
  854. found = 0;
  855. if (starpu_worker_get_type(id) == type)
  856. {
  857. /* Perhaps the array is too small ? */
  858. if (cnt >= maxsize)
  859. return cnt;
  860. int s;
  861. for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++)
  862. {
  863. if(config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
  864. {
  865. struct worker_collection *workers = config.sched_ctxs[s].workers;
  866. if(workers->init_cursor)
  867. workers->init_cursor(workers);
  868. while(workers->has_next(workers))
  869. {
  870. worker = workers->get_next(workers);
  871. if(worker == id)
  872. {
  873. found = 1;
  874. break;
  875. }
  876. }
  877. if(workers->init_cursor)
  878. workers->deinit_cursor(workers);
  879. if(found) break;
  880. }
  881. }
  882. if(!found)
  883. workerids[cnt++] = id;
  884. }
  885. }
  886. return cnt;
  887. }
  888. void starpu_worker_get_name(int id, char *dst, size_t maxlen)
  889. {
  890. char *name = config.workers[id].name;
  891. snprintf(dst, maxlen, "%s", name);
  892. }
  893. /* Retrieve the status which indicates what the worker is currently doing. */
  894. enum _starpu_worker_status _starpu_worker_get_status(int workerid)
  895. {
  896. return config.workers[workerid].status;
  897. }
  898. /* Change the status of the worker which indicates what the worker is currently
  899. * doing (eg. executing a callback). */
  900. void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status)
  901. {
  902. config.workers[workerid].status = status;
  903. }
  904. struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
  905. {
  906. return &config.sched_ctxs[0];
  907. }
  908. #ifdef STARPU_USE_CUDA
  909. extern int _starpu_run_cuda(struct starpu_driver *);
  910. #endif
  911. #ifdef STARPU_USE_OPENCL
  912. extern int _starpu_run_opencl(struct starpu_driver *);
  913. #endif
  914. int
  915. starpu_run_driver(struct starpu_driver *d)
  916. {
  917. if (!d)
  918. return -EINVAL;
  919. switch (d->type)
  920. {
  921. #ifdef STARPU_USE_CUDA
  922. case STARPU_CUDA_WORKER:
  923. return _starpu_run_cuda(d);
  924. #endif
  925. #ifdef STARPU_USE_OPENCL
  926. case STARPU_OPENCL_WORKER:
  927. return _starpu_run_opencl(d);
  928. #endif
  929. case STARPU_CPU_WORKER: /* Not supported yet */
  930. case STARPU_GORDON_WORKER: /* Not supported yet */
  931. default:
  932. return -EINVAL;
  933. }
  934. }