workers.c 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2012 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012 Centre National de la Recherche Scientifique
  5. * Copyright (C) 2010, 2011 Institut National de Recherche en Informatique et Automatique
  6. * Copyright (C) 2011 Télécom-SudParis
  7. * Copyright (C) 2011 INRIA
  8. *
  9. * StarPU is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation; either version 2.1 of the License, or (at
  12. * your option) any later version.
  13. *
  14. * StarPU is distributed in the hope that it will be useful, but
  15. * WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17. *
  18. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  19. */
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <common/config.h>
  23. #include <common/utils.h>
  24. #include <core/workers.h>
  25. #include <core/debug.h>
  26. #include <core/task.h>
  27. #include <profiling/profiling.h>
  28. #include <starpu_task_list.h>
  29. #include <drivers/cpu/driver_cpu.h>
  30. #include <drivers/cuda/driver_cuda.h>
  31. #include <drivers/opencl/driver_opencl.h>
  32. #ifdef __MINGW32__
  33. #include <windows.h>
  34. #endif
  35. /* acquire/release semantic for concurrent initialization/de-initialization */
  36. static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
  37. static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER;
  38. static int init_count = 0;
  39. static enum { UNINITIALIZED, CHANGING, INITIALIZED } initialized = UNINITIALIZED;
  40. static pthread_key_t worker_key;
  41. static struct _starpu_machine_config config;
  42. int _starpu_is_initialized(void)
  43. {
  44. return initialized == INITIALIZED;
  45. }
  46. struct _starpu_machine_config *_starpu_get_machine_config(void)
  47. {
  48. return &config;
  49. }
  50. /* Makes sure that at least one of the workers of type <arch> can execute
  51. * <task>*/
  52. static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
  53. enum starpu_archtype arch)
  54. {
  55. int i;
  56. int nworkers = starpu_worker_get_count_by_type(arch);
  57. int workers[nworkers];
  58. STARPU_ASSERT(nworkers != -EINVAL);
  59. (void) starpu_worker_get_ids_by_type(arch, workers, nworkers);
  60. for (i = 0; i < nworkers; i++)
  61. if (task->cl->can_execute(workers[i], task, 0))
  62. return 1;
  63. return 0;
  64. }
  65. /* in case a task is submitted, we may check whether there exists a worker
  66. that may execute the task or not */
  67. uint32_t _starpu_worker_exists(struct starpu_task *task)
  68. {
  69. if (!(task->cl->where & config.worker_mask))
  70. return 0;
  71. if (!task->cl->can_execute)
  72. return 1;
  73. #ifdef STARPU_USE_CPU
  74. if ((task->cl->where & STARPU_CPU) &&
  75. _starpu_worker_exists_and_can_execute(task, STARPU_CPU_WORKER))
  76. return 1;
  77. #endif
  78. #ifdef STARPU_USE_CUDA
  79. if ((task->cl->where & STARPU_CUDA) &&
  80. _starpu_worker_exists_and_can_execute(task, STARPU_CUDA_WORKER))
  81. return 1;
  82. #endif
  83. #ifdef STARPU_USE_OPENCL
  84. if ((task->cl->where & STARPU_OPENCL) &&
  85. _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
  86. return 1;
  87. #endif
  88. return 0;
  89. }
  90. uint32_t _starpu_can_submit_cuda_task(void)
  91. {
  92. return (STARPU_CUDA & config.worker_mask);
  93. }
  94. uint32_t _starpu_can_submit_cpu_task(void)
  95. {
  96. return (STARPU_CPU & config.worker_mask);
  97. }
  98. uint32_t _starpu_can_submit_opencl_task(void)
  99. {
  100. return (STARPU_OPENCL & config.worker_mask);
  101. }
  102. static int _starpu_can_use_nth_implementation(enum starpu_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
  103. {
  104. switch(arch)
  105. {
  106. case STARPU_CPU_WORKER:
  107. {
  108. starpu_cpu_func_t func = _starpu_task_get_cpu_nth_implementation(cl, nimpl);
  109. return func != NULL;
  110. }
  111. case STARPU_CUDA_WORKER:
  112. {
  113. starpu_cuda_func_t func = _starpu_task_get_cuda_nth_implementation(cl, nimpl);
  114. return func != NULL;
  115. }
  116. case STARPU_OPENCL_WORKER:
  117. {
  118. starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
  119. return func != NULL;
  120. }
  121. case STARPU_GORDON_WORKER:
  122. {
  123. starpu_gordon_func_t func = _starpu_task_get_gordon_nth_implementation(cl, nimpl);
  124. return func != 0;
  125. }
  126. default:
  127. STARPU_ASSERT_MSG(0, "Unknown arch type");
  128. }
  129. return 0;
  130. }
  131. int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  132. {
  133. /* TODO: check that the task operand sizes will fit on that device */
  134. return (task->cl->where & config.workers[workerid].worker_mask) &&
  135. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl) &&
  136. (!task->cl->can_execute || task->cl->can_execute(workerid, task, nimpl));
  137. }
  138. int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
  139. {
  140. /* TODO: check that the task operand sizes will fit on that device */
  141. /* TODO: call application-provided function for various cases like
  142. * double support, shared memory size limit, etc. */
  143. struct starpu_codelet *cl = task->cl;
  144. unsigned nworkers = config.topology.nworkers;
  145. /* Is this a parallel worker ? */
  146. if (workerid < nworkers)
  147. {
  148. return !!((task->cl->where & config.workers[workerid].worker_mask) &&
  149. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
  150. }
  151. else
  152. {
  153. if ((cl->type == STARPU_SPMD)
  154. #ifdef STARPU_HAVE_HWLOC
  155. || (cl->type == STARPU_FORKJOIN)
  156. #endif
  157. )
  158. {
  159. /* TODO we should add other types of constraints */
  160. /* Is the worker larger than requested ? */
  161. int worker_size = (int)config.combined_workers[workerid - nworkers].worker_size;
  162. return !!((worker_size <= task->cl->max_parallelism) &&
  163. _starpu_can_use_nth_implementation(config.workers[workerid].arch, task->cl, nimpl));
  164. }
  165. else
  166. {
  167. /* We have a sequential task but a parallel worker */
  168. return 0;
  169. }
  170. }
  171. }
  172. /*
  173. * Runtime initialization methods
  174. */
  175. #ifdef STARPU_USE_GORDON
  176. static unsigned gordon_inited = 0;
  177. static struct _starpu_worker_set gordon_worker_set;
  178. #endif
  179. static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
  180. {
  181. pthread_cond_t *cond = &workerarg->sched_cond;
  182. pthread_mutex_t *mutex = &workerarg->sched_mutex;
  183. unsigned memory_node = workerarg->memory_node;
  184. _starpu_memory_node_register_condition(cond, mutex, memory_node);
  185. }
  186. /*
  187. * Returns 0 if the given driver is one of the drivers that must be launched by
  188. * the application itself, and not by StarPU, 1 otherwise.
  189. */
  190. static unsigned _starpu_may_launch_driver(struct starpu_conf *conf,
  191. struct starpu_driver *d)
  192. {
  193. if (conf->n_not_launched_drivers == 0 ||
  194. conf->not_launched_drivers == NULL)
  195. return 1;
  196. /* Is <d> in conf->not_launched_drivers ? */
  197. unsigned i;
  198. for (i = 0; i < conf->n_not_launched_drivers; i++)
  199. {
  200. if (d->type != conf->not_launched_drivers[i].type)
  201. continue;
  202. switch (d->type)
  203. {
  204. case STARPU_CPU_WORKER:
  205. if (d->id.cpu_id == conf->not_launched_drivers[i].id.cpu_id)
  206. return 0;
  207. case STARPU_CUDA_WORKER:
  208. if (d->id.cuda_id == conf->not_launched_drivers[i].id.cuda_id)
  209. return 0;
  210. break;
  211. #ifdef STARPU_USE_OPENCL
  212. case STARPU_OPENCL_WORKER:
  213. if (d->id.opencl_id == conf->not_launched_drivers[i].id.opencl_id)
  214. return 0;
  215. break;
  216. #endif
  217. default:
  218. STARPU_ABORT();
  219. }
  220. }
  221. return 1;
  222. }
  223. static void _starpu_launch_drivers(struct _starpu_machine_config *config)
  224. {
  225. config->running = 1;
  226. config->submitting = 1;
  227. pthread_key_create(&worker_key, NULL);
  228. unsigned nworkers = config->topology.nworkers;
  229. /* Launch workers asynchronously (except for SPUs) */
  230. unsigned cpu = 0, cuda = 0;
  231. unsigned worker;
  232. for (worker = 0; worker < nworkers; worker++)
  233. {
  234. struct _starpu_worker *workerarg = &config->workers[worker];
  235. workerarg->config = config;
  236. _starpu_barrier_counter_init(&workerarg->tasks_barrier, 0);
  237. _STARPU_PTHREAD_MUTEX_INIT(&workerarg->mutex, NULL);
  238. _STARPU_PTHREAD_COND_INIT(&workerarg->ready_cond, NULL);
  239. workerarg->worker_size = 1;
  240. workerarg->combined_workerid = workerarg->workerid;
  241. workerarg->current_rank = 0;
  242. workerarg->run_by_starpu = 1;
  243. workerarg->has_prev_init = 0;
  244. /* mutex + cond only for the local list */
  245. /* we have a single local list */
  246. /* afterwards there would be a mutex + cond for the list of each strategy */
  247. _STARPU_PTHREAD_MUTEX_INIT(&workerarg->sched_mutex, NULL);
  248. _STARPU_PTHREAD_COND_INIT(&workerarg->sched_cond, NULL);
  249. /* if some codelet's termination cannot be handled directly :
  250. * for instance in the Gordon driver, Gordon tasks' callbacks
  251. * may be executed by another thread than that of the Gordon
  252. * driver so that we cannot call the push_codelet_output method
  253. * directly */
  254. workerarg->terminated_jobs = _starpu_job_list_new();
  255. starpu_task_list_init(&workerarg->local_tasks);
  256. workerarg->status = STATUS_INITIALIZING;
  257. _STARPU_DEBUG("initialising worker %u\n", worker);
  258. _starpu_init_worker_queue(workerarg);
  259. struct starpu_driver driver;
  260. driver.type = workerarg->arch;
  261. switch (workerarg->arch)
  262. {
  263. #ifdef STARPU_USE_CPU
  264. case STARPU_CPU_WORKER:
  265. workerarg->set = NULL;
  266. workerarg->worker_is_initialized = 0;
  267. driver.id.cpu_id = cpu;
  268. if (_starpu_may_launch_driver(config->conf, &driver))
  269. {
  270. pthread_create(&workerarg->worker_thread,
  271. NULL, _starpu_cpu_worker, workerarg);
  272. }
  273. else
  274. {
  275. workerarg->run_by_starpu = 0;
  276. }
  277. cpu++;
  278. break;
  279. #endif
  280. #ifdef STARPU_USE_CUDA
  281. case STARPU_CUDA_WORKER:
  282. workerarg->set = NULL;
  283. workerarg->worker_is_initialized = 0;
  284. driver.id.cuda_id = cuda;
  285. if (_starpu_may_launch_driver(config->conf, &driver))
  286. {
  287. pthread_create(&workerarg->worker_thread,
  288. NULL, _starpu_cuda_worker, workerarg);
  289. }
  290. else
  291. {
  292. workerarg->run_by_starpu = 0;
  293. }
  294. cuda++;
  295. break;
  296. #endif
  297. #ifdef STARPU_USE_OPENCL
  298. case STARPU_OPENCL_WORKER:
  299. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  300. if (!_starpu_may_launch_driver(config->conf, &driver))
  301. {
  302. workerarg->run_by_starpu = 0;
  303. break;
  304. }
  305. workerarg->set = NULL;
  306. workerarg->worker_is_initialized = 0;
  307. pthread_create(&workerarg->worker_thread,
  308. NULL, _starpu_opencl_worker, workerarg);
  309. break;
  310. #endif
  311. #ifdef STARPU_USE_GORDON
  312. case STARPU_GORDON_WORKER:
  313. /* we will only launch gordon once, but it will handle
  314. * the different SPU workers */
  315. if (!gordon_inited)
  316. {
  317. gordon_worker_set.nworkers = config->ngordon_spus;
  318. gordon_worker_set.workers = &config->workers[worker];
  319. gordon_worker_set.set_is_initialized = 0;
  320. pthread_create(&gordon_worker_set.worker_thread, NULL,
  321. _starpu_gordon_worker, &gordon_worker_set);
  322. _STARPU_PTHREAD_MUTEX_LOCK(&gordon_worker_set.mutex);
  323. while (!gordon_worker_set.set_is_initialized)
  324. _STARPU_PTHREAD_COND_WAIT(&gordon_worker_set.ready_cond,
  325. &gordon_worker_set.mutex);
  326. _STARPU_PTHREAD_MUTEX_UNLOCK(&gordon_worker_set.mutex);
  327. gordon_inited = 1;
  328. }
  329. workerarg->set = &gordon_worker_set;
  330. gordon_worker_set.joined = 0;
  331. workerarg->worker_is_running = 1;
  332. break;
  333. #endif
  334. default:
  335. STARPU_ABORT();
  336. }
  337. }
  338. cpu = 0;
  339. cuda = 0;
  340. for (worker = 0; worker < nworkers; worker++)
  341. {
  342. struct _starpu_worker *workerarg = &config->workers[worker];
  343. struct starpu_driver driver;
  344. driver.type = workerarg->arch;
  345. switch (workerarg->arch)
  346. {
  347. case STARPU_CPU_WORKER:
  348. driver.id.cpu_id = cpu;
  349. if (!_starpu_may_launch_driver(config->conf, &driver))
  350. {
  351. cpu++;
  352. break;
  353. }
  354. _STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  355. while (!workerarg->worker_is_initialized)
  356. _STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  357. _STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  358. cpu++;
  359. break;
  360. case STARPU_CUDA_WORKER:
  361. driver.id.cuda_id = cuda;
  362. if (!_starpu_may_launch_driver(config->conf, &driver))
  363. {
  364. cuda++;
  365. break;
  366. }
  367. _STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  368. while (!workerarg->worker_is_initialized)
  369. _STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  370. _STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  371. cuda++;
  372. break;
  373. #ifdef STARPU_USE_OPENCL
  374. case STARPU_OPENCL_WORKER:
  375. starpu_opencl_get_device(workerarg->devid, &driver.id.opencl_id);
  376. if (!_starpu_may_launch_driver(config->conf, &driver))
  377. break;
  378. _STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
  379. while (!workerarg->worker_is_initialized)
  380. _STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
  381. _STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
  382. break;
  383. #endif
  384. #ifdef STARPU_USE_GORDON
  385. case STARPU_GORDON_WORKER:
  386. /* the initialization of Gordon worker is
  387. * synchronous for now */
  388. break;
  389. #endif
  390. default:
  391. STARPU_ABORT();
  392. }
  393. }
  394. }
  395. void _starpu_set_local_worker_key(struct _starpu_worker *worker)
  396. {
  397. pthread_setspecific(worker_key, worker);
  398. }
  399. struct _starpu_worker *_starpu_get_local_worker_key(void)
  400. {
  401. return (struct _starpu_worker *) pthread_getspecific(worker_key);
  402. }
  403. /* Initialize the starpu_conf with default values */
  404. int starpu_conf_init(struct starpu_conf *conf)
  405. {
  406. if (!conf)
  407. return -EINVAL;
  408. memset(conf, 0, sizeof(*conf));
  409. conf->magic = 42;
  410. conf->sched_policy_name = getenv("STARPU_SCHED");
  411. conf->sched_policy = NULL;
  412. /* Note that starpu_get_env_number returns -1 in case the variable is
  413. * not defined */
  414. /* Backward compatibility: check the value of STARPU_NCPUS if
  415. * STARPU_NCPU is not set. */
  416. conf->ncpus = starpu_get_env_number("STARPU_NCPU");
  417. if (conf->ncpus == -1)
  418. conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
  419. conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
  420. conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
  421. conf->nspus = starpu_get_env_number("STARPU_NGORDON");
  422. conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
  423. if (conf->calibrate == -1)
  424. conf->calibrate = 0;
  425. conf->use_explicit_workers_bindid = 0; /* TODO */
  426. conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
  427. conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
  428. conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
  429. if (conf->single_combined_worker == -1)
  430. conf->single_combined_worker = 0;
  431. conf->disable_asynchronous_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_COPY");
  432. if (conf->disable_asynchronous_copy == -1)
  433. conf->disable_asynchronous_copy = 0;
  434. return 0;
  435. }
  436. static void _starpu_conf_set_value_against_environment(char *name, int *value)
  437. {
  438. int number;
  439. number = starpu_get_env_number(name);
  440. if (number != -1)
  441. {
  442. *value = number;
  443. }
  444. }
  445. static void _starpu_conf_check_environment(struct starpu_conf *conf)
  446. {
  447. char *sched = getenv("STARPU_SCHED");
  448. if (sched)
  449. {
  450. conf->sched_policy_name = sched;
  451. }
  452. _starpu_conf_set_value_against_environment("STARPU_NCPUS", &conf->ncpus);
  453. _starpu_conf_set_value_against_environment("STARPU_NCPU", &conf->ncpus);
  454. _starpu_conf_set_value_against_environment("STARPU_NCUDA", &conf->ncuda);
  455. _starpu_conf_set_value_against_environment("STARPU_NOPENCL", &conf->nopencl);
  456. _starpu_conf_set_value_against_environment("STARPU_NGORDON", &conf->nspus);
  457. _starpu_conf_set_value_against_environment("STARPU_CALIBRATE", &conf->calibrate);
  458. _starpu_conf_set_value_against_environment("STARPU_SINGLE_COMBINED_WORKER", &conf->single_combined_worker);
  459. _starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
  460. }
  461. int starpu_init(struct starpu_conf *user_conf)
  462. {
  463. int ret;
  464. #ifdef __GNUC__
  465. #ifndef __OPTIMIZE__
  466. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-debug (-O0), and is thus not optimized\n");
  467. #endif
  468. #endif
  469. #if 0
  470. #ifndef STARPU_NO_ASSERT
  471. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured without --enable-fast\n");
  472. #endif
  473. #endif
  474. #ifdef STARPU_MEMORY_STATUS
  475. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-memory-status, which slows down a bit\n");
  476. #endif
  477. #ifdef STARPU_VERBOSE
  478. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-verbose, which slows down a bit\n");
  479. #endif
  480. #ifdef STARPU_USE_FXT
  481. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --with-fxt, which slows down a bit\n");
  482. #endif
  483. #ifdef STARPU_PERF_DEBUG
  484. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-perf-debug, which slows down a bit\n");
  485. #endif
  486. #ifdef STARPU_MODEL_DEBUG
  487. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-model-debug, which slows down a bit\n");
  488. #endif
  489. #ifdef STARPU_DATA_STATS
  490. if (!getenv("STARPU_SILENT")) fprintf(stderr,"Warning: StarPU was configured with --enable-stats, which slows down a bit\n");
  491. #endif
  492. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  493. while (initialized == CHANGING)
  494. /* Wait for the other one changing it */
  495. _STARPU_PTHREAD_COND_WAIT(&init_cond, &init_mutex);
  496. init_count++;
  497. if (initialized == INITIALIZED)
  498. {
  499. /* He initialized it, don't do it again, and let the others get the mutex */
  500. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  501. return 0;
  502. }
  503. /* initialized == UNINITIALIZED */
  504. initialized = CHANGING;
  505. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  506. #ifdef __MINGW32__
  507. WSADATA wsadata;
  508. WSAStartup(MAKEWORD(1,0), &wsadata);
  509. #endif
  510. srand(2008);
  511. /* store the pointer to the user explicit configuration during the
  512. * initialization */
  513. if (user_conf == NULL)
  514. {
  515. struct starpu_conf *conf = malloc(sizeof(struct starpu_conf));
  516. starpu_conf_init(conf);
  517. config.conf = conf;
  518. config.default_conf = 1;
  519. }
  520. else
  521. {
  522. if (user_conf->magic != 42) {
  523. fprintf(stderr, "starpu_conf structure needs to be initialized with starpu_conf_init\n");
  524. return -EINVAL;
  525. }
  526. config.conf = user_conf;
  527. config.default_conf = 0;
  528. }
  529. _starpu_conf_check_environment(config.conf);
  530. _starpu_init_all_sched_ctxs(&config);
  531. #ifdef STARPU_USE_FXT
  532. _starpu_start_fxt_profiling();
  533. #endif
  534. _starpu_open_debug_logfile();
  535. _starpu_data_interface_init();
  536. _starpu_timing_init();
  537. _starpu_profiling_init();
  538. _starpu_load_bus_performance_files();
  539. ret = _starpu_build_topology(&config);
  540. if (ret)
  541. {
  542. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  543. init_count--;
  544. initialized = UNINITIALIZED;
  545. /* Let somebody else try to do it */
  546. _STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  547. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  548. return ret;
  549. }
  550. /* We need to store the current task handled by the different
  551. * threads */
  552. _starpu_initialize_current_task_key();
  553. struct _starpu_sched_ctx *sched_ctx;
  554. if(user_conf == NULL)
  555. sched_ctx = _starpu_create_sched_ctx(NULL, NULL, -1, 1, "init");
  556. else
  557. sched_ctx = _starpu_create_sched_ctx(user_conf->sched_policy_name, NULL, -1, 1, "init");
  558. starpu_set_sched_ctx(&sched_ctx->id);
  559. _starpu_initialize_registered_performance_models();
  560. /* Launch "basic" workers (ie. non-combined workers) */
  561. _starpu_launch_drivers(&config);
  562. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  563. initialized = INITIALIZED;
  564. /* Tell everybody that we initialized */
  565. _STARPU_PTHREAD_COND_BROADCAST(&init_cond);
  566. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  567. _STARPU_DEBUG("Initialisation finished\n");
  568. return 0;
  569. }
  570. void starpu_profiling_init()
  571. {
  572. _starpu_profiling_init();
  573. }
  574. /*
  575. * Handle runtime termination
  576. */
  577. static void _starpu_terminate_workers(struct _starpu_machine_config *config)
  578. {
  579. int status STARPU_ATTRIBUTE_UNUSED;
  580. unsigned workerid;
  581. for (workerid = 0; workerid < config->topology.nworkers; workerid++)
  582. {
  583. starpu_wake_all_blocked_workers();
  584. _STARPU_DEBUG("wait for worker %u\n", workerid);
  585. struct _starpu_worker_set *set = config->workers[workerid].set;
  586. struct _starpu_worker *worker = &config->workers[workerid];
  587. /* in case StarPU termination code is called from a callback,
  588. * we have to check if pthread_self() is the worker itself */
  589. if (set)
  590. {
  591. if (!set->joined)
  592. {
  593. if (!pthread_equal(pthread_self(), set->worker_thread))
  594. {
  595. status = pthread_join(set->worker_thread, NULL);
  596. #ifdef STARPU_VERBOSE
  597. if (status)
  598. {
  599. _STARPU_DEBUG("pthread_join -> %d\n", status);
  600. }
  601. #endif
  602. }
  603. set->joined = 1;
  604. }
  605. }
  606. else
  607. {
  608. if (!worker->run_by_starpu)
  609. goto out;
  610. if (!pthread_equal(pthread_self(), worker->worker_thread))
  611. {
  612. status = pthread_join(worker->worker_thread, NULL);
  613. #ifdef STARPU_VERBOSE
  614. if (status)
  615. {
  616. _STARPU_DEBUG("pthread_join -> %d\n", status);
  617. }
  618. #endif
  619. }
  620. }
  621. out:
  622. STARPU_ASSERT(starpu_task_list_empty(&worker->local_tasks));
  623. _starpu_job_list_delete(worker->terminated_jobs);
  624. }
  625. }
  626. unsigned _starpu_machine_is_running(void)
  627. {
  628. /* running is just protected by a memory barrier */
  629. STARPU_SYNCHRONIZE();
  630. return config.running;
  631. }
  632. unsigned _starpu_worker_can_block(unsigned memnode STARPU_ATTRIBUTE_UNUSED)
  633. {
  634. #ifdef STARPU_NON_BLOCKING_DRIVERS
  635. return 0;
  636. #else
  637. unsigned can_block = 1;
  638. if (!_starpu_check_that_no_data_request_exists(memnode))
  639. can_block = 0;
  640. if (!_starpu_machine_is_running())
  641. can_block = 0;
  642. if (!_starpu_execute_registered_progression_hooks())
  643. can_block = 0;
  644. return can_block;
  645. #endif
  646. }
  647. static void _starpu_kill_all_workers(struct _starpu_machine_config *config)
  648. {
  649. /* set the flag which will tell workers to stop */
  650. config->running = 0;
  651. /* running is just protected by a memory barrier */
  652. STARPU_SYNCHRONIZE();
  653. starpu_wake_all_blocked_workers();
  654. }
  655. void starpu_display_stats()
  656. {
  657. const char *stats;
  658. if ((stats = getenv("STARPU_BUS_STATS")) && atoi(stats))
  659. starpu_bus_profiling_helper_display_summary();
  660. if ((stats = getenv("STARPU_WORKER_STATS")) && atoi(stats))
  661. starpu_worker_profiling_helper_display_summary();
  662. }
  663. void starpu_shutdown(void)
  664. {
  665. const char *stats;
  666. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  667. init_count--;
  668. if (init_count)
  669. {
  670. _STARPU_DEBUG("Still somebody needing StarPU, don't deinitialize\n");
  671. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  672. return;
  673. }
  674. /* We're last */
  675. initialized = CHANGING;
  676. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  677. starpu_task_wait_for_no_ready();
  678. _starpu_display_msi_stats();
  679. _starpu_display_alloc_cache_stats();
  680. /* tell all workers to shutdown */
  681. _starpu_kill_all_workers(&config);
  682. #ifdef STARPU_MEMORY_STATUS
  683. if ((stats = getenv("STARPU_MEMORY_STATS")) && atoi(stats))
  684. _starpu_display_data_stats();
  685. #endif
  686. #ifdef STARPU_DATA_STATS
  687. _starpu_display_comm_amounts();
  688. #endif
  689. if ((stats = getenv("STARPU_BUS_STATS")) && atoi(stats))
  690. starpu_bus_profiling_helper_display_summary();
  691. if ((stats = getenv("STARPU_WORKER_STATS")) && atoi(stats))
  692. starpu_worker_profiling_helper_display_summary();
  693. _starpu_deinitialize_registered_performance_models();
  694. /* wait for their termination */
  695. _starpu_terminate_workers(&config);
  696. _starpu_delete_all_sched_ctxs();
  697. _starpu_destroy_topology(&config);
  698. #ifdef STARPU_USE_FXT
  699. _starpu_stop_fxt_profiling();
  700. #endif
  701. _starpu_data_interface_shutdown();
  702. /* Drop all remaining tags */
  703. _starpu_tag_clear();
  704. _starpu_close_debug_logfile();
  705. _STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
  706. initialized = UNINITIALIZED;
  707. /* Let someone else that wants to initialize it again do it */
  708. _STARPU_PTHREAD_COND_SIGNAL(&init_cond);
  709. _STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
  710. /* Clear memory if it was allocated by StarPU */
  711. if (config.default_conf)
  712. free(config.conf);
  713. _STARPU_DEBUG("Shutdown finished\n");
  714. }
  715. unsigned starpu_worker_get_count(void)
  716. {
  717. return config.topology.nworkers;
  718. }
  719. int starpu_worker_get_count_by_type(enum starpu_archtype type)
  720. {
  721. switch (type)
  722. {
  723. case STARPU_CPU_WORKER:
  724. return config.topology.ncpus;
  725. case STARPU_CUDA_WORKER:
  726. return config.topology.ncudagpus;
  727. case STARPU_OPENCL_WORKER:
  728. return config.topology.nopenclgpus;
  729. case STARPU_GORDON_WORKER:
  730. return config.topology.ngordon_spus;
  731. default:
  732. return -EINVAL;
  733. }
  734. }
  735. unsigned starpu_combined_worker_get_count(void)
  736. {
  737. return config.topology.ncombinedworkers;
  738. }
  739. unsigned starpu_cpu_worker_get_count(void)
  740. {
  741. return config.topology.ncpus;
  742. }
  743. unsigned starpu_cuda_worker_get_count(void)
  744. {
  745. return config.topology.ncudagpus;
  746. }
  747. unsigned starpu_opencl_worker_get_count(void)
  748. {
  749. return config.topology.nopenclgpus;
  750. }
  751. unsigned starpu_spu_worker_get_count(void)
  752. {
  753. return config.topology.ngordon_spus;
  754. }
  755. int starpu_asynchronous_copy_disabled()
  756. {
  757. return config.conf->disable_asynchronous_copy;
  758. }
  759. /* When analyzing performance, it is useful to see what is the processing unit
  760. * that actually performed the task. This function returns the id of the
  761. * processing unit actually executing it, therefore it makes no sense to use it
  762. * within the callbacks of SPU functions for instance. If called by some thread
  763. * that is not controlled by StarPU, starpu_worker_get_id returns -1. */
  764. int starpu_worker_get_id(void)
  765. {
  766. struct _starpu_worker * worker;
  767. worker = _starpu_get_local_worker_key();
  768. if (worker)
  769. {
  770. return worker->workerid;
  771. }
  772. else
  773. {
  774. /* there is no worker associated to that thread, perhaps it is
  775. * a thread from the application or this is some SPU worker */
  776. return -1;
  777. }
  778. }
  779. int starpu_combined_worker_get_id(void)
  780. {
  781. struct _starpu_worker *worker;
  782. worker = _starpu_get_local_worker_key();
  783. if (worker)
  784. {
  785. return worker->combined_workerid;
  786. }
  787. else
  788. {
  789. /* there is no worker associated to that thread, perhaps it is
  790. * a thread from the application or this is some SPU worker */
  791. return -1;
  792. }
  793. }
  794. int starpu_combined_worker_get_size(void)
  795. {
  796. struct _starpu_worker *worker;
  797. worker = _starpu_get_local_worker_key();
  798. if (worker)
  799. {
  800. return worker->worker_size;
  801. }
  802. else
  803. {
  804. /* there is no worker associated to that thread, perhaps it is
  805. * a thread from the application or this is some SPU worker */
  806. return -1;
  807. }
  808. }
  809. int starpu_combined_worker_get_rank(void)
  810. {
  811. struct _starpu_worker *worker;
  812. worker = _starpu_get_local_worker_key();
  813. if (worker)
  814. {
  815. return worker->current_rank;
  816. }
  817. else
  818. {
  819. /* there is no worker associated to that thread, perhaps it is
  820. * a thread from the application or this is some SPU worker */
  821. return -1;
  822. }
  823. }
  824. int starpu_worker_get_devid(int id)
  825. {
  826. return config.workers[id].devid;
  827. }
  828. struct _starpu_worker *_starpu_get_worker_struct(unsigned id)
  829. {
  830. return &config.workers[id];
  831. }
  832. unsigned starpu_worker_is_combined_worker(int id)
  833. {
  834. return id >= (int)config.topology.nworkers;
  835. }
  836. struct _starpu_sched_ctx *_starpu_get_sched_ctx_struct(unsigned id)
  837. {
  838. STARPU_ASSERT(id <= STARPU_NMAX_SCHED_CTXS);
  839. return &config.sched_ctxs[id];
  840. }
  841. struct _starpu_combined_worker *_starpu_get_combined_worker_struct(unsigned id)
  842. {
  843. unsigned basic_worker_count = starpu_worker_get_count();
  844. STARPU_ASSERT(id >= basic_worker_count);
  845. return &config.combined_workers[id - basic_worker_count];
  846. }
  847. enum starpu_archtype starpu_worker_get_type(int id)
  848. {
  849. return config.workers[id].arch;
  850. }
  851. int starpu_worker_get_ids_by_type(enum starpu_archtype type, int *workerids, int maxsize)
  852. {
  853. unsigned nworkers = starpu_worker_get_count();
  854. int cnt = 0;
  855. unsigned id;
  856. for (id = 0; id < nworkers; id++)
  857. {
  858. if (starpu_worker_get_type(id) == type)
  859. {
  860. /* Perhaps the array is too small ? */
  861. if (cnt >= maxsize)
  862. return -ERANGE;
  863. workerids[cnt++] = id;
  864. }
  865. }
  866. return cnt;
  867. }
  868. int starpu_worker_get_nids_by_type(enum starpu_archtype type, int *workerids, int maxsize)
  869. {
  870. unsigned nworkers = starpu_worker_get_count();
  871. int cnt = 0;
  872. unsigned id;
  873. for (id = 0; id < nworkers; id++)
  874. {
  875. if (starpu_worker_get_type(id) == type)
  876. {
  877. /* Perhaps the array is too small ? */
  878. if (cnt >= maxsize)
  879. return cnt;
  880. workerids[cnt++] = id;
  881. }
  882. }
  883. return cnt;
  884. }
  885. int starpu_worker_get_available_ids_by_type(enum starpu_archtype type, int *workerids, int maxsize)
  886. {
  887. unsigned nworkers = starpu_worker_get_count();
  888. int cnt = 0;
  889. unsigned id, worker;
  890. unsigned found = 0;
  891. for (id = 0; id < nworkers; id++)
  892. {
  893. found = 0;
  894. if (starpu_worker_get_type(id) == type)
  895. {
  896. /* Perhaps the array is too small ? */
  897. if (cnt >= maxsize)
  898. return cnt;
  899. int s;
  900. for(s = 1; s < STARPU_NMAX_SCHED_CTXS; s++)
  901. {
  902. if(config.sched_ctxs[s].id != STARPU_NMAX_SCHED_CTXS)
  903. {
  904. struct worker_collection *workers = config.sched_ctxs[s].workers;
  905. if(workers->init_cursor)
  906. workers->init_cursor(workers);
  907. while(workers->has_next(workers))
  908. {
  909. worker = workers->get_next(workers);
  910. if(worker == id)
  911. {
  912. found = 1;
  913. break;
  914. }
  915. }
  916. if(workers->init_cursor)
  917. workers->deinit_cursor(workers);
  918. if(found) break;
  919. }
  920. }
  921. if(!found)
  922. workerids[cnt++] = id;
  923. }
  924. }
  925. return cnt;
  926. }
  927. void starpu_worker_get_name(int id, char *dst, size_t maxlen)
  928. {
  929. char *name = config.workers[id].name;
  930. snprintf(dst, maxlen, "%s", name);
  931. }
  932. /* Retrieve the status which indicates what the worker is currently doing. */
  933. enum _starpu_worker_status _starpu_worker_get_status(int workerid)
  934. {
  935. return config.workers[workerid].status;
  936. }
  937. /* Change the status of the worker which indicates what the worker is currently
  938. * doing (eg. executing a callback). */
  939. void _starpu_worker_set_status(int workerid, enum _starpu_worker_status status)
  940. {
  941. config.workers[workerid].status = status;
  942. }
  943. struct _starpu_sched_ctx* _starpu_get_initial_sched_ctx(void)
  944. {
  945. return &config.sched_ctxs[0];
  946. }
  947. int
  948. starpu_driver_run(struct starpu_driver *d)
  949. {
  950. if (!d)
  951. return -EINVAL;
  952. switch (d->type)
  953. {
  954. #ifdef STARPU_USE_CPU
  955. case STARPU_CPU_WORKER:
  956. return _starpu_run_cpu(d);
  957. #endif
  958. #ifdef STARPU_USE_CUDA
  959. case STARPU_CUDA_WORKER:
  960. return _starpu_run_cuda(d);
  961. #endif
  962. #ifdef STARPU_USE_OPENCL
  963. case STARPU_OPENCL_WORKER:
  964. return _starpu_run_opencl(d);
  965. #endif
  966. case STARPU_GORDON_WORKER: /* Not supported yet */
  967. default:
  968. return -EINVAL;
  969. }
  970. }
  971. int
  972. starpu_driver_init(struct starpu_driver *d)
  973. {
  974. STARPU_ASSERT(d);
  975. switch (d->type)
  976. {
  977. #ifdef STARPU_USE_CPU
  978. case STARPU_CPU_WORKER:
  979. return _starpu_cpu_driver_init(d);
  980. #endif
  981. #ifdef STARPU_USE_CUDA
  982. case STARPU_CUDA_WORKER:
  983. return _starpu_cuda_driver_init(d);
  984. #endif
  985. #ifdef STARPU_USE_OPENCL
  986. case STARPU_OPENCL_WORKER:
  987. return _starpu_opencl_driver_init(d);
  988. #endif
  989. case STARPU_GORDON_WORKER: /* Not supported yet */
  990. default:
  991. return -EINVAL;
  992. }
  993. }
  994. int
  995. starpu_driver_run_once(struct starpu_driver *d)
  996. {
  997. STARPU_ASSERT(d);
  998. switch (d->type)
  999. {
  1000. #ifdef STARPU_USE_CPU
  1001. case STARPU_CPU_WORKER:
  1002. return _starpu_cpu_driver_run_once(d);
  1003. #endif
  1004. #ifdef STARPU_USE_CUDA
  1005. case STARPU_CUDA_WORKER:
  1006. return _starpu_cuda_driver_run_once(d);
  1007. #endif
  1008. #ifdef STARPU_USE_OPENCL
  1009. case STARPU_OPENCL_WORKER:
  1010. return _starpu_opencl_driver_run_once(d);
  1011. #endif
  1012. case STARPU_GORDON_WORKER: /* Not supported yet */
  1013. default:
  1014. return -EINVAL;
  1015. }
  1016. }
  1017. int
  1018. starpu_driver_deinit(struct starpu_driver *d)
  1019. {
  1020. STARPU_ASSERT(d);
  1021. switch (d->type)
  1022. {
  1023. #ifdef STARPU_USE_CPU
  1024. case STARPU_CPU_WORKER:
  1025. return _starpu_cpu_driver_deinit(d);
  1026. #endif
  1027. #ifdef STARPU_USE_CUDA
  1028. case STARPU_CUDA_WORKER:
  1029. return _starpu_cuda_driver_deinit(d);
  1030. #endif
  1031. #ifdef STARPU_USE_OPENCL
  1032. case STARPU_OPENCL_WORKER:
  1033. return _starpu_opencl_driver_deinit(d);
  1034. #endif
  1035. case STARPU_GORDON_WORKER: /* Not supported yet */
  1036. default:
  1037. return -EINVAL;
  1038. }
  1039. }