/* sched_ctx_utils.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2013,2016 Inria
  4. * Copyright (C) 2012-2013,2017 CNRS
  5. * Copyright (C) 2015 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include "sched_ctx_utils.h"
  19. #include <starpu.h>
  20. #include "sc_hypervisor.h"
#define NSAMPLES 3 /* number of benchmark repetitions averaged per context */

/* Problem sizes and worker counts for the two benchmarks/contexts,
 * filled in from the command line by parse_args_ctx(). */
unsigned size1;
unsigned size2;
unsigned nblocks1;
unsigned nblocks2;
unsigned cpu1;
unsigned cpu2;
unsigned gpu;  /* GPUs counted in both contexts' worker totals */
unsigned gpu1; /* extra GPUs for context 1 */
unsigned gpu2; /* extra GPUs for context 2 */

/* Per-benchmark parameters; one instance (p1/p2) per scheduling context. */
typedef struct
{
unsigned id;        /* index into rv[] (0 or 1) */
unsigned ctx;       /* scheduling context id (0 = none yet) */
int the_other_ctx;  /* context id of the peer benchmark */
int *workers;       /* worker ids assigned to this context */
int nworkers;       /* number of valid entries in workers */
void (*bench)(float*, unsigned, unsigned); /* benchmark kernel: (matrix, size, nblocks) */
unsigned size;      /* matrix dimension */
unsigned nblocks;   /* blocking factor passed to the kernel */
float *mat[NSAMPLES]; /* one input matrix per sample */
} params;

/* Accumulated results for one context. */
typedef struct
{
double flops;
double avg_timing;
} retvals;

int first = 1;              /* one-shot guard (used only by commented-out cleanup) */
starpu_pthread_mutex_t mut; /* protects the one-shot cleanup; initialized in start_2benchs() */
retvals rv[2];              /* results, indexed by params.id */
params p1, p2;
int it = 0;                 /* event counters (see set_hypervisor_conf) */
int it2 = 0;
starpu_pthread_key_t key;   /* TLS key holding a pointer to the current bench's id */
  55. void init()
  56. {
  57. size1 = 4*1024;
  58. size2 = 4*1024;
  59. nblocks1 = 16;
  60. nblocks2 = 16;
  61. cpu1 = 0;
  62. cpu2 = 0;
  63. gpu = 0;
  64. gpu1 = 0;
  65. gpu2 = 0;
  66. rv[0].flops = 0.0;
  67. rv[1].flops = 0.0;
  68. rv[1].avg_timing = 0.0;
  69. rv[1].avg_timing = 0.0;
  70. p1.ctx = 0;
  71. p2.ctx = 0;
  72. p1.id = 0;
  73. p2.id = 1;
  74. STARPU_PTHREAD_KEY_CREATE(&key, NULL);
  75. }
  76. void update_sched_ctx_timing_results(double flops, double avg_timing)
  77. {
  78. unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key);
  79. rv[*id].flops += flops;
  80. rv[*id].avg_timing += avg_timing;
  81. }
  82. void* start_bench(void *val)
  83. {
  84. params *p = (params*)val;
  85. int i;
  86. STARPU_PTHREAD_SETSPECIFIC(key, &p->id);
  87. if(p->ctx != 0)
  88. starpu_sched_ctx_set_context(&p->ctx);
  89. for(i = 0; i < NSAMPLES; i++)
  90. p->bench(p->mat[i], p->size, p->nblocks);
  91. /* if(p->ctx != 0) */
  92. /* { */
  93. /* STARPU_PTHREAD_MUTEX_LOCK(&mut); */
  94. /* if(first){ */
  95. /* sc_hypervisor_unregiser_ctx(p->ctx); */
  96. /* starpu_sched_ctx_delete(p->ctx, p->the_other_ctx); */
  97. /* } */
  98. /* first = 0; */
  99. /* STARPU_PTHREAD_MUTEX_UNLOCK(&mut); */
  100. /* } */
  101. sc_hypervisor_stop_resize(p->the_other_ctx);
  102. rv[p->id].flops /= NSAMPLES;
  103. rv[p->id].avg_timing /= NSAMPLES;
  104. return NULL;
  105. }
  106. float* construct_matrix(unsigned size)
  107. {
  108. float *mat;
  109. starpu_malloc((void **)&mat, (size_t)size*size*sizeof(float));
  110. unsigned i,j;
  111. for (i = 0; i < size; i++)
  112. {
  113. for (j = 0; j < size; j++)
  114. {
  115. mat[j +i*size] = (1.0f/(1.0f+i+j)) + ((i == j)?1.0f*size:0.0f);
  116. /* mat[j +i*size] = ((i == j)?1.0f*size:0.0f); */
  117. }
  118. }
  119. return mat;
  120. }
  121. void start_2benchs(void (*bench)(float*, unsigned, unsigned))
  122. {
  123. p1.bench = bench;
  124. p1.size = size1;
  125. p1.nblocks = nblocks1;
  126. p2.bench = bench;
  127. p2.size = size2;
  128. p2.nblocks = nblocks2;
  129. int i;
  130. for(i = 0; i < NSAMPLES; i++)
  131. {
  132. p1.mat[i] = construct_matrix(p1.size);
  133. p2.mat[i] = construct_matrix(p2.size);
  134. }
  135. starpu_pthread_t tid[2];
  136. STARPU_PTHREAD_MUTEX_INIT(&mut, NULL);
  137. struct timeval start;
  138. struct timeval end;
  139. gettimeofday(&start, NULL);
  140. STARPU_PTHREAD_CREATE(&tid[0], NULL, (void*)start_bench, (void*)&p1);
  141. STARPU_PTHREAD_CREATE(&tid[1], NULL, (void*)start_bench, (void*)&p2);
  142. STARPU_PTHREAD_JOIN(tid[0], NULL);
  143. STARPU_PTHREAD_JOIN(tid[1], NULL);
  144. gettimeofday(&end, NULL);
  145. STARPU_PTHREAD_MUTEX_DESTROY(&mut);
  146. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  147. timing /= 1000000;
  148. printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops);
  149. printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing);
  150. }
  151. void start_1stbench(void (*bench)(float*, unsigned, unsigned))
  152. {
  153. p1.bench = bench;
  154. p1.size = size1;
  155. p1.nblocks = nblocks1;
  156. int i;
  157. for(i = 0; i < NSAMPLES; i++)
  158. {
  159. p1.mat[i] = construct_matrix(p1.size);
  160. }
  161. struct timeval start;
  162. struct timeval end;
  163. gettimeofday(&start, NULL);
  164. start_bench((void*)&p1);
  165. gettimeofday(&end, NULL);
  166. STARPU_PTHREAD_MUTEX_DESTROY(&mut);
  167. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  168. timing /= 1000000;
  169. printf("%2.2f ", rv[0].flops);
  170. printf("%2.2f %2.2f\n", rv[0].avg_timing, timing);
  171. }
  172. void start_2ndbench(void (*bench)(float*, unsigned, unsigned))
  173. {
  174. p2.bench = bench;
  175. p2.size = size2;
  176. p2.nblocks = nblocks2;
  177. int i;
  178. for(i = 0; i < NSAMPLES; i++)
  179. {
  180. p2.mat[i] = construct_matrix(p2.size);
  181. }
  182. struct timeval start;
  183. struct timeval end;
  184. gettimeofday(&start, NULL);
  185. start_bench((void*)&p2);
  186. gettimeofday(&end, NULL);
  187. STARPU_PTHREAD_MUTEX_DESTROY(&mut);
  188. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  189. timing /= 1000000;
  190. printf("%2.2f ", rv[1].flops);
  191. printf("%2.2f %2.2f\n", rv[1].avg_timing, timing);
  192. }
  193. void construct_contexts()
  194. {
  195. struct sc_hypervisor_policy policy;
  196. policy.custom = 0;
  197. policy.name = "idle";
  198. void *perf_counters = sc_hypervisor_init(&policy);
  199. int nworkers1 = cpu1 + gpu + gpu1;
  200. int nworkers2 = cpu2 + gpu + gpu2;
  201. /* unsigned n_all_gpus = gpu + gpu1 + gpu2; */
  202. int i;
  203. /* int k = 0; */
  204. nworkers1 = 12;
  205. p1.workers = (int*)malloc(nworkers1*sizeof(int));
  206. /* for(i = 0; i < gpu; i++) */
  207. /* p1.workers[k++] = i; */
  208. /* for(i = gpu; i < gpu + gpu1; i++) */
  209. /* p1.workers[k++] = i; */
  210. /* for(i = n_all_gpus; i < n_all_gpus + cpu1; i++) */
  211. /* p1.workers[k++] = i; */
  212. for(i = 0; i < 12; i++)
  213. p1.workers[i] = i;
  214. p1.ctx = starpu_sched_ctx_create(p1.workers, nworkers1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0);
  215. starpu_sched_ctx_set_perf_counters(p1.ctx, perf_counters);
  216. p2.the_other_ctx = (int)p1.ctx;
  217. p1.nworkers = nworkers1;
  218. sc_hypervisor_register_ctx(p1.ctx, 0.0);
  219. /* sc_hypervisor_ctl(p1.ctx, */
  220. /* SC_HYPERVISOR_MAX_IDLE, p1.workers, p1.nworkers, 5000.0, */
  221. /* SC_HYPERVISOR_MAX_IDLE, p1.workers, gpu+gpu1, 100000.0, */
  222. /* SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */
  223. /* SC_HYPERVISOR_GRANULARITY, 2, */
  224. /* SC_HYPERVISOR_MIN_TASKS, 1000, */
  225. /* SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 100000.0, */
  226. /* SC_HYPERVISOR_MIN_WORKERS, 6, */
  227. /* SC_HYPERVISOR_MAX_WORKERS, 12, */
  228. /* NULL); */
  229. sc_hypervisor_ctl(p1.ctx,
  230. SC_HYPERVISOR_GRANULARITY, 2,
  231. SC_HYPERVISOR_MIN_TASKS, 1000,
  232. SC_HYPERVISOR_MIN_WORKERS, 6,
  233. SC_HYPERVISOR_MAX_WORKERS, 12,
  234. NULL);
  235. /* k = 0; */
  236. p2.workers = (int*)malloc(nworkers2*sizeof(int));
  237. /* for(i = 0; i < gpu; i++) */
  238. /* p2.workers[k++] = i; */
  239. /* for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++) */
  240. /* p2.workers[k++] = i; */
  241. /* for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++) */
  242. /* p2.workers[k++] = i; */
  243. p2.ctx = starpu_sched_ctx_create(p2.workers, 0, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0);
  244. starpu_sched_ctx_set_perf_counters(p2.ctx, perf_counters);
  245. p1.the_other_ctx = (int)p2.ctx;
  246. p2.nworkers = 0;
  247. sc_hypervisor_register_ctx(p2.ctx, 0.0);
  248. /* sc_hypervisor_ctl(p2.ctx, */
  249. /* SC_HYPERVISOR_MAX_IDLE, p2.workers, p2.nworkers, 2000.0, */
  250. /* SC_HYPERVISOR_MAX_IDLE, p2.workers, gpu+gpu2, 5000.0, */
  251. /* SC_HYPERVISOR_EMPTY_CTX_MAX_IDLE, p1.workers, p1.nworkers, 500000.0, */
  252. /* SC_HYPERVISOR_GRANULARITY, 2, */
  253. /* SC_HYPERVISOR_MIN_TASKS, 500, */
  254. /* SC_HYPERVISOR_NEW_WORKERS_MAX_IDLE, 1000.0, */
  255. /* SC_HYPERVISOR_MIN_WORKERS, 4, */
  256. /* SC_HYPERVISOR_MAX_WORKERS, 8, */
  257. /* NULL); */
  258. sc_hypervisor_ctl(p2.ctx,
  259. SC_HYPERVISOR_GRANULARITY, 2,
  260. SC_HYPERVISOR_MIN_TASKS, 500,
  261. SC_HYPERVISOR_MIN_WORKERS, 0,
  262. SC_HYPERVISOR_MAX_WORKERS, 6,
  263. NULL);
  264. }
  265. void set_hypervisor_conf(int event, int task_tag)
  266. {
  267. /* unsigned *id = STARPU_PTHREAD_GETSPECIFIC(key); */
  268. /* if(*id == 0) */
  269. /* { */
  270. /* if(event == END_BENCH) */
  271. /* { */
  272. /* if(it < 2) */
  273. /* { */
  274. /* sc_hypervisor_ctl(p2.ctx, */
  275. /* SC_HYPERVISOR_MIN_WORKERS, 2, */
  276. /* SC_HYPERVISOR_MAX_WORKERS, 4, */
  277. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  278. /* NULL); */
  279. /* printf("%d: set max %d for tag %d\n", p2.ctx, 4, task_tag); */
  280. /* sc_hypervisor_ctl(p1.ctx, */
  281. /* SC_HYPERVISOR_MIN_WORKERS, 6, */
  282. /* SC_HYPERVISOR_MAX_WORKERS, 8, */
  283. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  284. /* NULL); */
  285. /* printf("%d: set max %d for tag %d\n", p1.ctx, 8, task_tag); */
  286. /* sc_hypervisor_resize(p1.ctx, task_tag); */
  287. /* } */
  288. /* if(it == 2) */
  289. /* { */
  290. /* sc_hypervisor_ctl(p2.ctx, */
  291. /* SC_HYPERVISOR_MIN_WORKERS, 12, */
  292. /* SC_HYPERVISOR_MAX_WORKERS, 12, */
  293. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  294. /* NULL); */
  295. /* printf("%d: set max %d for tag %d\n", p2.ctx, 12, task_tag); */
  296. /* sc_hypervisor_ctl(p1.ctx, */
  297. /* SC_HYPERVISOR_MIN_WORKERS, 0, */
  298. /* SC_HYPERVISOR_MAX_WORKERS, 0, */
  299. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  300. /* NULL); */
  301. /* printf("%d: set max %d for tag %d\n", p1.ctx, 0, task_tag); */
  302. /* sc_hypervisor_resize(p1.ctx, task_tag); */
  303. /* } */
  304. /* it++; */
  305. /* } */
  306. /* } */
  307. /* else */
  308. /* { */
  309. /* if(event == END_BENCH) */
  310. /* { */
  311. /* if(it2 < 3) */
  312. /* { */
  313. /* sc_hypervisor_ctl(p1.ctx, */
  314. /* SC_HYPERVISOR_MIN_WORKERS, 6, */
  315. /* SC_HYPERVISOR_MAX_WORKERS, 12, */
  316. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  317. /* NULL); */
  318. /* printf("%d: set max %d for tag %d\n", p1.ctx, 12, task_tag); */
  319. /* sc_hypervisor_ctl(p2.ctx, */
  320. /* SC_HYPERVISOR_MIN_WORKERS, 0, */
  321. /* SC_HYPERVISOR_MAX_WORKERS, 0, */
  322. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  323. /* NULL); */
  324. /* printf("%d: set max %d for tag %d\n", p2.ctx, 0, task_tag); */
  325. /* sc_hypervisor_resize(p2.ctx, task_tag); */
  326. /* } */
  327. /* it2++; */
  328. /* } */
  329. /* } */
  330. /* if(*id == 1) */
  331. /* { */
  332. /* if(event == START_BENCH) */
  333. /* { */
  334. /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
  335. /* sc_hypervisor_ctl(p1.ctx, */
  336. /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 800000.0, */
  337. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  338. /* NULL); */
  339. /* } */
  340. /* else */
  341. /* { */
  342. /* if(it2 < 2) */
  343. /* { */
  344. /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
  345. /* sc_hypervisor_ctl(p2.ctx, */
  346. /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 500.0, */
  347. /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 200.0, */
  348. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  349. /* NULL); */
  350. /* } */
  351. /* if(it2 == 2) */
  352. /* { */
  353. /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
  354. /* sc_hypervisor_ctl(p2.ctx, */
  355. /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 1000.0, */
  356. /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 500.0, */
  357. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  358. /* SC_HYPERVISOR_MAX_WORKERS, 12, */
  359. /* NULL); */
  360. /* } */
  361. /* it2++; */
  362. /* } */
  363. /* } else { */
  364. /* if(event == START_BENCH) */
  365. /* { */
  366. /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
  367. /* sc_hypervisor_ctl(p1.ctx, */
  368. /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 1500.0, */
  369. /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 4000.0, */
  370. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  371. /* NULL); */
  372. /* } */
  373. /* if(event == END_BENCH) */
  374. /* { */
  375. /* if(it < 2) */
  376. /* { */
  377. /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
  378. /* sc_hypervisor_ctl(p1.ctx, */
  379. /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 100.0, */
  380. /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 5000.0, */
  381. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  382. /* NULL); */
  383. /* } */
  384. /* if(it == 2) */
  385. /* { */
  386. /* int workers[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; */
  387. /* sc_hypervisor_ctl(p1.ctx, */
  388. /* SC_HYPERVISOR_MAX_IDLE, workers, 12, 5000.0, */
  389. /* SC_HYPERVISOR_MAX_IDLE, workers, 3, 10000.0, */
  390. /* SC_HYPERVISOR_TIME_TO_APPLY, task_tag, */
  391. /* NULL); */
  392. /* } */
  393. /* it++; */
  394. /* } */
  395. /* } */
  396. }
  397. void end_contexts()
  398. {
  399. free(p1.workers);
  400. free(p2.workers);
  401. sc_hypervisor_shutdown();
  402. }
  403. void parse_args_ctx(int argc, char **argv)
  404. {
  405. init();
  406. int i;
  407. for (i = 1; i < argc; i++) {
  408. if (strcmp(argv[i], "-size1") == 0) {
  409. char *argptr;
  410. size1 = strtol(argv[++i], &argptr, 10);
  411. }
  412. if (strcmp(argv[i], "-nblocks1") == 0) {
  413. char *argptr;
  414. nblocks1 = strtol(argv[++i], &argptr, 10);
  415. }
  416. if (strcmp(argv[i], "-size2") == 0) {
  417. char *argptr;
  418. size2 = strtol(argv[++i], &argptr, 10);
  419. }
  420. if (strcmp(argv[i], "-nblocks2") == 0) {
  421. char *argptr;
  422. nblocks2 = strtol(argv[++i], &argptr, 10);
  423. }
  424. if (strcmp(argv[i], "-cpu1") == 0) {
  425. char *argptr;
  426. cpu1 = strtol(argv[++i], &argptr, 10);
  427. }
  428. if (strcmp(argv[i], "-cpu2") == 0) {
  429. char *argptr;
  430. cpu2 = strtol(argv[++i], &argptr, 10);
  431. }
  432. if (strcmp(argv[i], "-gpu") == 0) {
  433. char *argptr;
  434. gpu = strtol(argv[++i], &argptr, 10);
  435. }
  436. if (strcmp(argv[i], "-gpu1") == 0) {
  437. char *argptr;
  438. gpu1 = strtol(argv[++i], &argptr, 10);
  439. }
  440. if (strcmp(argv[i], "-gpu2") == 0) {
  441. char *argptr;
  442. gpu2 = strtol(argv[++i], &argptr, 10);
  443. }
  444. }
  445. }