/* sched_ctx_utils.c */

#include "sched_ctx_utils.h"
#include <starpu.h>

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pthread.h>
#include <sys/time.h>
  3. unsigned size1;
  4. unsigned size2;
  5. unsigned nblocks1;
  6. unsigned nblocks2;
  7. unsigned cpu1;
  8. unsigned cpu2;
  9. unsigned gpu;
  10. unsigned gpu1;
  11. unsigned gpu2;
  12. typedef struct {
  13. unsigned id;
  14. unsigned ctx;
  15. int the_other_ctx;
  16. int *procs;
  17. int nprocs;
  18. void (*bench)(unsigned, unsigned);
  19. unsigned size;
  20. unsigned nblocks;
  21. } params;
  22. typedef struct {
  23. double flops;
  24. double avg_timing;
  25. } retvals;
  26. #define NSAMPLES 1
  27. int first = 1;
  28. pthread_mutex_t mut;
  29. retvals rv[2];
  30. params p1, p2;
  31. pthread_key_t key;
  32. void init()
  33. {
  34. size1 = 4*1024;
  35. size2 = 4*1024;
  36. nblocks1 = 16;
  37. nblocks2 = 16;
  38. cpu1 = 0;
  39. cpu2 = 0;
  40. gpu = 0;
  41. gpu1 = 0;
  42. gpu2 = 0;
  43. rv[0].flops = 0.0;
  44. rv[1].flops = 0.0;
  45. rv[1].avg_timing = 0.0;
  46. rv[1].avg_timing = 0.0;
  47. p1.ctx = 0;
  48. p2.ctx = 0;
  49. p1.id = 0;
  50. p2.id = 1;
  51. pthread_key_create(&key, NULL);
  52. }
  53. void update_sched_ctx_timing_results(double flops, double avg_timing)
  54. {
  55. unsigned *id = pthread_getspecific(key);
  56. rv[*id].flops += flops;
  57. rv[*id].avg_timing += avg_timing;
  58. }
  59. void* start_bench(void *val){
  60. params *p = (params*)val;
  61. int i;
  62. pthread_setspecific(key, &p->id);
  63. if(p->ctx != 0)
  64. starpu_set_sched_ctx(&p->ctx);
  65. for(i = 0; i < NSAMPLES; i++)
  66. p->bench(p->size, p->nblocks);
  67. if(p->ctx != 0)
  68. {
  69. pthread_mutex_lock(&mut);
  70. if(first){
  71. starpu_delete_sched_ctx(p->ctx, p->the_other_ctx);
  72. }
  73. first = 0;
  74. pthread_mutex_unlock(&mut);
  75. }
  76. rv[p->id].flops /= NSAMPLES;
  77. rv[p->id].avg_timing /= NSAMPLES;
  78. return NULL;
  79. }
  80. void start_2benchs(void (*bench)(unsigned, unsigned))
  81. {
  82. p1.bench = bench;
  83. p1.size = size1;
  84. printf("size %d\n", size1);
  85. p1.nblocks = nblocks1;
  86. p2.bench = bench;
  87. p2.size = size2;
  88. printf("size %d\n", size2);
  89. p2.nblocks = nblocks2;
  90. pthread_t tid[2];
  91. pthread_mutex_init(&mut, NULL);
  92. struct timeval start;
  93. struct timeval end;
  94. gettimeofday(&start, NULL);
  95. pthread_create(&tid[0], NULL, (void*)start_bench, (void*)&p1);
  96. pthread_create(&tid[1], NULL, (void*)start_bench, (void*)&p2);
  97. pthread_join(tid[0], NULL);
  98. pthread_join(tid[1], NULL);
  99. gettimeofday(&end, NULL);
  100. pthread_mutex_destroy(&mut);
  101. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  102. timing /= 1000000;
  103. printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops);
  104. printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing);
  105. }
  106. void start_1stbench(void (*bench)(unsigned, unsigned))
  107. {
  108. p1.bench = bench;
  109. p1.size = size1;
  110. p1.nblocks = nblocks1;
  111. struct timeval start;
  112. struct timeval end;
  113. gettimeofday(&start, NULL);
  114. start_bench((void*)&p1);
  115. gettimeofday(&end, NULL);
  116. pthread_mutex_destroy(&mut);
  117. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  118. timing /= 1000000;
  119. printf("%2.2f ", rv[0].flops);
  120. printf("%2.2f %2.2f\n", rv[0].avg_timing, timing);
  121. }
  122. void start_2ndbench(void (*bench)(unsigned, unsigned))
  123. {
  124. p2.bench = bench;
  125. p2.size = size2;
  126. p2.nblocks = nblocks2;
  127. struct timeval start;
  128. struct timeval end;
  129. gettimeofday(&start, NULL);
  130. start_bench((void*)&p2);
  131. gettimeofday(&end, NULL);
  132. pthread_mutex_destroy(&mut);
  133. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  134. timing /= 1000000;
  135. printf("%2.2f ", rv[1].flops);
  136. printf("%2.2f %2.2f\n", rv[1].avg_timing, timing);
  137. }
  138. void construct_contexts(void (*bench)(unsigned, unsigned))
  139. {
  140. int nprocs1 = cpu1 + gpu + gpu1;
  141. int nprocs2 = cpu2 + gpu + gpu2;
  142. unsigned n_all_gpus = gpu + gpu1 + gpu2;
  143. int procs[nprocs1];
  144. int i;
  145. int k = 0;
  146. for(i = 0; i < gpu; i++)
  147. {
  148. procs[k++] = i;
  149. printf("%d ", i);
  150. }
  151. for(i = gpu; i < gpu + gpu1; i++)
  152. {
  153. procs[k++] = i;
  154. printf("%d ", i);
  155. }
  156. for(i = n_all_gpus; i < n_all_gpus + cpu1; i++)
  157. {
  158. procs[k++] = i;
  159. printf("%d ", i);
  160. }
  161. printf("\n ");
  162. p1.ctx = starpu_create_sched_ctx("heft", procs, nprocs1, "sched_ctx1");
  163. p2.the_other_ctx = (int)p1.ctx;
  164. p1.procs = procs;
  165. p1.nprocs = nprocs1;
  166. int procs2[nprocs2];
  167. k = 0;
  168. for(i = 0; i < gpu; i++)
  169. {
  170. procs2[k++] = i;
  171. printf("%d ", i);
  172. }
  173. for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++)
  174. {
  175. procs2[k++] = i;
  176. printf("%d ", i);
  177. }
  178. for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++)
  179. {
  180. procs2[k++] = i;
  181. printf("%d ", i);
  182. }
  183. printf("\n");
  184. p2.ctx = starpu_create_sched_ctx("heft", procs2, nprocs2, "sched_ctx2");
  185. p1.the_other_ctx = (int)p2.ctx;
  186. p2.procs = procs2;
  187. p2.nprocs = nprocs2;
  188. }
  189. void parse_args_ctx(int argc, char **argv)
  190. {
  191. init();
  192. int i;
  193. for (i = 1; i < argc; i++) {
  194. if (strcmp(argv[i], "-size1") == 0) {
  195. char *argptr;
  196. size1 = strtol(argv[++i], &argptr, 10);
  197. }
  198. if (strcmp(argv[i], "-nblocks1") == 0) {
  199. char *argptr;
  200. nblocks1 = strtol(argv[++i], &argptr, 10);
  201. }
  202. if (strcmp(argv[i], "-size2") == 0) {
  203. char *argptr;
  204. size2 = strtol(argv[++i], &argptr, 10);
  205. }
  206. if (strcmp(argv[i], "-nblocks2") == 0) {
  207. char *argptr;
  208. nblocks2 = strtol(argv[++i], &argptr, 10);
  209. }
  210. if (strcmp(argv[i], "-cpu1") == 0) {
  211. char *argptr;
  212. cpu1 = strtol(argv[++i], &argptr, 10);
  213. }
  214. if (strcmp(argv[i], "-cpu2") == 0) {
  215. char *argptr;
  216. cpu2 = strtol(argv[++i], &argptr, 10);
  217. }
  218. if (strcmp(argv[i], "-gpu") == 0) {
  219. char *argptr;
  220. gpu = strtol(argv[++i], &argptr, 10);
  221. }
  222. if (strcmp(argv[i], "-gpu1") == 0) {
  223. char *argptr;
  224. gpu1 = strtol(argv[++i], &argptr, 10);
  225. }
  226. if (strcmp(argv[i], "-gpu2") == 0) {
  227. char *argptr;
  228. gpu2 = strtol(argv[++i], &argptr, 10);
  229. }
  230. }
  231. }