/* sched_ctx_utils.c */
  #include "sched_ctx_utils.h"

  #include <errno.h>
  #include <limits.h>
  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/time.h>

  #include <starpu.h>
  /* Problem size handed to the benchmark of the first / second context
   * (copied into p1.size / p2.size before a run). */
  unsigned size1, size2;

  /* Block count handed to the benchmark of the first / second context
   * (copied into p1.nblocks / p2.nblocks before a run). */
  unsigned nblocks1, nblocks2;

  /* Number of CPU worker ids given to the first / second context. */
  unsigned cpu1, cpu2;

  /* Number of GPU worker ids: `gpu` ids are placed in both contexts,
   * `gpu1` ids only in the first one and `gpu2` ids only in the second. */
  unsigned gpu, gpu1, gpu2;
  /* Everything one benchmark run needs; handed to start_bench() as its
   * argument. */
  typedef struct {
      unsigned id;                      /* slot of this run in rv[] */
      unsigned ctx;                     /* scheduling context id; 0 means "no context" */
      int the_other_ctx;                /* id of the sibling context, passed to
                                           starpu_delete_sched_ctx() */
      int *procs;                       /* worker ids the context was created with */
      int nprocs;                       /* number of entries in procs */
      void (*bench)(unsigned, unsigned); /* benchmark entry point, called as
                                           bench(size, nblocks) */
      unsigned size;                    /* first argument given to bench */
      unsigned nblocks;                 /* second argument given to bench */
  } params;
  /* Results accumulated for one benchmark through
   * update_sched_ctx_timing_results(). */
  typedef struct {
      double flops;       /* sum of the reported flops values */
      double avg_timing;  /* sum of the reported average timings */
  } retvals;
  26. #define NSAMPLES 1
  27. int first = 1;
  28. pthread_mutex_t mut;
  29. retvals rv[2];
  30. params p1, p2;
  31. pthread_key_t key;
  32. void init()
  33. {
  34. size1 = 4*1024;
  35. size2 = 4*1024;
  36. nblocks1 = 16;
  37. nblocks2 = 16;
  38. cpu1 = 0;
  39. cpu2 = 0;
  40. gpu = 0;
  41. gpu1 = 0;
  42. gpu2 = 0;
  43. rv[0].flops = 0.0;
  44. rv[1].flops = 0.0;
  45. rv[1].avg_timing = 0.0;
  46. rv[1].avg_timing = 0.0;
  47. p1.ctx = 0;
  48. p2.ctx = 0;
  49. p1.id = 0;
  50. p2.id = 1;
  51. pthread_key_create(&key, NULL);
  52. }
  53. void update_sched_ctx_timing_results(double flops, double avg_timing)
  54. {
  55. unsigned *id = pthread_getspecific(key);
  56. rv[*id].flops += flops;
  57. rv[*id].avg_timing += avg_timing;
  58. }
  59. void* start_bench(void *val){
  60. params *p = (params*)val;
  61. int i;
  62. pthread_setspecific(key, &p->id);
  63. if(p->ctx != 0)
  64. starpu_set_sched_ctx(&p->ctx);
  65. for(i = 0; i < NSAMPLES; i++)
  66. p->bench(p->size, p->nblocks);
  67. if(p->ctx != 0)
  68. {
  69. pthread_mutex_lock(&mut);
  70. if(first){
  71. starpu_delete_sched_ctx(p->ctx, p->the_other_ctx);
  72. }
  73. first = 0;
  74. pthread_mutex_unlock(&mut);
  75. }
  76. rv[p->id].flops /= NSAMPLES;
  77. rv[p->id].avg_timing /= NSAMPLES;
  78. }
  79. void start_2benchs(void (*bench)(unsigned, unsigned))
  80. {
  81. p1.bench = bench;
  82. p1.size = size1;
  83. printf("size %d\n", size1);
  84. p1.nblocks = nblocks1;
  85. p2.bench = bench;
  86. p2.size = size2;
  87. printf("size %d\n", size2);
  88. p2.nblocks = nblocks2;
  89. pthread_t tid[2];
  90. pthread_mutex_init(&mut, NULL);
  91. struct timeval start;
  92. struct timeval end;
  93. gettimeofday(&start, NULL);
  94. pthread_create(&tid[0], NULL, (void*)start_bench, (void*)&p1);
  95. pthread_create(&tid[1], NULL, (void*)start_bench, (void*)&p2);
  96. pthread_join(tid[0], NULL);
  97. pthread_join(tid[1], NULL);
  98. gettimeofday(&end, NULL);
  99. pthread_mutex_destroy(&mut);
  100. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  101. timing /= 1000000;
  102. printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops);
  103. printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing);
  104. }
  105. void start_1stbench(void (*bench)(unsigned, unsigned))
  106. {
  107. p1.bench = bench;
  108. p1.size = size1;
  109. p1.nblocks = nblocks1;
  110. struct timeval start;
  111. struct timeval end;
  112. gettimeofday(&start, NULL);
  113. start_bench((void*)&p1);
  114. gettimeofday(&end, NULL);
  115. pthread_mutex_destroy(&mut);
  116. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  117. timing /= 1000000;
  118. printf("%2.2f ", rv[0].flops);
  119. printf("%2.2f %2.2f\n", rv[0].avg_timing, timing);
  120. }
  121. void start_2ndbench(void (*bench)(unsigned, unsigned))
  122. {
  123. p2.bench = bench;
  124. p2.size = size2;
  125. p2.nblocks = nblocks2;
  126. struct timeval start;
  127. struct timeval end;
  128. gettimeofday(&start, NULL);
  129. start_bench((void*)&p2);
  130. gettimeofday(&end, NULL);
  131. pthread_mutex_destroy(&mut);
  132. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  133. timing /= 1000000;
  134. printf("%2.2f ", rv[1].flops);
  135. printf("%2.2f %2.2f\n", rv[1].avg_timing, timing);
  136. }
  137. void construct_contexts(void (*bench)(unsigned, unsigned))
  138. {
  139. int nprocs1 = cpu1 + gpu + gpu1;
  140. int nprocs2 = cpu2 + gpu + gpu2;
  141. unsigned n_all_gpus = gpu + gpu1 + gpu2;
  142. int procs[nprocs1];
  143. int i;
  144. int k = 0;
  145. for(i = 0; i < gpu; i++)
  146. {
  147. procs[k++] = i;
  148. printf("%d ", i);
  149. }
  150. for(i = gpu; i < gpu + gpu1; i++)
  151. {
  152. procs[k++] = i;
  153. printf("%d ", i);
  154. }
  155. for(i = n_all_gpus; i < n_all_gpus + cpu1; i++)
  156. {
  157. procs[k++] = i;
  158. printf("%d ", i);
  159. }
  160. printf("\n ");
  161. p1.ctx = starpu_create_sched_ctx("heft", procs, nprocs1, "sched_ctx1");
  162. p2.the_other_ctx = (int)p1.ctx;
  163. p1.procs = procs;
  164. p1.nprocs = nprocs1;
  165. int procs2[nprocs2];
  166. k = 0;
  167. for(i = 0; i < gpu; i++)
  168. {
  169. procs2[k++] = i;
  170. printf("%d ", i);
  171. }
  172. for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++)
  173. {
  174. procs2[k++] = i;
  175. printf("%d ", i);
  176. }
  177. for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++)
  178. {
  179. procs2[k++] = i;
  180. printf("%d ", i);
  181. }
  182. printf("\n");
  183. p2.ctx = starpu_create_sched_ctx("heft", procs2, nprocs2, "sched_ctx2");
  184. p1.the_other_ctx = (int)p2.ctx;
  185. p2.procs = procs2;
  186. p2.nprocs = nprocs2;
  187. }
  188. void parse_args_ctx(int argc, char **argv)
  189. {
  190. init();
  191. int i;
  192. for (i = 1; i < argc; i++) {
  193. if (strcmp(argv[i], "-size1") == 0) {
  194. char *argptr;
  195. size1 = strtol(argv[++i], &argptr, 10);
  196. }
  197. if (strcmp(argv[i], "-nblocks1") == 0) {
  198. char *argptr;
  199. nblocks1 = strtol(argv[++i], &argptr, 10);
  200. }
  201. if (strcmp(argv[i], "-size2") == 0) {
  202. char *argptr;
  203. size2 = strtol(argv[++i], &argptr, 10);
  204. }
  205. if (strcmp(argv[i], "-nblocks2") == 0) {
  206. char *argptr;
  207. nblocks2 = strtol(argv[++i], &argptr, 10);
  208. }
  209. if (strcmp(argv[i], "-cpu1") == 0) {
  210. char *argptr;
  211. cpu1 = strtol(argv[++i], &argptr, 10);
  212. }
  213. if (strcmp(argv[i], "-cpu2") == 0) {
  214. char *argptr;
  215. cpu2 = strtol(argv[++i], &argptr, 10);
  216. }
  217. if (strcmp(argv[i], "-gpu") == 0) {
  218. char *argptr;
  219. gpu = strtol(argv[++i], &argptr, 10);
  220. }
  221. if (strcmp(argv[i], "-gpu1") == 0) {
  222. char *argptr;
  223. gpu1 = strtol(argv[++i], &argptr, 10);
  224. }
  225. if (strcmp(argv[i], "-gpu2") == 0) {
  226. char *argptr;
  227. gpu2 = strtol(argv[++i], &argptr, 10);
  228. }
  229. }
  230. }