/*
 * sched_ctx_utils.c
 *
 * Helpers for running two benchmarks concurrently, each in its own
 * StarPU scheduling context.
 */
#include "sched_ctx_utils.h"
#include <starpu.h>
#include <stdio.h>
#include <stdlib.h>
  3. unsigned size1;
  4. unsigned size2;
  5. unsigned nblocks1;
  6. unsigned nblocks2;
  7. unsigned cpu1;
  8. unsigned cpu2;
  9. unsigned gpu;
  10. unsigned gpu1;
  11. unsigned gpu2;
  12. typedef struct {
  13. unsigned id;
  14. unsigned ctx;
  15. int the_other_ctx;
  16. int *procs;
  17. int nprocs;
  18. void (*bench)(unsigned, unsigned);
  19. unsigned size;
  20. unsigned nblocks;
  21. } params;
  22. typedef struct {
  23. double flops;
  24. double avg_timing;
  25. } retvals;
  26. #define NSAMPLES 3
  27. int first = 1;
  28. pthread_mutex_t mut;
  29. retvals rv[2];
  30. params p1, p2;
  31. pthread_key_t key;
  32. void init()
  33. {
  34. size1 = 4*1024;
  35. size2 = 4*1024;
  36. nblocks1 = 16;
  37. nblocks2 = 16;
  38. cpu1 = 0;
  39. cpu2 = 0;
  40. gpu = 0;
  41. gpu1 = 0;
  42. gpu2 = 0;
  43. rv[0].flops = 0.0;
  44. rv[1].flops = 0.0;
  45. rv[1].avg_timing = 0.0;
  46. rv[1].avg_timing = 0.0;
  47. p1.ctx = 0;
  48. p2.ctx = 0;
  49. p1.id = 0;
  50. p2.id = 1;
  51. pthread_key_create(&key, NULL);
  52. }
  53. void update_sched_ctx_timing_results(double flops, double avg_timing)
  54. {
  55. unsigned *id = pthread_getspecific(key);
  56. rv[*id].flops += flops;
  57. rv[*id].avg_timing += avg_timing;
  58. }
  59. void* start_bench(void *val){
  60. params *p = (params*)val;
  61. int i;
  62. pthread_setspecific(key, &p->id);
  63. if(p->ctx != 0)
  64. starpu_set_sched_ctx(&p->ctx);
  65. for(i = 0; i < NSAMPLES; i++)
  66. p->bench(p->size, p->nblocks);
  67. if(p->ctx != 0)
  68. {
  69. pthread_mutex_lock(&mut);
  70. if(first){
  71. starpu_delete_sched_ctx(p->ctx, p->the_other_ctx);
  72. }
  73. first = 0;
  74. pthread_mutex_unlock(&mut);
  75. }
  76. rv[p->id].flops /= NSAMPLES;
  77. rv[p->id].avg_timing /= NSAMPLES;
  78. }
  79. void start_2benchs(void (*bench)(unsigned, unsigned))
  80. {
  81. p1.bench = bench;
  82. p1.size = size1;
  83. p1.nblocks = nblocks1;
  84. p2.bench = bench;
  85. p2.size = size2;
  86. p2.nblocks = nblocks2;
  87. pthread_t tid[2];
  88. pthread_mutex_init(&mut, NULL);
  89. struct timeval start;
  90. struct timeval end;
  91. gettimeofday(&start, NULL);
  92. pthread_create(&tid[0], NULL, (void*)start_bench, (void*)&p1);
  93. pthread_create(&tid[1], NULL, (void*)start_bench, (void*)&p2);
  94. pthread_join(tid[0], NULL);
  95. pthread_join(tid[1], NULL);
  96. gettimeofday(&end, NULL);
  97. pthread_mutex_destroy(&mut);
  98. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  99. timing /= 1000000;
  100. printf("%2.2f %2.2f ", rv[0].flops, rv[0].flops);
  101. printf("%2.2f %2.2f %2.2f\n", rv[1].avg_timing, rv[2].avg_timing, timing);
  102. }
  103. void construct_contexts(void (*bench)(unsigned, unsigned))
  104. {
  105. int nprocs1 = cpu1 + gpu + gpu1;
  106. int nprocs2 = cpu2 + gpu + gpu2;
  107. unsigned n_all_gpus = gpu + gpu1 + gpu2;
  108. int procs[nprocs1];
  109. int i;
  110. int k = 0;
  111. for(i = 0; i < gpu; i++)
  112. procs[k++] = i;
  113. for(i = gpu; i < gpu + gpu1; i++)
  114. procs[k++] = i;
  115. for(i = n_all_gpus; i < n_all_gpus + cpu1; i++)
  116. procs[k++] = i;
  117. p1.ctx = starpu_create_sched_ctx("heft", procs, nprocs1, "sched_ctx1");
  118. p2.the_other_ctx = (int)p1.ctx;
  119. p1.procs = procs;
  120. p1.nprocs = nprocs1;
  121. int procs2[nprocs2];
  122. k = 0;
  123. for(i = 0; i < gpu; i++)
  124. procs2[k++] = i;
  125. for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++)
  126. procs2[k++] = i;
  127. for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++)
  128. procs2[k++] = i;
  129. p2.ctx = starpu_create_sched_ctx("prio", procs2, nprocs2, "sched_ctx2");
  130. p1.the_other_ctx = (int)p2.ctx;
  131. p2.procs = procs2;
  132. p2.nprocs = nprocs2;
  133. }
  134. void parse_args_ctx(int argc, char **argv)
  135. {
  136. init();
  137. int i;
  138. for (i = 1; i < argc; i++) {
  139. if (strcmp(argv[i], "-size1") == 0) {
  140. char *argptr;
  141. size1 = strtol(argv[++i], &argptr, 10);
  142. }
  143. if (strcmp(argv[i], "-nblocks1") == 0) {
  144. char *argptr;
  145. nblocks1 = strtol(argv[++i], &argptr, 10);
  146. }
  147. if (strcmp(argv[i], "-size2") == 0) {
  148. char *argptr;
  149. size2 = strtol(argv[++i], &argptr, 10);
  150. }
  151. if (strcmp(argv[i], "-nblocks2") == 0) {
  152. char *argptr;
  153. nblocks2 = strtol(argv[++i], &argptr, 10);
  154. }
  155. if (strcmp(argv[i], "-cpu1") == 0) {
  156. char *argptr;
  157. cpu1 = strtol(argv[++i], &argptr, 10);
  158. }
  159. if (strcmp(argv[i], "-cpu2") == 0) {
  160. char *argptr;
  161. cpu2 = strtol(argv[++i], &argptr, 10);
  162. }
  163. if (strcmp(argv[i], "-gpu") == 0) {
  164. char *argptr;
  165. gpu = strtol(argv[++i], &argptr, 10);
  166. }
  167. if (strcmp(argv[i], "-gpu1") == 0) {
  168. char *argptr;
  169. gpu1 = strtol(argv[++i], &argptr, 10);
  170. }
  171. if (strcmp(argv[i], "-gpu2") == 0) {
  172. char *argptr;
  173. gpu2 = strtol(argv[++i], &argptr, 10);
  174. }
  175. }
  176. }