/* sched_ctx_utils.c -- helpers for running benchmarks inside StarPU scheduling contexts. */

#include "sched_ctx_utils.h"
#include <starpu.h>
#include <stdio.h>
#include <stdlib.h>
  3. unsigned size1;
  4. unsigned size2;
  5. unsigned nblocks1;
  6. unsigned nblocks2;
  7. unsigned cpu1;
  8. unsigned cpu2;
  9. unsigned gpu;
  10. unsigned gpu1;
  11. unsigned gpu2;
  12. typedef struct {
  13. unsigned id;
  14. unsigned ctx;
  15. int the_other_ctx;
  16. int *procs;
  17. int nprocs;
  18. void (*bench)(unsigned, unsigned);
  19. unsigned size;
  20. unsigned nblocks;
  21. } params;
  22. typedef struct {
  23. double flops;
  24. double avg_timing;
  25. } retvals;
  26. #define NSAMPLES 3
  27. int first = 1;
  28. pthread_mutex_t mut;
  29. retvals rv[2];
  30. params p1, p2;
  31. pthread_key_t key;
  32. void init()
  33. {
  34. size1 = 4*1024;
  35. size2 = 4*1024;
  36. nblocks1 = 16;
  37. nblocks2 = 16;
  38. cpu1 = 0;
  39. cpu2 = 0;
  40. gpu = 0;
  41. gpu1 = 0;
  42. gpu2 = 0;
  43. rv[0].flops = 0.0;
  44. rv[1].flops = 0.0;
  45. rv[1].avg_timing = 0.0;
  46. rv[1].avg_timing = 0.0;
  47. p1.ctx = 0;
  48. p2.ctx = 0;
  49. p1.id = 0;
  50. p2.id = 1;
  51. pthread_key_create(&key, NULL);
  52. }
  53. void update_sched_ctx_timing_results(double flops, double avg_timing)
  54. {
  55. unsigned *id = pthread_getspecific(key);
  56. rv[*id].flops += flops;
  57. rv[*id].avg_timing += avg_timing;
  58. }
  59. void* start_bench(void *val){
  60. params *p = (params*)val;
  61. int i;
  62. pthread_setspecific(key, &p->id);
  63. if(p->ctx != 0)
  64. starpu_set_sched_ctx(&p->ctx);
  65. for(i = 0; i < NSAMPLES; i++)
  66. p->bench(p->size, p->nblocks);
  67. if(p->ctx != 0)
  68. {
  69. pthread_mutex_lock(&mut);
  70. if(first){
  71. starpu_delete_sched_ctx(p->ctx, p->the_other_ctx);
  72. }
  73. first = 0;
  74. pthread_mutex_unlock(&mut);
  75. }
  76. rv[p->id].flops /= NSAMPLES;
  77. rv[p->id].avg_timing /= NSAMPLES;
  78. }
  79. void start_2benchs(void (*bench)(unsigned, unsigned))
  80. {
  81. p1.bench = bench;
  82. p1.size = size1;
  83. p1.nblocks = nblocks1;
  84. p2.bench = bench;
  85. p2.size = size2;
  86. p2.nblocks = nblocks2;
  87. pthread_t tid[2];
  88. pthread_mutex_init(&mut, NULL);
  89. struct timeval start;
  90. struct timeval end;
  91. gettimeofday(&start, NULL);
  92. pthread_create(&tid[0], NULL, (void*)start_bench, (void*)&p1);
  93. pthread_create(&tid[1], NULL, (void*)start_bench, (void*)&p2);
  94. pthread_join(tid[0], NULL);
  95. pthread_join(tid[1], NULL);
  96. gettimeofday(&end, NULL);
  97. pthread_mutex_destroy(&mut);
  98. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  99. timing /= 1000000;
  100. printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops);
  101. printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing);
  102. }
  103. void start_1stbench(void (*bench)(unsigned, unsigned))
  104. {
  105. p1.bench = bench;
  106. p1.size = size1;
  107. p1.nblocks = nblocks1;
  108. struct timeval start;
  109. struct timeval end;
  110. gettimeofday(&start, NULL);
  111. start_bench((void*)&p1);
  112. gettimeofday(&end, NULL);
  113. pthread_mutex_destroy(&mut);
  114. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  115. timing /= 1000000;
  116. printf("%2.2f %2.2f ", rv[0].flops);
  117. printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, timing);
  118. }
  119. void start_2ndbench(void (*bench)(unsigned, unsigned))
  120. {
  121. p2.bench = bench;
  122. p2.size = size2;
  123. p2.nblocks = nblocks2;
  124. struct timeval start;
  125. struct timeval end;
  126. gettimeofday(&start, NULL);
  127. start_bench((void*)&p2);
  128. gettimeofday(&end, NULL);
  129. pthread_mutex_destroy(&mut);
  130. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  131. timing /= 1000000;
  132. printf("%2.2f %2.2f ", rv[1].flops);
  133. printf("%2.2f %2.2f %2.2f\n", rv[1].avg_timing, timing);
  134. }
  135. void construct_contexts(void (*bench)(unsigned, unsigned))
  136. {
  137. int nprocs1 = cpu1 + gpu + gpu1;
  138. int nprocs2 = cpu2 + gpu + gpu2;
  139. unsigned n_all_gpus = gpu + gpu1 + gpu2;
  140. int procs[nprocs1];
  141. int i;
  142. int k = 0;
  143. for(i = 0; i < gpu; i++)
  144. procs[k++] = i;
  145. for(i = gpu; i < gpu + gpu1; i++)
  146. procs[k++] = i;
  147. for(i = n_all_gpus; i < n_all_gpus + cpu1; i++)
  148. procs[k++] = i;
  149. p1.ctx = starpu_create_sched_ctx("heft", procs, nprocs1, "sched_ctx1");
  150. p2.the_other_ctx = (int)p1.ctx;
  151. p1.procs = procs;
  152. p1.nprocs = nprocs1;
  153. int procs2[nprocs2];
  154. k = 0;
  155. for(i = 0; i < gpu; i++)
  156. procs2[k++] = i;
  157. for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++)
  158. procs2[k++] = i;
  159. for(i = n_all_gpus + cpu1; i < n_all_gpus + cpu1 + cpu2; i++)
  160. procs2[k++] = i;
  161. p2.ctx = starpu_create_sched_ctx("heft", procs2, nprocs2, "sched_ctx2");
  162. p1.the_other_ctx = (int)p2.ctx;
  163. p2.procs = procs2;
  164. p2.nprocs = nprocs2;
  165. }
  166. void parse_args_ctx(int argc, char **argv)
  167. {
  168. init();
  169. int i;
  170. for (i = 1; i < argc; i++) {
  171. if (strcmp(argv[i], "-size1") == 0) {
  172. char *argptr;
  173. size1 = strtol(argv[++i], &argptr, 10);
  174. }
  175. if (strcmp(argv[i], "-nblocks1") == 0) {
  176. char *argptr;
  177. nblocks1 = strtol(argv[++i], &argptr, 10);
  178. }
  179. if (strcmp(argv[i], "-size2") == 0) {
  180. char *argptr;
  181. size2 = strtol(argv[++i], &argptr, 10);
  182. }
  183. if (strcmp(argv[i], "-nblocks2") == 0) {
  184. char *argptr;
  185. nblocks2 = strtol(argv[++i], &argptr, 10);
  186. }
  187. if (strcmp(argv[i], "-cpu1") == 0) {
  188. char *argptr;
  189. cpu1 = strtol(argv[++i], &argptr, 10);
  190. }
  191. if (strcmp(argv[i], "-cpu2") == 0) {
  192. char *argptr;
  193. cpu2 = strtol(argv[++i], &argptr, 10);
  194. }
  195. if (strcmp(argv[i], "-gpu") == 0) {
  196. char *argptr;
  197. gpu = strtol(argv[++i], &argptr, 10);
  198. }
  199. if (strcmp(argv[i], "-gpu1") == 0) {
  200. char *argptr;
  201. gpu1 = strtol(argv[++i], &argptr, 10);
  202. }
  203. if (strcmp(argv[i], "-gpu2") == 0) {
  204. char *argptr;
  205. gpu2 = strtol(argv[++i], &argptr, 10);
  206. }
  207. }
  208. }