cholesky_and_lu.c 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  #include "cholesky/cholesky.h"

  #include <errno.h>
  #include <limits.h>
  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/time.h>
  /*
   * Arguments handed to one func_cholesky() thread.
   */
  typedef struct {
      /* Forwarded unchanged to run_cholesky_implicit() as its start/argc/argv
       * arguments. */
      int start;
      int argc;
      char **argv;

      /* Value returned by starpu_create_sched_ctx() for this thread. */
      unsigned ctx;
      /* `ctx` of the peer thread; used as the second argument of
       * starpu_delete_sched_ctx(). */
      int the_other_ctx;

      /* Id array that was given to starpu_create_sched_ctx(), and the
       * element count that was passed along with it. */
      int *procs;
      int ncpus;
  } params;
  /*
   * Result record produced by func_cholesky(): both fields are averages over
   * NSAMPLES calls to run_cholesky_implicit().
   */
  typedef struct {
      double flops;      /* mean of the values returned by run_cholesky_implicit() */
      double avg_timing; /* mean of the values it stored through its timing pointer */
  } retvals;
  16. #define NSAMPLES 3
  17. int first = 1;
  18. pthread_mutex_t mut;
  19. pthread_barrier_t barrier;
  20. void* func_cholesky(void *val){
  21. params *p = (params*)val;
  22. unsigned sched_ctx = p->ctx;
  23. int the_other_ctx = p->the_other_ctx;
  24. int i;
  25. retvals *rv = (retvals*)malloc(sizeof(retvals));
  26. rv->flops = 0;
  27. rv->avg_timing = 0;
  28. double timing = 0;
  29. for(i = 0; i < NSAMPLES; i++)
  30. {
  31. rv->flops += run_cholesky_implicit(sched_ctx, p->start, p->argc, p->argv, &timing, &barrier);
  32. rv->avg_timing += timing;
  33. }
  34. pthread_mutex_lock(&mut);
  35. if(first){
  36. starpu_delete_sched_ctx(p->ctx, the_other_ctx);
  37. }
  38. first = 0;
  39. pthread_mutex_unlock(&mut);
  40. rv->flops /= NSAMPLES;
  41. rv->avg_timing /= NSAMPLES;
  42. return (void*)rv;
  43. }
  44. void cholesky_vs_cholesky(params *p1, params *p2, params *p3,
  45. unsigned cpu1, unsigned cpu2,
  46. unsigned gpu, unsigned gpu1, unsigned gpu2){
  47. int ncpus1 = cpu1 + gpu + gpu1;
  48. int ncpus2 = cpu2 + gpu + gpu2;
  49. /* 2 cholesky in different ctxs */
  50. starpu_init(NULL);
  51. starpu_helper_cublas_init();
  52. int procs[ncpus1];
  53. int i;
  54. int k = 0;
  55. for(i = 0; i < gpu; i++)
  56. {
  57. procs[k++] = i;
  58. // printf("%d ", i);
  59. }
  60. for(i = gpu; i < gpu + gpu1; i++)
  61. {
  62. procs[k++] = i;
  63. //printf("%d ", i);
  64. }
  65. for(i = 3; i < 3 + cpu1; i++)
  66. {
  67. procs[k++] = i;
  68. //printf("%d ", i);
  69. }
  70. //printf("\n");
  71. p1->ctx = starpu_create_sched_ctx("heft", procs, ncpus1, "cholesky1");
  72. p2->the_other_ctx = (int)p1->ctx;
  73. p1->procs = procs;
  74. p1->ncpus = ncpus1;
  75. int procs2[ncpus2];
  76. k = 0;
  77. for(i = 0; i < gpu; i++){
  78. procs2[k++] = i;
  79. // printf("%d ", i);
  80. }
  81. for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++){
  82. procs2[k++] = i;
  83. // printf("%d ", i);
  84. }
  85. for(i = 3 + cpu1; i < 3 + cpu1 + cpu2; i++){
  86. procs2[k++] = i;
  87. // printf("%d ", i);
  88. }
  89. // printf("\n");
  90. p2->ctx = starpu_create_sched_ctx("prio", procs2, ncpus2, "cholesky2");
  91. p1->the_other_ctx = (int)p2->ctx;
  92. p2->procs = procs2;
  93. p2->ncpus = ncpus2;
  94. pthread_t tid[2];
  95. pthread_barrier_init(&barrier, NULL, 2);
  96. pthread_mutex_init(&mut, NULL);
  97. struct timeval start;
  98. struct timeval end;
  99. gettimeofday(&start, NULL);
  100. pthread_create(&tid[0], NULL, (void*)func_cholesky, (void*)p1);
  101. pthread_create(&tid[1], NULL, (void*)func_cholesky, (void*)p2);
  102. void *gflops_cholesky1;
  103. void *gflops_cholesky2;
  104. pthread_join(tid[0], &gflops_cholesky1);
  105. pthread_join(tid[1], &gflops_cholesky2);
  106. gettimeofday(&end, NULL);
  107. pthread_mutex_destroy(&mut);
  108. starpu_helper_cublas_shutdown();
  109. starpu_shutdown();
  110. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  111. timing /= 1000000;
  112. // timing /= 60;
  113. printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops);
  114. printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, timing);
  115. /* printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, 0.0 ); */
  116. /* printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, 0.0, timing); */
  117. }
  118. int main(int argc, char **argv)
  119. {
  120. unsigned cpu1 = 0, cpu2 = 0;
  121. unsigned gpu = 0, gpu1 = 0, gpu2 = 0;
  122. int i;
  123. for (i = 9; i < argc; i++) {
  124. if (strcmp(argv[i], "-cpu1") == 0) {
  125. char *argptr;
  126. cpu1 = strtol(argv[++i], &argptr, 10);
  127. }
  128. if (strcmp(argv[i], "-cpu2") == 0) {
  129. char *argptr;
  130. cpu2 = strtol(argv[++i], &argptr, 10);
  131. }
  132. if (strcmp(argv[i], "-gpu") == 0) {
  133. char *argptr;
  134. gpu = strtol(argv[++i], &argptr, 10);
  135. }
  136. if (strcmp(argv[i], "-gpu1") == 0) {
  137. char *argptr;
  138. gpu1 = strtol(argv[++i], &argptr, 10);
  139. }
  140. if (strcmp(argv[i], "-gpu2") == 0) {
  141. char *argptr;
  142. gpu2 = strtol(argv[++i], &argptr, 10);
  143. }
  144. }
  145. params p1;
  146. p1.start = 1;
  147. p1.argc = 5;
  148. p1.argv = argv;
  149. params p2;
  150. p2.start = 5;
  151. p2.argc = 9;
  152. p2.argv = argv;
  153. params p3;
  154. p3.argc = argc;
  155. p3.argv = argv;
  156. p3.ctx = 0;
  157. cholesky_vs_cholesky(&p1, &p2,&p3, cpu1, cpu2, gpu, gpu1, gpu2);
  158. return 0;
  159. }