/* cholesky_and_lu.c -- runs two Cholesky factorizations concurrently, each in its own scheduling context. */

#include "cholesky/cholesky.h"

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  3. typedef struct {
  4. int start;
  5. int argc;
  6. char **argv;
  7. unsigned ctx;
  8. int the_other_ctx;
  9. int *procs;
  10. int ncpus;
  11. } params;
  12. typedef struct {
  13. double flops;
  14. double avg_timing;
  15. } retvals;
  16. #define NSAMPLES 3
  17. int first = 1;
  18. pthread_mutex_t mut;
  19. pthread_barrier_t barrier;
  20. void* func_cholesky(void *val){
  21. params *p = (params*)val;
  22. unsigned sched_ctx = p->ctx;
  23. int the_other_ctx = p->the_other_ctx;
  24. int i;
  25. retvals *rv = (retvals*)malloc(sizeof(retvals));
  26. rv->flops = 0;
  27. rv->avg_timing = 0;
  28. double timing = 0;
  29. for(i = 0; i < NSAMPLES; i++)
  30. {
  31. rv->flops += run_cholesky_implicit(sched_ctx, p->start, p->argc, p->argv, &timing, &barrier);
  32. rv->avg_timing += timing;
  33. }
  34. pthread_mutex_lock(&mut);
  35. if(first){
  36. starpu_delete_sched_ctx(p->ctx, the_other_ctx);
  37. }
  38. first = 0;
  39. pthread_mutex_unlock(&mut);
  40. rv->flops /= NSAMPLES;
  41. rv->avg_timing /= NSAMPLES;
  42. return (void*)rv;
  43. }
  44. void cholesky_vs_cholesky(params *p1, params *p2, params *p3,
  45. // unsigned cpu_start1, unsigned cpu_start2,
  46. // unsigned cpu_end1, unsigned cpu_end2,
  47. unsigned cpu1, unsigned cpu2,
  48. unsigned gpu, unsigned gpu1, unsigned gpu2){
  49. int ncpus1 = cpu1 + gpu + gpu1;
  50. int ncpus2 = cpu2 + gpu + gpu2;
  51. /* 2 cholesky in different ctxs */
  52. starpu_init(NULL);
  53. starpu_helper_cublas_init();
  54. int procs[ncpus1];
  55. int i;
  56. int k = 0;
  57. for(i = 0; i < gpu; i++)
  58. {
  59. procs[k++] = i;
  60. // printf("%d ", i);
  61. }
  62. for(i = gpu; i < gpu + gpu1; i++)
  63. {
  64. procs[k++] = i;
  65. // printf("%d ", i);
  66. }
  67. for(i = 3; i < 3 + cpu1; i++)
  68. {
  69. procs[k++] = i;
  70. // printf("%d ", i);
  71. }
  72. // printf("\n");
  73. p1->ctx = starpu_create_sched_ctx("heft", procs, ncpus1, "cholesky1");
  74. p2->the_other_ctx = (int)p1->ctx;
  75. p1->procs = procs;
  76. p1->ncpus = ncpus1;
  77. int procs2[ncpus2];
  78. k = 0;
  79. for(i = 0; i < gpu; i++){
  80. procs2[k++] = i;
  81. //printf("%d ", i);
  82. }
  83. for(i = gpu + gpu1; i < gpu + gpu1 + gpu2; i++){
  84. procs2[k++] = i;
  85. // printf("%d ", i);
  86. }
  87. for(i = 3 + cpu1; i < 3 + cpu1 + cpu2; i++){
  88. procs2[k++] = i;
  89. // printf("%d ", i);
  90. }
  91. // printf("\n");
  92. p2->ctx = starpu_create_sched_ctx("heft", procs2, ncpus2, "cholesky2");
  93. p1->the_other_ctx = (int)p2->ctx;
  94. p2->procs = procs2;
  95. p2->ncpus = ncpus2;
  96. pthread_t tid[2];
  97. pthread_barrier_init(&barrier, NULL, 2);
  98. pthread_mutex_init(&mut, NULL);
  99. struct timeval start;
  100. struct timeval end;
  101. gettimeofday(&start, NULL);
  102. pthread_create(&tid[0], NULL, (void*)func_cholesky, (void*)p1);
  103. pthread_create(&tid[1], NULL, (void*)func_cholesky, (void*)p2);
  104. void *gflops_cholesky1;
  105. void *gflops_cholesky2;
  106. pthread_join(tid[0], &gflops_cholesky1);
  107. pthread_join(tid[1], &gflops_cholesky2);
  108. gettimeofday(&end, NULL);
  109. pthread_mutex_destroy(&mut);
  110. starpu_helper_cublas_shutdown();
  111. starpu_shutdown();
  112. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  113. timing /= 1000000;
  114. // timing /= 60;
  115. printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops);
  116. printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, timing);
  117. //printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, 0.0 );
  118. // printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, 0.0, timing);
  119. }
  120. int main(int argc, char **argv)
  121. {
  122. unsigned cpu1 = 0, cpu2 = 0;
  123. unsigned gpu = 0, gpu1 = 0, gpu2 = 0;
  124. int i;
  125. for (i = 9; i < argc; i++) {
  126. if (strcmp(argv[i], "-cpu1") == 0) {
  127. char *argptr;
  128. cpu1 = strtol(argv[++i], &argptr, 10);
  129. }
  130. if (strcmp(argv[i], "-cpu2") == 0) {
  131. char *argptr;
  132. cpu2 = strtol(argv[++i], &argptr, 10);
  133. }
  134. if (strcmp(argv[i], "-gpu") == 0) {
  135. char *argptr;
  136. gpu = strtol(argv[++i], &argptr, 10);
  137. }
  138. if (strcmp(argv[i], "-gpu1") == 0) {
  139. char *argptr;
  140. gpu1 = strtol(argv[++i], &argptr, 10);
  141. }
  142. if (strcmp(argv[i], "-gpu2") == 0) {
  143. char *argptr;
  144. gpu2 = strtol(argv[++i], &argptr, 10);
  145. }
  146. }
  147. params p1;
  148. p1.start = 1;
  149. p1.argc = 5;
  150. p1.argv = argv;
  151. params p2;
  152. p2.start = 5;
  153. p2.argc = 9;
  154. p2.argv = argv;
  155. params p3;
  156. p3.argc = argc;
  157. p3.argv = argv;
  158. p3.ctx = 0;
  159. cholesky_vs_cholesky(&p1, &p2,&p3, cpu1, cpu2, gpu, gpu1, gpu2);
  160. return 0;
  161. }