/* cholesky_and_lu.c (3.8 KB) */

  #include "cholesky/cholesky.h"

  #include <errno.h>
  #include <limits.h>
  #include <pthread.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/time.h>
  /*
   * Arguments handed to one benchmark thread.
   *
   * Field order is kept as-is because the struct is filled member by member
   * by the callers in this file.
   */
  typedef struct {
      /* start, argc and argv are forwarded unchanged to run_cholesky_implicit(). */
      int      start;
      int      argc;
      char   **argv;

      /* Scheduling context the thread's factorization is submitted to. */
      unsigned ctx;

      /* Id of the scheduling context used by the sibling thread. */
      int      the_other_ctx;

      /* Array of ids that was passed to starpu_create_sched_ctx() for ctx ... */
      int     *procs;

      /* ... and the number of entries in that array. */
      int      ncpus;
  } params;
  /*
   * Result record a benchmark thread returns through its thread exit value.
   */
  typedef struct {
      /* Sum of run_cholesky_implicit() return values divided by the sample count. */
      double flops;

      /* Sum of the timings reported per run divided by the sample count. */
      double avg_timing;
  } retvals;
  /* How many times each thread repeats its factorization before averaging. */
  #define NSAMPLES 5

  /*
   * Shared state of the two benchmark threads.
   *
   * first   - 1 until one thread has gone through the post-run critical
   *           section; only read and written while holding mut.
   * mut     - protects first.
   * barrier - passed by address to every run_cholesky_implicit() call.
   */
  int               first = 1;
  pthread_mutex_t   mut;
  pthread_barrier_t barrier;
  20. void* func_cholesky(void *val){
  21. params *p = (params*)val;
  22. unsigned sched_ctx = p->ctx;
  23. int the_other_ctx = p->the_other_ctx;
  24. int i;
  25. retvals *rv = (retvals*)malloc(sizeof(retvals));
  26. rv->flops = 0;
  27. rv->avg_timing = 0;
  28. double timing = 0;
  29. for(i = 0; i < NSAMPLES; i++)
  30. {
  31. rv->flops += run_cholesky_implicit(sched_ctx, p->start, p->argc, p->argv, &timing, &barrier);
  32. rv->avg_timing += timing;
  33. }
  34. pthread_mutex_lock(&mut);
  35. if(first)
  36. {
  37. starpu_delete_sched_ctx(p->ctx, the_other_ctx);
  38. // starpu_add_workers_to_sched_ctx(p->procs, p->ncpus, the_other_ctx);
  39. }
  40. first = 0;
  41. pthread_mutex_unlock(&mut);
  42. rv->flops /= NSAMPLES;
  43. rv->avg_timing /= NSAMPLES;
  44. return (void*)rv;
  45. }
  46. void cholesky_vs_cholesky(params *p1, params *p2, params *p3,
  47. unsigned cpu_start1, unsigned cpu_start2,
  48. unsigned cpu_end1, unsigned cpu_end2){
  49. int ncpus1 = cpu_end1 - cpu_start1;
  50. int ncpus2 = cpu_end2 - cpu_start2;
  51. /* 2 cholesky in different ctxs */
  52. starpu_init(NULL);
  53. starpu_helper_cublas_init();
  54. int procs[ncpus1];
  55. int i;
  56. int k = 0;
  57. for(i = cpu_start1; i < cpu_end1; i++)
  58. {
  59. printf("%d ", i);
  60. procs[k++] = i;
  61. }
  62. printf("\n");
  63. p1->ctx = starpu_create_sched_ctx("heft", procs, ncpus1, "cholesky1");
  64. p2->the_other_ctx = (int)p1->ctx;
  65. p1->procs = procs;
  66. p1->ncpus = ncpus1;
  67. int procs2[ncpus2];
  68. k = 0;
  69. for(i = cpu_start2; i < cpu_end2; i++){
  70. printf("%d ", i);
  71. procs2[k++] = i;
  72. }
  73. printf("\n");
  74. p2->ctx = starpu_create_sched_ctx("heft", procs2, ncpus2, "cholesky2");
  75. p1->the_other_ctx = (int)p2->ctx;
  76. p2->procs = procs2;
  77. p2->ncpus = ncpus2;
  78. pthread_t tid[2];
  79. pthread_barrier_init(&barrier, NULL, 2);
  80. pthread_mutex_init(&mut, NULL);
  81. struct timeval start;
  82. struct timeval end;
  83. gettimeofday(&start, NULL);
  84. pthread_create(&tid[0], NULL, (void*)func_cholesky, (void*)p1);
  85. pthread_create(&tid[1], NULL, (void*)func_cholesky, (void*)p2);
  86. void *gflops_cholesky1;
  87. void *gflops_cholesky2;
  88. pthread_join(tid[0], &gflops_cholesky1);
  89. pthread_join(tid[1], &gflops_cholesky2);
  90. gettimeofday(&end, NULL);
  91. pthread_mutex_destroy(&mut);
  92. starpu_helper_cublas_shutdown();
  93. starpu_shutdown();
  94. double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  95. timing /= 1000000;
  96. timing /= 60;
  97. printf("%2.2f %2.2f ", ((retvals*)gflops_cholesky1)->flops, ((retvals*)gflops_cholesky2)->flops);
  98. printf("%2.2f %2.2f %2.2f\n", ((retvals*)gflops_cholesky1)->avg_timing, ((retvals*)gflops_cholesky2)->avg_timing, timing);
  99. }
  100. int main(int argc, char **argv)
  101. {
  102. unsigned cpu_start1 = 0, cpu_end1 = 0, cpu_start2 = 0, cpu_end2 = 0;
  103. int i;
  104. for (i = 9; i < argc; i++) {
  105. if (strcmp(argv[i], "-cpu_start1") == 0) {
  106. char *argptr;
  107. cpu_start1 = strtol(argv[++i], &argptr, 10);
  108. }
  109. if (strcmp(argv[i], "-cpu_start2") == 0) {
  110. char *argptr;
  111. cpu_start2 = strtol(argv[++i], &argptr, 10);
  112. }
  113. if (strcmp(argv[i], "-cpu_end1") == 0) {
  114. char *argptr;
  115. cpu_end1 = strtol(argv[++i], &argptr, 10);
  116. }
  117. if (strcmp(argv[i], "-cpu_end2") == 0) {
  118. char *argptr;
  119. cpu_end2 = strtol(argv[++i], &argptr, 10);
  120. }
  121. }
  122. params p1;
  123. p1.start = 1;
  124. p1.argc = 5;
  125. p1.argv = argv;
  126. params p2;
  127. p2.start = 5;
  128. p2.argc = 9;
  129. p2.argv = argv;
  130. params p3;
  131. p3.argc = argc;
  132. p3.argv = argv;
  133. p3.ctx = 0;
  134. cholesky_vs_cholesky(&p1, &p2,&p3, cpu_start1, cpu_start2, cpu_end1, cpu_end2);
  135. return 0;
  136. }