time_zpotrf_tile.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /**
  2. *
  3. * @precisions normal z -> c d s
  4. *
  5. **/
/*
 * Precision-specific settings for this timing test.
 * NOTE(review): presumably consumed by the timing driver included below
 * (timing.c) -- confirm there before renaming or removing any of them.
 */
#define _TYPE PLASMA_Complex64_t
#define _PREC double
#define _LAMCH LAPACKE_dlamch_work
#define _NAME "PLASMA_zpotrf_Tile"
/* See Lawn 41 page 120 */
/*
 * Multiplication and addition counts as expressions in `n`.
 * Neither macro takes an argument: an `n` must be in scope wherever they
 * are expanded.
 */
#define _FMULS (n * (1.0 / 6.0 * n + 0.5) * n)
#define _FADDS (n * (1.0 / 6.0 * n ) * n)
/* The driver is pulled in as a source file, not a header. */
#include "./timing.c"
/*
 * One-shot flag. In the code visible in this file it is only referenced from
 * the commented-out context-deletion code in start_Test.
 */
int first = 1;
/*
 * Initialised and destroyed in RunTest. In the code visible in this file its
 * only lock/unlock sites are in the commented-out code in start_Test.
 */
pthread_mutex_t mut;
  16. void* start_Test(void *p)
  17. {
  18. PLASMA_enum uplo = ((params*)p)->uplo;
  19. magma_desc_t *descA = ((params*)p)->descA;
  20. unsigned ctx = ((params*)p)->ctx;
  21. unsigned the_other_ctx = ((params*)p)->the_other_ctx;
  22. if(ctx != 0)
  23. starpu_set_sched_ctx(&ctx);
  24. if(ctx == 1)
  25. {
  26. int i, j;
  27. int sum = 0;
  28. for(i = 0; i < 1000; i++)
  29. for(j = 0; j < 100; j++)
  30. {
  31. sum += i;
  32. printf("sum = %d\n", sum);
  33. }
  34. }
  35. real_Double_t t;
  36. ((params*)p)->t = -cWtime();
  37. MAGMA_zpotrf_Tile(uplo, descA);
  38. ((params*)p)->t += cWtime();
  39. printf("require stop resize\n");
  40. sched_ctx_hypervisor_stop_resize(the_other_ctx);
  41. /* if(ctx != 0) */
  42. /* { */
  43. /* pthread_mutex_lock(&mut); */
  44. /* if(first){ */
  45. /* starpu_delete_sched_ctx(ctx, the_other_ctx); */
  46. /* } */
  47. /* first = 0; */
  48. /* pthread_mutex_unlock(&mut); */
  49. /* } */
  50. return p;
  51. }
  52. static magma_desc_t* do_start_stuff(int *iparam, int n, PLASMA_Complex64_t *A, PLASMA_Complex64_t *AT)
  53. {
  54. PLASMA_Complex64_t *b, *bT, *x;
  55. real_Double_t t;
  56. magma_desc_t *descA = NULL;
  57. int nb, nt;
  58. int nrhs = iparam[TIMING_NRHS];
  59. int check = iparam[TIMING_CHECK];
  60. int nocpu = iparam[TIMING_NO_CPU];
  61. int lda = n;
  62. int ldb = n;
  63. int peak_profiling = iparam[TIMING_PEAK];
  64. int profiling = iparam[TIMING_PROFILE];
  65. nb = iparam[TIMING_NB];
  66. nt = n / nb + ((n % nb == 0) ? 0 : 1);
  67. /* Allocate Data */
  68. AT = (PLASMA_Complex64_t *)malloc(lda*n*sizeof(PLASMA_Complex64_t));
  69. /* Check if unable to allocate memory */
  70. if ( !AT ){
  71. printf("Out of Memory \n ");
  72. exit(0);
  73. }
  74. /* Initialiaze Data */
  75. MAGMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, lda, n, 0, 0, n, n);
  76. MAGMA_zplghe_Tile((double)n, descA, 51 );
  77. /* Save AT in lapack layout for check */
  78. if ( check ) {
  79. A = (PLASMA_Complex64_t *)malloc(lda*n *sizeof(PLASMA_Complex64_t));
  80. MAGMA_zTile_to_Lapack( descA, (void*)A, n);
  81. }
  82. if ( profiling | peak_profiling )
  83. MAGMA_Enable( MAGMA_PROFILING_MODE );
  84. if (nocpu)
  85. morse_zlocality_allrestrict( MAGMA_CUDA );
  86. return descA;
  87. }
  88. static void do_end_stuff(int *iparam, double *dparam, magma_desc_t *descA, int n, PLASMA_enum uplo,
  89. PLASMA_Complex64_t *A, PLASMA_Complex64_t *AT)
  90. {
  91. PLASMA_Complex64_t *b, *bT, *x;
  92. real_Double_t t;
  93. magma_desc_t *descB = NULL;
  94. int nb, nt;
  95. int nrhs = iparam[TIMING_NRHS];
  96. int check = iparam[TIMING_CHECK];
  97. int nocpu = iparam[TIMING_NO_CPU];
  98. int lda = n;
  99. int ldb = n;
  100. int peak_profiling = iparam[TIMING_PEAK];
  101. int profiling = iparam[TIMING_PROFILE];
  102. if (nocpu)
  103. morse_zlocality_allrestore();
  104. if ( profiling | peak_profiling )
  105. MAGMA_Disable( MAGMA_PROFILING_MODE );
  106. /* Check the solution */
  107. if ( check )
  108. {
  109. b = (PLASMA_Complex64_t *)malloc(ldb*nrhs*sizeof(PLASMA_Complex64_t));
  110. bT = (PLASMA_Complex64_t *)malloc(ldb*nrhs*sizeof(PLASMA_Complex64_t));
  111. x = (PLASMA_Complex64_t *)malloc(ldb*nrhs*sizeof(PLASMA_Complex64_t));
  112. LAPACKE_zlarnv_work(1, ISEED, ldb*nrhs, bT);
  113. MAGMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs);
  114. MAGMA_zTile_to_Lapack(descB, (void*)b, n);
  115. MAGMA_zpotrs_Tile( uplo, descA, descB);
  116. MAGMA_zTile_to_Lapack(descB, (void*)x, n);
  117. dparam[TIMING_RES] = zcheck_solution(n, n, nrhs, A, lda, b, x, ldb,
  118. &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]),
  119. &(dparam[TIMING_XNORM]));
  120. MAGMA_Desc_Destroy(&descB);
  121. free( A );
  122. free( b );
  123. free( bT );
  124. free( x );
  125. }
  126. MAGMA_Desc_Destroy(&descA);
  127. free(AT);
  128. if (peak_profiling) {
  129. real_Double_t peak = 0;
  130. /*estimate_zgemm_sustained_peak(&peak);*/
  131. dparam[TIMING_ESTIMATED_PEAK] = (double)peak;
  132. }
  133. if (profiling)
  134. {
  135. /* Profiling of the scheduler */
  136. morse_schedprofile_display();
  137. /* Profile of each kernel */
  138. morse_zdisplay_allprofile();
  139. }
  140. }
  141. static int
  142. RunTest(int *iparam, double *dparam, real_Double_t *t_)
  143. {
  144. PLASMA_Complex64_t *A1, *AT1, *A2, *AT2;
  145. int n1 = iparam[TIMING_N];
  146. int n2 = iparam[TIMING_N2];
  147. magma_desc_t *descA1 = NULL;
  148. magma_desc_t *descA2 = NULL;
  149. PLASMA_enum uplo1 = PlasmaLower;
  150. PLASMA_enum uplo2 = PlasmaLower;
  151. descA1 = do_start_stuff(iparam, n1, A1, AT1);
  152. descA2 = do_start_stuff(iparam, n2, A2, AT2);
  153. pthread_t tid[2];
  154. p1.uplo = uplo1;
  155. p1.descA = descA1;
  156. p2.uplo = uplo2;
  157. p2.descA = descA2;
  158. pthread_mutex_init(&mut, NULL);
  159. pthread_create(&tid[0], NULL, (void*)start_Test, (void*)&p1);
  160. pthread_create(&tid[1], NULL, (void*)start_Test, (void*)&p2);
  161. pthread_join(tid[0], &p1);
  162. pthread_join(tid[1], &p2);
  163. pthread_mutex_destroy(&mut);
  164. t1[0] = p1.t;
  165. t2[0] = p2.t;
  166. printf("t1 = %lf t2 = %lf \n", t1[0], t2[0]);
  167. do_end_stuff(iparam, dparam1, descA1, n1, uplo1, A1, AT1);
  168. do_end_stuff(iparam, dparam2, descA2, n2, uplo2, A2, AT2);
  169. return 0;
  170. }