pxlu_implicit.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2013,2015,2017,2018 CNRS
  4. * Copyright (C) 2010,2011,2013-2015,2017 Université de Bordeaux
  5. * Copyright (C) 2013 Thibaut Lambert
  6. * Copyright (C) 2012 Inria
  7. *
  8. * StarPU is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU Lesser General Public License as published by
  10. * the Free Software Foundation; either version 2.1 of the License, or (at
  11. * your option) any later version.
  12. *
  13. * StarPU is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. *
  17. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  18. */
  19. #include "pxlu.h"
  20. #include "pxlu_kernels.h"
  21. #include <sys/time.h>
  22. //#define VERBOSE_INIT 1
  23. //#define DEBUG 1
  /*
   * File-scope run parameters. All four are assigned by STARPU_PLU(plu_main)
   * before it submits any task.
   */

  /* Number of blocks per matrix dimension; bounds every submission loop. */
  static unsigned nblocks = 0;
  /* When non-zero, every task is submitted with STARPU_MIN_PRIO. */
  static unsigned no_prio = 0;
  /* Rank and communicator size as handed to plu_main; -1 until then. */
  static int rank = -1;
  static int world_size = -1;

  /* Triple of block indices. Not referenced by the code visible in this file. */
  struct callback_arg
  {
      unsigned i;
      unsigned j;
      unsigned k;
  };
  32. /*
  33. * Task 11 (diagonal factorization)
  34. */
  35. static void create_task_11(unsigned k)
  36. {
  37. starpu_mpi_task_insert(MPI_COMM_WORLD,
  38. &STARPU_PLU(cl11),
  39. STARPU_VALUE, &k, sizeof(k),
  40. STARPU_VALUE, &k, sizeof(k),
  41. STARPU_VALUE, &k, sizeof(k),
  42. STARPU_RW, STARPU_PLU(get_block_handle)(k, k),
  43. STARPU_PRIORITY, !no_prio ?
  44. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  45. 0);
  46. }
  47. /*
  48. * Task 12 (Update lower left (TRSM))
  49. */
  50. static void create_task_12(unsigned k, unsigned j)
  51. {
  52. #ifdef STARPU_DEVEL
  53. #warning temporary fix
  54. #endif
  55. starpu_mpi_task_insert(MPI_COMM_WORLD,
  56. //&STARPU_PLU(cl12),
  57. &STARPU_PLU(cl21),
  58. STARPU_VALUE, &j, sizeof(j),
  59. STARPU_VALUE, &j, sizeof(j),
  60. STARPU_VALUE, &k, sizeof(k),
  61. STARPU_R, STARPU_PLU(get_block_handle)(k, k),
  62. STARPU_RW, STARPU_PLU(get_block_handle)(k, j),
  63. STARPU_PRIORITY, !no_prio && (j == k+1) ?
  64. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  65. 0);
  66. }
  67. /*
  68. * Task 21 (Update upper right (TRSM))
  69. */
  70. static void create_task_21(unsigned k, unsigned i)
  71. {
  72. #ifdef STARPU_DEVEL
  73. #warning temporary fix
  74. #endif
  75. starpu_mpi_task_insert(MPI_COMM_WORLD,
  76. //&STARPU_PLU(cl21),
  77. &STARPU_PLU(cl12),
  78. STARPU_VALUE, &i, sizeof(i),
  79. STARPU_VALUE, &i, sizeof(i),
  80. STARPU_VALUE, &k, sizeof(k),
  81. STARPU_R, STARPU_PLU(get_block_handle)(k, k),
  82. STARPU_RW, STARPU_PLU(get_block_handle)(i, k),
  83. STARPU_PRIORITY, !no_prio && (i == k+1) ?
  84. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  85. 0);
  86. }
  87. /*
  88. * Task 22 (GEMM)
  89. */
  90. static void create_task_22(unsigned k, unsigned i, unsigned j)
  91. {
  92. starpu_mpi_task_insert(MPI_COMM_WORLD,
  93. &STARPU_PLU(cl22),
  94. STARPU_VALUE, &i, sizeof(i),
  95. STARPU_VALUE, &j, sizeof(j),
  96. STARPU_VALUE, &k, sizeof(k),
  97. STARPU_R, STARPU_PLU(get_block_handle)(k, j),
  98. STARPU_R, STARPU_PLU(get_block_handle)(i, k),
  99. STARPU_RW, STARPU_PLU(get_block_handle)(i, j),
  100. STARPU_PRIORITY, !no_prio && (i == k + 1) && (j == k +1) ?
  101. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  102. 0);
  103. }
  104. /*
  105. * code to bootstrap the factorization
  106. */
  107. double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size, unsigned _no_prio)
  108. {
  109. double start;
  110. double end;
  111. nblocks = _nblocks;
  112. rank = _rank;
  113. world_size = _world_size;
  114. no_prio = _no_prio;
  115. /* create all the DAG nodes */
  116. unsigned i,j,k;
  117. starpu_mpi_barrier(MPI_COMM_WORLD);
  118. start = starpu_timing_now();
  119. for (k = 0; k < nblocks; k++)
  120. {
  121. starpu_iteration_push(k);
  122. create_task_11(k);
  123. for (i = k+1; i<nblocks; i++)
  124. {
  125. create_task_12(k, i);
  126. create_task_21(k, i);
  127. }
  128. starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,k));
  129. if (get_block_rank(k, k) == _rank)
  130. starpu_data_wont_use(STARPU_PLU(get_block_handle)(k,k));
  131. for (i = k+1; i<nblocks; i++)
  132. {
  133. for (j = k+1; j<nblocks; j++)
  134. {
  135. create_task_22(k, i, j);
  136. }
  137. }
  138. for (i = k+1; i<nblocks; i++)
  139. {
  140. starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,i));
  141. if (get_block_rank(k, i) == _rank)
  142. starpu_data_wont_use(STARPU_PLU(get_block_handle)(k,i));
  143. starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(i,k));
  144. if (get_block_rank(i, k) == _rank)
  145. starpu_data_wont_use(STARPU_PLU(get_block_handle)(i,k));
  146. }
  147. starpu_iteration_pop();
  148. }
  149. starpu_task_wait_for_all();
  150. starpu_mpi_barrier(MPI_COMM_WORLD);
  151. end = starpu_timing_now();
  152. double timing = end - start;
  153. // fprintf(stderr, "RANK %d -> took %f ms\n", rank, timing/1000);
  154. return timing;
  155. }