pxlu_implicit.c 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2011, 2013-2015, 2017 Université de Bordeaux
  4. * Copyright (C) 2010, 2012, 2013 CNRS
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include "pxlu.h"
  18. #include "pxlu_kernels.h"
  19. #include <sys/time.h>
  20. //#define VERBOSE_INIT 1
  21. //#define DEBUG 1
  22. static unsigned no_prio = 0;
  23. static unsigned nblocks = 0;
  24. static int rank = -1;
  25. static int world_size = -1;
  26. struct callback_arg {
  27. unsigned i, j, k;
  28. };
  29. /*
  30. * Task 11 (diagonal factorization)
  31. */
  32. static void create_task_11(unsigned k)
  33. {
  34. starpu_mpi_task_insert(MPI_COMM_WORLD,
  35. &STARPU_PLU(cl11),
  36. STARPU_VALUE, &k, sizeof(k),
  37. STARPU_VALUE, &k, sizeof(k),
  38. STARPU_VALUE, &k, sizeof(k),
  39. STARPU_RW, STARPU_PLU(get_block_handle)(k, k),
  40. STARPU_PRIORITY, !no_prio ?
  41. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  42. 0);
  43. }
  44. /*
  45. * Task 12 (Update lower left (TRSM))
  46. */
  47. static void create_task_12(unsigned k, unsigned j)
  48. {
  49. #ifdef STARPU_DEVEL
  50. #warning temporary fix
  51. #endif
  52. starpu_mpi_task_insert(MPI_COMM_WORLD,
  53. //&STARPU_PLU(cl12),
  54. &STARPU_PLU(cl21),
  55. STARPU_VALUE, &j, sizeof(j),
  56. STARPU_VALUE, &j, sizeof(j),
  57. STARPU_VALUE, &k, sizeof(k),
  58. STARPU_R, STARPU_PLU(get_block_handle)(k, k),
  59. STARPU_RW, STARPU_PLU(get_block_handle)(k, j),
  60. STARPU_PRIORITY, !no_prio && (j == k+1) ?
  61. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  62. 0);
  63. }
  64. /*
  65. * Task 21 (Update upper right (TRSM))
  66. */
  67. static void create_task_21(unsigned k, unsigned i)
  68. {
  69. #ifdef STARPU_DEVEL
  70. #warning temporary fix
  71. #endif
  72. starpu_mpi_task_insert(MPI_COMM_WORLD,
  73. //&STARPU_PLU(cl21),
  74. &STARPU_PLU(cl12),
  75. STARPU_VALUE, &i, sizeof(i),
  76. STARPU_VALUE, &i, sizeof(i),
  77. STARPU_VALUE, &k, sizeof(k),
  78. STARPU_R, STARPU_PLU(get_block_handle)(k, k),
  79. STARPU_RW, STARPU_PLU(get_block_handle)(i, k),
  80. STARPU_PRIORITY, !no_prio && (i == k+1) ?
  81. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  82. 0);
  83. }
  84. /*
  85. * Task 22 (GEMM)
  86. */
  87. static void create_task_22(unsigned k, unsigned i, unsigned j)
  88. {
  89. starpu_mpi_task_insert(MPI_COMM_WORLD,
  90. &STARPU_PLU(cl22),
  91. STARPU_VALUE, &i, sizeof(i),
  92. STARPU_VALUE, &j, sizeof(j),
  93. STARPU_VALUE, &k, sizeof(k),
  94. STARPU_R, STARPU_PLU(get_block_handle)(k, j),
  95. STARPU_R, STARPU_PLU(get_block_handle)(i, k),
  96. STARPU_RW, STARPU_PLU(get_block_handle)(i, j),
  97. STARPU_PRIORITY, !no_prio && (i == k + 1) && (j == k +1) ?
  98. STARPU_MAX_PRIO : STARPU_MIN_PRIO,
  99. 0);
  100. }
  101. /*
  102. * code to bootstrap the factorization
  103. */
  104. double STARPU_PLU(plu_main)(unsigned _nblocks, int _rank, int _world_size)
  105. {
  106. double start;
  107. double end;
  108. nblocks = _nblocks;
  109. rank = _rank;
  110. world_size = _world_size;
  111. /* create all the DAG nodes */
  112. unsigned i,j,k;
  113. starpu_mpi_barrier(MPI_COMM_WORLD);
  114. start = starpu_timing_now();
  115. for (k = 0; k < nblocks; k++)
  116. {
  117. starpu_iteration_push(k);
  118. create_task_11(k);
  119. for (i = k+1; i<nblocks; i++)
  120. {
  121. create_task_12(k, i);
  122. create_task_21(k, i);
  123. }
  124. starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,k));
  125. if (get_block_rank(k, k) == _rank)
  126. starpu_data_wont_use(STARPU_PLU(get_block_handle)(k,k));
  127. for (i = k+1; i<nblocks; i++)
  128. {
  129. for (j = k+1; j<nblocks; j++)
  130. {
  131. create_task_22(k, i, j);
  132. }
  133. }
  134. for (i = k+1; i<nblocks; i++)
  135. {
  136. starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(k,i));
  137. if (get_block_rank(k, i) == _rank)
  138. starpu_data_wont_use(STARPU_PLU(get_block_handle)(k,i));
  139. starpu_mpi_cache_flush(MPI_COMM_WORLD, STARPU_PLU(get_block_handle)(i,k));
  140. if (get_block_rank(i, k) == _rank)
  141. starpu_data_wont_use(STARPU_PLU(get_block_handle)(i,k));
  142. }
  143. starpu_iteration_pop();
  144. }
  145. starpu_task_wait_for_all();
  146. starpu_mpi_barrier(MPI_COMM_WORLD);
  147. end = starpu_timing_now();
  148. double timing = end - start;
  149. // fprintf(stderr, "RANK %d -> took %f ms\n", rank, timing/1000);
  150. return timing;
  151. }