HPL_plindx1.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. /*
  2. * -- High Performance Computing Linpack Benchmark (HPL)
  3. * HPL - 2.0 - September 10, 2008
  4. * Antoine P. Petitet
  5. * University of Tennessee, Knoxville
  6. * Innovative Computing Laboratory
  7. * (C) Copyright 2000-2008 All Rights Reserved
  8. *
  9. * -- Copyright notice and Licensing terms:
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. *
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. *
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions, and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. *
  22. * 3. All advertising materials mentioning features or use of this
  23. * software must display the following acknowledgement:
  24. * This product includes software developed at the University of
  25. * Tennessee, Knoxville, Innovative Computing Laboratory.
  26. *
  27. * 4. The name of the University, the name of the Laboratory, or the
  28. * names of its contributors may not be used to endorse or promote
  29. * products derived from this software without specific written
  30. * permission.
  31. *
  32. * -- Disclaimer:
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
  38. * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. * ---------------------------------------------------------------------
  46. */
  47. /*
  48. * Include files
  49. */
  50. #include "hpl.h"
  51. #ifdef STDC_HEADERS
  52. void HPL_plindx1
  53. (
  54. HPL_T_panel * PANEL,
  55. const int K,
  56. const int * IPID,
  57. int * IPA,
  58. int * LINDXA,
  59. int * LINDXAU,
  60. int * IPLEN,
  61. int * IPMAP,
  62. int * IPMAPM1,
  63. int * PERMU,
  64. int * IWORK
  65. )
  66. #else
  67. void HPL_plindx1
  68. ( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
  69. HPL_T_panel * PANEL;
  70. const int K;
  71. const int * IPID;
  72. int * IPA;
  73. int * LINDXA;
  74. int * LINDXAU;
  75. int * IPLEN;
  76. int * IPMAP;
  77. int * IPMAPM1;
  78. int * PERMU;
  79. int * IWORK;
  80. #endif
  81. {
  82. /*
  83. * Purpose
  84. * =======
  85. *
  86. * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing
  87. * the local source and final destination position resulting from the
  88. * application of row interchanges. In addition, this function computes
  89. * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic
  90. * mapping information for the spreading phase.
  91. *
  92. * Arguments
  93. * =========
  94. *
  95. * PANEL (local input/output) HPL_T_panel *
  96. * On entry, PANEL points to the data structure containing the
  97. * panel information.
  98. *
  99. * K (global input) const int
  100. * On entry, K specifies the number of entries in IPID. K is at
  101. * least 2*N, and at most 4*N.
  102. *
  103. * IPID (global input) const int *
  104. * On entry, IPID is an array of length K. The first K entries
  105. * of that array contain the src and final destination resulting
  106. * from the application of the interchanges.
  107. *
  108. * IPA (global output) int *
  109. * On exit, IPA specifies the number of rows that the current
  110. * process row has that either belong to U or should be swapped
  111. * with remote rows of A.
  112. *
  113. * LINDXA (global output) int *
  114. * On entry, LINDXA is an array of dimension 2*N. On exit, this
  115. * array contains the local indexes of the rows of A I have that
  116. * should be copied into U.
  117. *
  118. * LINDXAU (global output) int *
  119. * On exit, LINDXAU is an array of dimension 2*N. On exit, this
  120. * array contains the local destination information encoded as
  121. * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be
  122. * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
  123. * of A should be locally copied into A(-LINDXAU(k),:).
  124. *
  125. * IPLEN (global output) int *
  126. * On entry, IPLEN is an array of dimension NPROW + 1. On exit,
  127. * this array is such that IPLEN[i] is the number of rows of A
  128. * in the processes before process IPMAP[i] after the sort
  129. * with the convention that IPLEN[nprow] is the total number of
  130. * rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the
  131. * local number of rows of A that should be moved to the process
  132. * IPMAP[i]. IPLEN is such that the number of rows of the source
  133. * process row can be computed as IPLEN[1] - IPLEN[0], and the
  134. * remaining entries of this array are sorted so that the
  135. * quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
  136. *
  137. * IPMAP (global output) int *
  138. * On entry, IPMAP is an array of dimension NPROW. On exit, this
  139. * array contains the logarithmic mapping of the processes. In
  140. * other words, IPMAP[myrow] is the corresponding sorted process
  141. * coordinate.
  142. *
  143. * IPMAPM1 (global output) int *
  144. * On entry, IPMAPM1 is an array of dimension NPROW. On exit,
  145. * this array contains the inverse of the logarithmic mapping
  146. * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in
  147. * [0.. NPROCS)
  148. *
  149. * PERMU (global output) int *
  150. * On entry, PERMU is an array of dimension JB. On exit, PERMU
  151. * contains a sequence of permutations, that should be applied
  152. * in increasing order to permute in place the row panel U.
  153. *
  154. * IWORK (workspace) int *
  155. * On entry, IWORK is a workarray of dimension 2*JB.
  156. *
  157. * ---------------------------------------------------------------------
  158. */
  159. /*
  160. * .. Local Variables ..
  161. */
  162. int * iwork;
  163. int dst, dstrow, fndd, i, ia, icurrow, il,
  164. ip, ipU, iroff, j, jb, myrow, nb, nprow,
  165. src, srcrow;
  166. /* ..
  167. * .. Executable Statements ..
  168. */
  169. /*
  170. * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
  171. */
  172. HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
  173. /*
  174. * Compute the local arrays LINDXA and LINDXAU containing the local
  175. * source and final destination position resulting from the application
  176. * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA
  177. * elsewhere and PERMU in every process.
  178. */
  179. myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow;
  180. jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia;
  181. iroff = PANEL->ii; icurrow = PANEL->prow;
  182. iwork = IWORK + jb;
  183. if( myrow == icurrow )
  184. {
  185. for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
  186. {
  187. src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
  188. if( srcrow == icurrow )
  189. {
  190. dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
  191. Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
  192. LINDXA[ip] = il - iroff;
  193. if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
  194. {
  195. PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow];
  196. j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
  197. IPLEN[il]++; ipU++;
  198. }
  199. else if( dstrow != icurrow )
  200. {
  201. j = 0;
  202. do { fndd = ( dst == IPID[j] ); j+=2; }
  203. while( !fndd && ( j < K ) );
  204. PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
  205. j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
  206. IPLEN[il]++; ipU++;
  207. }
  208. else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
  209. {
  210. Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
  211. LINDXAU[ip] = iroff - il;
  212. }
  213. ip++;
  214. }
  215. }
  216. *IPA = ip;
  217. }
  218. else
  219. {
  220. for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
  221. {
  222. src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
  223. dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
  224. /*
  225. * LINDXA[i] is the local index of the row of A that belongs into U
  226. */
  227. if( myrow == dstrow )
  228. {
  229. Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
  230. LINDXA[ip] = il - iroff; ip++;
  231. }
  232. /*
  233. * iwork[i] is the local (current) position index in U
  234. * PERMU[i] is the local (final) destination index in U
  235. */
  236. if( srcrow == icurrow )
  237. {
  238. if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
  239. {
  240. PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow];
  241. iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
  242. }
  243. else if( dstrow != icurrow )
  244. {
  245. j = 0;
  246. do { fndd = ( dst == IPID[j] ); j+=2; }
  247. while( !fndd && ( j < K ) );
  248. PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
  249. iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
  250. }
  251. }
  252. }
  253. *IPA = 0;
  254. }
  255. /*
  256. * Simplify iwork and PERMU, return in PERMU the sequence of permutation
  257. * that need to be apply to U after it has been broadcast.
  258. */
  259. HPL_perm( jb, iwork, PERMU, IWORK );
  260. /*
  261. * Reset IPLEN to its correct value
  262. */
  263. for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
  264. IPLEN[0] = 0;
  265. /*
  266. * End of HPL_plindx1
  267. */
  268. }