HPL_dlaswp00N.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /*
  2. * -- High Performance Computing Linpack Benchmark (HPL)
  3. * HPL - 2.0 - September 10, 2008
  4. * Antoine P. Petitet
  5. * University of Tennessee, Knoxville
  6. * Innovative Computing Laboratory
  7. * (C) Copyright 2000-2008 All Rights Reserved
  8. *
  9. * -- Copyright notice and Licensing terms:
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. *
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. *
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions, and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. *
  22. * 3. All advertising materials mentioning features or use of this
  23. * software must display the following acknowledgement:
  24. * This product includes software developed at the University of
  25. * Tennessee, Knoxville, Innovative Computing Laboratory.
  26. *
  27. * 4. The name of the University, the name of the Laboratory, or the
  28. * names of its contributors may not be used to endorse or promote
  29. * products derived from this software without specific written
  30. * permission.
  31. *
  32. * -- Disclaimer:
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
  38. * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. * ---------------------------------------------------------------------
  46. */
  47. /*
  48. * Include files
  49. */
#include "hpl.h"
#include <stddef.h>
  51. /*
  52. * Define default value for unrolling factor
  53. */
  54. #ifndef HPL_LASWP00N_DEPTH
  55. #define HPL_LASWP00N_DEPTH 32
  56. #define HPL_LASWP00N_LOG2_DEPTH 5
  57. #endif
  58. #ifdef STDC_HEADERS
  59. void HPL_dlaswp00N
  60. (
  61. const int M,
  62. const int N,
  63. double * A,
  64. const int LDA,
  65. const int * IPIV
  66. )
  67. #else
  68. void HPL_dlaswp00N
  69. ( M, N, A, LDA, IPIV )
  70. const int M;
  71. const int N;
  72. double * A;
  73. const int LDA;
  74. const int * IPIV;
  75. #endif
  76. {
  77. /*
  78. * Purpose
  79. * =======
  80. *
  81. * HPL_dlaswp00N performs a series of local row interchanges on a matrix
  82. * A. One row interchange is initiated for rows 0 through M-1 of A.
  83. *
  84. * Arguments
  85. * =========
  86. *
  87. * M (local input) const int
  88. * On entry, M specifies the number of rows of the array A to be
  89. * interchanged. M must be at least zero.
  90. *
  91. * N (local input) const int
  92. * On entry, N specifies the number of columns of the array A.
  93. * N must be at least zero.
  94. *
  95. * A (local input/output) double *
  96. * On entry, A points to an array of dimension (LDA,N) to which
  97. * the row interchanges will be applied. On exit, the permuted
  98. * matrix.
  99. *
  100. * LDA (local input) const int
  101. * On entry, LDA specifies the leading dimension of the array A.
  102. * LDA must be at least MAX(1,M).
  103. *
  104. * IPIV (local input) const int *
  105. * On entry, IPIV is an array of size M that contains the
  106. * pivoting information. For k in [0..M), IPIV[k]=IROFF + l
  107. * implies that local rows k and l are to be interchanged.
  108. *
  109. * ---------------------------------------------------------------------
  110. */
  111. /*
  112. * .. Local Variables ..
  113. */
  114. register double r;
  115. double * a0, * a1;
  116. const int incA = (int)( (unsigned int)(LDA) <<
  117. HPL_LASWP00N_LOG2_DEPTH );
  118. int ip, nr, nu;
  119. register int i, j;
  120. /* ..
  121. * .. Executable Statements ..
  122. */
  123. if( ( M <= 0 ) || ( N <= 0 ) ) return;
  124. nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH )
  125. << HPL_LASWP00N_LOG2_DEPTH ) );
  126. for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA )
  127. {
  128. for( i = 0; i < M; i++ )
  129. {
  130. if( i != ( ip = IPIV[i] ) )
  131. {
  132. a0 = A + i; a1 = A + ip;
  133. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  134. #if ( HPL_LASWP00N_DEPTH > 1 )
  135. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  136. #endif
  137. #if ( HPL_LASWP00N_DEPTH > 2 )
  138. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  139. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  140. #endif
  141. #if ( HPL_LASWP00N_DEPTH > 4 )
  142. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  143. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  144. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  145. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  146. #endif
  147. #if ( HPL_LASWP00N_DEPTH > 8 )
  148. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  149. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  150. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  151. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  152. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  153. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  154. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  155. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  156. #endif
  157. #if ( HPL_LASWP00N_DEPTH > 16 )
  158. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  159. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  160. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  161. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  162. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  163. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  164. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  165. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  166. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  167. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  168. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  169. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  170. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  171. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  172. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  173. r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
  174. #endif
  175. }
  176. }
  177. }
  178. if( nr > 0 )
  179. {
  180. for( i = 0; i < M; i++ )
  181. {
  182. if( i != ( ip = IPIV[i] ) )
  183. {
  184. a0 = A + i; a1 = A + ip;
  185. for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA )
  186. { r = *a0; *a0 = *a1; *a1 = r; }
  187. }
  188. }
  189. }
  190. /*
  191. * End of HPL_dlaswp00N
  192. */
  193. }