HPL_dlaswp01N.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. /*
  2. * -- High Performance Computing Linpack Benchmark (HPL)
  3. * HPL - 2.0 - September 10, 2008
  4. * Antoine P. Petitet
  5. * University of Tennessee, Knoxville
  6. * Innovative Computing Laboratory
  7. * (C) Copyright 2000-2008 All Rights Reserved
  8. *
  9. * -- Copyright notice and Licensing terms:
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions
  13. * are met:
  14. *
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. *
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions, and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. *
  22. * 3. All advertising materials mentioning features or use of this
  23. * software must display the following acknowledgement:
  24. * This product includes software developed at the University of
  25. * Tennessee, Knoxville, Innovative Computing Laboratory.
  26. *
  27. * 4. The name of the University, the name of the Laboratory, or the
  28. * names of its contributors may not be used to endorse or promote
  29. * products derived from this software without specific written
  30. * permission.
  31. *
  32. * -- Disclaimer:
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
  38. * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. * ---------------------------------------------------------------------
  46. */
  47. /*
  48. * Include files
  49. */
  50. #include "hpl.h"
  51. /*
  52. * Define default value for unrolling factor
  53. */
  #ifndef HPL_LASWP01N_DEPTH
  #define    HPL_LASWP01N_DEPTH        32
  #define    HPL_LASWP01N_LOG2_DEPTH    5
  #endif
  /*
   * The unrolled copy below only implements depths 1, 2, 4, 8, 16 and 32,
   * and the column blocking relies on DEPTH == 2^LOG2_DEPTH.  Reject any
   * other user-supplied combination at compile time instead of silently
   * moving the wrong number of columns.
   */
  #ifndef HPL_LASWP01N_LOG2_DEPTH
  #error "HPL_LASWP01N_LOG2_DEPTH must be defined together with HPL_LASWP01N_DEPTH"
  #endif
  #if ( HPL_LASWP01N_LOG2_DEPTH < 0 ) || ( HPL_LASWP01N_LOG2_DEPTH > 5 )
  #error "HPL_LASWP01N_LOG2_DEPTH must be in the range 0 .. 5"
  #elif ( HPL_LASWP01N_DEPTH != ( 1 << HPL_LASWP01N_LOG2_DEPTH ) )
  #error "HPL_LASWP01N_DEPTH must be equal to 2^HPL_LASWP01N_LOG2_DEPTH"
  #endif

  #ifdef STDC_HEADERS
  void HPL_dlaswp01N
  (
     const int                        M,
     const int                        N,
     double *                         A,
     const int                        LDA,
     double *                         U,
     const int                        LDU,
     const int *                      LINDXA,
     const int *                      LINDXAU
  )
  #else
  void HPL_dlaswp01N
  ( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
     const int                        M;
     const int                        N;
     double *                         A;
     const int                        LDA;
     double *                         U;
     const int                        LDU;
     const int *                      LINDXA;
     const int *                      LINDXAU;
  #endif
  {
  /*
   * Purpose
   * =======
   *
   * HPL_dlaswp01N copies scattered rows of A into itself and into an
   * array U.  For every i in [0,M), the N entries of row LINDXA[i] of A
   * (consecutive entries are LDA elements apart) are copied either
   *   - into row  LINDXAU[i] of U (stride LDU) when LINDXAU[i] >= 0, or
   *   - into row -LINDXAU[i] of A (stride LDA) when LINDXAU[i] <  0.
   *
   * The columns are processed in blocks of HPL_LASWP01N_DEPTH, followed
   * by the remaining N mod HPL_LASWP01N_DEPTH columns.  Within a block
   * the rows are handled in increasing order of i, so moves within A
   * whose source and destination rows overlap depend on that order.
   *
   * Arguments
   * =========
   *
   * M       (local input)                 const int
   *         On entry, M  specifies the number of rows of A that should be
   *         moved within A or copied into U.  The routine returns
   *         immediately when M is less than or equal to zero.
   *
   * N       (local input)                 const int
   *         On entry, N  specifies the length of rows of A that should be
   *         moved within A or copied into U.  The routine returns
   *         immediately when N is less than or equal to zero.
   *
   * A       (local input/output)          double *
   *         On entry, A points to an array of dimension (LDA,N). The rows
   *         of this array specified by LINDXA should be moved within A or
   *         copied into U.
   *
   * LDA     (local input)                 const int
   *         On entry, LDA specifies the leading dimension of the array A.
   *         LDA must be at least MAX(1,M).
   *
   * U       (local input/output)          double *
   *         On entry, U points to an array of dimension (LDU,N). The rows
   *         of A specified by LINDXA are copied into this array U at the
   *         positions indicated by non-negative values of LINDXAU.
   *
   * LDU     (local input)                 const int
   *         On entry, LDU specifies the leading dimension of the array U.
   *         LDU must be at least MAX(1,M).
   *
   * LINDXA  (local input)                 const int *
   *         On entry, LINDXA is an array of dimension M that contains the
   *         local row indexes of A that should be moved within A or
   *         copied into U.
   *
   * LINDXAU (local input)                 const int *
   *         On entry, LINDXAU  is an array of dimension M  that  contains
   *         the local row indexes of U where the rows of A should be
   *         copied to. This array also contains the local row offsets in
   *         A where some of the rows of A should be moved to.  A non-
   *         negative value of LINDXAU[i] indicates that the row LINDXA[i]
   *         of A should be copied into U at the position LINDXAU[i];
   *         otherwise the row LINDXA[i] of A is moved to the position
   *         -LINDXAU[i] within A.
   *
   * ---------------------------------------------------------------------
   */
  /*
   * .. Local Variables ..
   *
   * The block strides are kept in size_t so that LDA * DEPTH (resp.
   * LDU * DEPTH) cannot be truncated to the range of an int.
   */
     const size_t               incA = (size_t)(LDA) <<
                                       HPL_LASWP01N_LOG2_DEPTH,
                                incU = (size_t)(LDU) <<
                                       HPL_LASWP01N_LOG2_DEPTH;
     const double               * src;
     double                     * dst;
     int                        i, j, ldd, nr, nu;
  /* ..
   * .. Executable Statements ..
   */
     if( ( M <= 0 ) || ( N <= 0 ) ) return;
  /*
   * nu columns are handled by the unrolled code, nr columns are left over.
   * DEPTH is a power of two (checked above), so the mask is N mod DEPTH.
   */
     nr = N & ( HPL_LASWP01N_DEPTH - 1 );
     nu = N - nr;

     for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU )
     {
        for( i = 0; i < M; i++ )
        {
           src = A + LINDXA[i];
  /*
   * Plain signed pointer arithmetic:  LINDXAU[i] < 0  selects the row
   * -LINDXAU[i] of A.  Converting the negative index to size_t first
   * would rely on address wrap-around.
   */
           if( LINDXAU[i] >= 0 ) { dst = U + LINDXAU[i]; ldd = LDU; }
           else                  { dst = A - LINDXAU[i]; ldd = LDA; }

           *dst = *src; dst += ldd; src += LDA;
  #if ( HPL_LASWP01N_DEPTH >  1 )
           *dst = *src; dst += ldd; src += LDA;
  #endif
  #if ( HPL_LASWP01N_DEPTH >  2 )
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
  #endif
  #if ( HPL_LASWP01N_DEPTH >  4 )
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
  #endif
  #if ( HPL_LASWP01N_DEPTH >  8 )
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
  #endif
  #if ( HPL_LASWP01N_DEPTH > 16 )
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
           *dst = *src; dst += ldd; src += LDA;
  #endif
        }
     }
  /*
   * Remaining nr < HPL_LASWP01N_DEPTH columns; A and U already point to
   * the first of them.
   */
     if( nr > 0 )
     {
        for( i = 0; i < M; i++ )
        {
           src = A + LINDXA[i];
           if( LINDXAU[i] >= 0 ) { dst = U + LINDXAU[i]; ldd = LDU; }
           else                  { dst = A - LINDXAU[i]; ldd = LDA; }

           for( j = 0; j < nr; j++, dst += ldd, src += LDA ) { *dst = *src; }
        }
     }
  /*
   * End of HPL_dlaswp01N
   */
  }